├── .gitignore ├── JATS.lua ├── LICENSE ├── README.md ├── default.jats ├── examples ├── example.bib ├── example1.html ├── example1.md ├── example1.tex ├── example2.html ├── example2.md ├── example2.tex └── example3.md ├── jats.csl ├── sample.lua └── spec ├── input_spec.lua └── output_spec.lua /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | cabal-dev 3 | *.o 4 | *.hi 5 | *.chi 6 | *.chs.h 7 | .virthualenv 8 | testing/* 9 | -------------------------------------------------------------------------------- /JATS.lua: -------------------------------------------------------------------------------- 1 | -- This is a JATS custom writer for pandoc. It produces output 2 | -- that tries to conform to the JATS 1.0 specification 3 | -- http://jats.nlm.nih.gov/archiving/tag-library/1.0/index.html 4 | -- 5 | -- Invoke with: pandoc -t jats.lua 6 | -- 7 | -- Note: you need not have lua installed on your system to use this 8 | -- custom writer. However, if you do have lua installed, you can 9 | -- use it to test changes to the script. 'lua JATS.lua' will 10 | -- produce informative error messages if your code contains 11 | -- syntax errors. 12 | -- 13 | -- Released under the GPL, version 2 or greater. See LICENSE for more info. 14 | 15 | -- Tables to store metadata, headers, sections, back sections, references, figures and footnotes 16 | local meta = {} 17 | local headers = {} 18 | local sections = {} 19 | local back = {} 20 | local references = {} 21 | local figures = {} 22 | 23 | -- This function is called once for the whole document. Parameters: 24 | -- body is a string, metadata is a table, variables is a table. 25 | -- This gives you a fragment. You could use the metadata table to 26 | -- fill variables in a custom lua template. Or, pass `--template=...` 27 | -- to pandoc, and pandoc will do the template processing as 28 | -- usual. 
-- Assemble the final document fragment. Called once for the whole document.
--   body      : string, the concatenation of all rendered blocks
--   metadata  : table of document metadata (may be nil)
--   variables : table of template variables (not used here)
-- Returns a string holding the <body> element and, when any back-matter
-- sections were collected, a following <back> element.
--
-- NOTE(review): the copy of this file under review had every markup literal
-- stripped from its strings. The '</sec>' literals and the section pattern
-- below were reconstructed from what survived (the 6/7 character offsets,
-- the three loop variables, the 'lev' attribute and xml('title', ...));
-- confirm them against the upstream JATS.lua.
function Doc(body, metadata, variables)
  meta = metadata or {}

  -- if document doesn't start with section, add top-level section without title
  if string.sub(body, 1, 6) ~= '</sec>' then
    body = Header(1, '') .. '\n' .. body
  end

  -- strip closing section tag from beginning, add to end of document
  body = string.sub(body, 7) .. '</sec>'

  -- parse sections, turn body into table of sections
  -- (section_helper fills the file-level `sections` and `back` tables).
  -- `attr` is intentionally left as a global here: RefList() further down
  -- in this file reads a global `attr`, so making it local would change
  -- what that function sees.
  for lev, title, content in string.gmatch(body, '<sec lev="(.-)">%s<title>(.-)</title>(.-)</sec>') do
    attr = section_helper(tonumber(lev), content, title)
  end

  body = xml('body', '\n' .. table.concat(sections, '\n') .. '\n')

  if #back > 0 then
    body = body .. '\n' .. xml('back', '\n' .. table.concat(back, '\n'))
  end

  return body
end

-- XML character entity escaping and unescaping

-- Replace the five XML special characters in s with character entities.
-- Returns the escaped string followed by the number of replacements
-- (the two results of string.gsub); wrap the call in parentheses when
-- exactly one value is needed.
function escape(s)
  local map = { ['<'] = '&lt;',
                ['>'] = '&gt;',
                ['&'] = '&amp;',
                ['"'] = '&quot;',
                ['\''] = '&#39;' }
  return s:gsub("[<>&\"']", map)
end

-- Inverse of escape(): turn the five entities produced by escape() back
-- into characters. Any other entity (e.g. '&nbsp;') has no entry in the
-- map and is therefore left untouched by gsub.
-- Returns the unescaped string followed by the number of entities matched.
function unescape(s)
  local map = { ['&lt;'] = '<',
                ['&gt;'] = '>',
                ['&amp;'] = '&',
                ['&quot;'] = '"',
                ['&#39;'] = '\'' }
  -- no explicit capture: the whole match is used as the lookup key
  return s:gsub('&#?[%d%a]+;', map)
end

-- Helper function to convert an attributes table into
-- a string that can be put into XML elements.
-- attr is a table mapping attribute names to values. Entries whose value
-- is nil, false or the empty string are skipped; every other value is
-- converted with tostring() and escaped. Attributes are emitted in sorted
-- key order, each preceded by a single space.
function attributes(attr)
  local attr_table = {}
  for name, value in pairsByKeys(attr) do
    if value and value ~= '' then
      -- tostring() so that numeric values do not fail inside escape()
      table.insert(attr_table, string.format(' %s="%s"', name, (escape(tostring(value)))))
    end
  end
  return table.concat(attr_table)
end

-- sort table, so that attributes are in consistent order
-- Returns an iterator over t that yields key, value pairs with the keys
-- in the order produced by table.sort(keys, f); f is an optional comparator.
function pairsByKeys (t, f)
  local keys = {}
  for key in pairs(t) do
    keys[#keys + 1] = key
  end
  table.sort(keys, f)

  local pos = 0 -- iterator state
  return function ()
    pos = pos + 1
    local key = keys[pos]
    if key == nil then
      return nil
    end
    return key, t[key]
  end
end

-- generic xml builder
--   tag  : element name
--   s    : element content; when nil or false a self-closing element is emitted
--   attr : optional attribute table, rendered with attributes()
-- NOTE(review): the closing-tag literal was missing from the copy of this
-- file under review (markup stripped from strings); it is restored here.
function xml(tag, s, attr)
  local rendered_attr = attr and attributes(attr) or ''
  if s then
    return '<' .. tag .. rendered_attr .. '>' .. s .. '</' .. tag .. '>'
  end
  return '<' .. tag .. rendered_attr .. '/>'
end

-- Flatten nested table, needed for nested YAML metadata.
-- We only flatten associative arrays and create a composite key
-- (parent and child key joined with '-'); numbered arrays and flat
-- tables are left intact.
-- NOTE(review): an earlier comment here said hyphens in keys are converted
-- to underscores; the code has never done that, keys are joined with '-'.
function flatten_table(tbl)
  local result = {}

  local function flatten(node, prefix)
    for k, v in pairs(node) do
      if type(k) == 'number' and k > 0 and k <= #node then
        if prefix == nil then
          -- top-level array: there is no key to store it under, so copy
          -- the entries instead of indexing the result with nil
          result[k] = v
        else
          -- nested array: keep it intact under the composite key
          result[prefix] = node
          break
        end
      else
        local composite = (prefix and prefix .. '-' or '') .. k
        if type(v) == 'table' then
          flatten(v, composite)
        else
          result[composite] = v
        end
      end
    end
  end

  flatten(tbl)
  return result
end

-- Read a file from the working directory and
-- return its contents (or nil if not found).
function read_file(name)
  local file = io.open(name, "r")
  if not file then return nil end
  local contents = file:read("*a")
  -- close explicitly instead of waiting for the garbage collector
  file:close()
  return contents
end

-- Parse YAML string and return table.
-- We only understand a subset: 'key: value' lines, 'key: [a, b]' inline
-- arrays, and one level of nesting where the children of a value-less
-- 'key:' line are indented by two more characters than that line.
-- Lines that do not look like 'key: ...' are ignored.
function parse_yaml(s)
  -- patterns
  local line_pattern = '(.-)\r?\n'
  local config_pattern = '^(%s*)([%w%-]+):%s*(.-)$'

  -- First split string into lines. gsub hands every newline-terminated
  -- line to collect(); what remains afterwards is the last, unterminated
  -- line, which is collected as well.
  local lines = {}
  local function collect(line)
    lines[#lines + 1] = line
    return ''
  end
  collect((s:gsub(line_pattern, collect)))

  -- Then go over each line and check value and indentation
  local config = {}
  local parent_indent, parent_key = 0, nil

  for _, line in ipairs(lines) do
    local indent, tag, value = line:match(config_pattern)
    if tag then
      if value == '' then
        -- a key without value opens a nested table
        parent_indent, parent_key = #indent, tag
        config[tag] = {}
      else
        -- check whether value is enclosed by brackets, i.e. an array
        if value:find('^%[(.-)%]$') then
          local arr = {}
          for item in (value:sub(2, -2) .. ','):gmatch('(.-),%s*') do
            arr[#arr + 1] = item
          end
          value = arr
        else
          -- if it is a string, remove optional enclosing quotes
          value = value:match('^["\']*(.-)["\']*$')
        end

        if #indent == parent_indent + 2 and parent_key then
          config[parent_key][tag] = value
        else
          config[tag] = value
        end
      end
    end
  end

  return config
end

-- add appropriate sec-type attribute
-- Maps a section title to its sec-type value; returns nil for titles
-- that are not in the map.
function sec_type_helper(s)
  local map = { ['Abstract'] = 'abstract',
                ['Acknowledgments'] = 'acknowledgements',
                ['Author Summary'] = 'author-summary',
                ['Conclusions'] = 'conclusions',
                ['Discussion'] = 'discussion',
                ['Glossary'] = 'glossary',
                ['Introduction'] = 'intro',
                ['Materials and Methods'] = 'materials|methods',
                ['Notes'] = 'notes',
                ['References'] = 'references',
                ['Results'] = 'results',
                ['Supporting Information'] = 'supplementary-material',
                ['Supplementary Information'] = 'supplementary-material' }
  return map[s]
end

-- Route one parsed section to where it belongs, based on its sec-type:
-- acknowledgements, references, notes and glossary are appended to `back`,
-- abstract and author summary are dropped, everything else is appended
-- to `sections`. Returns the attribute table holding the sec-type.
function section_helper(lev, s, title)
  local attr = { ['sec-type'] = sec_type_helper(title) }
  local sec_type = attr['sec-type']

  if sec_type == "acknowledgements" then
    table.insert(back, Ack(s, title))
  elseif sec_type == "references" then
    table.insert(back, RefList(s, title))
  elseif sec_type == "notes" then
    table.insert(back, Note(s, title))
  elseif sec_type == "glossary" then
    table.insert(back, Glossary(s, title))
  elseif sec_type == "abstract" or sec_type == "author-summary" then
    -- discard, should be provided via metadata
  elseif sec_type == "supplementary-material" then
    table.insert(sections, SupplementaryMaterial(s, title))
  else
    table.insert(sections, Section(lev, s, title, attr))
  end

  return attr
end

-- Create table with year, month, day and iso8601-formatted date
-- Input is iso8601-formatted date as string
-- Return nil if input is not a valid date
-- The returned table is the one produced by os.date('*t') with an extra
-- `iso8601` field holding the 'YYYY-MM-DD' string.
function date_helper(iso_date)
  if type(iso_date) ~= 'string' or #iso_date ~= 10 then return nil end

  local y, m, d = iso_date:match('^(%d%d%d%d)-(%d%d)-(%d%d)$')
  if not y then return nil end
  y, m, d = tonumber(y), tonumber(m), tonumber(d)

  -- os.time either returns nil or raises (depending on the Lua version)
  -- when the date cannot be represented
  local ok, time = pcall(os.time, { year = y, month = m, day = d })
  if not ok or not time then return nil end

  local date = os.date('*t', time)
  -- os.time normalises out-of-range fields (month 13, day 45, ...);
  -- a date that does not survive the round trip was not valid
  if date.year ~= y or date.month ~= m or date.day ~= d then return nil end

  date.iso8601 = string.format('%04d-%02d-%02d', date.year, date.month, date.day)
  return date
end

-- Create affiliation table, linked to authors via aff-id
-- Every distinct author.affiliation gets a running id in order of first
-- appearance; tbl.aff lists them as { id = ..., name = ... } in that order
-- and each author with an affiliation receives the matching 'aff-id'.
function affiliation_helper(tbl)
  local ids = {}
  local affs = {}

  for _, author in ipairs(tbl.author or {}) do
    local name = author.affiliation
    if name then
      if not ids[name] then
        local id = #affs + 1
        ids[name] = id
        affs[id] = { id = id, name = name }
      end
      author['aff-id'] = ids[name]
    end
  end

  tbl.aff = affs
  return tbl
end

-- Create corresponding author table, linked to authors via cor-id
-- Every author that has both `corresp` and `email` set gets a running
-- 'cor-id'; tbl.corresp lists them as { id = ..., email = ... } in
-- author order.
function corresp_helper(tbl)
  local corresp = {}

  for _, author in ipairs(tbl.author or {}) do
    if author.corresp and author.email then
      local id = #corresp + 1
      author['cor-id'] = id
      corresp[id] = { id = id, email = author.email }
    end
  end

  tbl.corresp = corresp
  return tbl
end

-- temporary fix
-- NOTE(review): both patterns below lost their markup in the copy of this
-- file under review. As written, the first call deletes every ', ' in s
-- and the second call is a no-op. The original patterns cannot be
-- recovered from here, so the behaviour is kept unchanged; restore them
-- from the upstream JATS.lua.
function fix_citeproc(s)
  s = s:gsub(', ', '')
  s = s:gsub('', '')
  return s
end

-- Convert pandoc alignment to something HTML can use.
-- align is AlignLeft, AlignRight, AlignCenter, or AlignDefault.
304 | function html_align(align) 305 | local map = { ['AlignRight']= 'right', 306 | ['AlignCenter']= 'center' } 307 | return map[align] or 'left' 308 | end 309 | 310 | -- Blocksep is used to separate block elements. 311 | function Blocksep() 312 | return "\n" 313 | end 314 | 315 | -- The functions that follow render corresponding pandoc elements. 316 | -- s is always a string, attr is always a table of attributes, and 317 | -- items is always an array of strings (the items in a list). 318 | -- Comments indicate the types of other variables. 319 | -- Defined at https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Writers/Custom.hs 320 | 321 | -- block elements 322 | 323 | function Plain(s) 324 | return s 325 | end 326 | 327 | function Para(s) 328 | return xml('p', s) 329 | end 330 | 331 | function RawBlock(s) 332 | return xml('preformat', s) 333 | end 334 | 335 | -- JATS restricts use to inside table cells ( and ) 336 | function HorizontalRule() 337 | return '
' 338 | end 339 | 340 | -- lev is an integer, the header level. 341 | -- we can't use closing tags, as we don't know the end of the section 342 | function Header(lev, s, attr) 343 | attr = attr or {} 344 | attr['lev'] = '' .. lev 345 | return '\n\n' .. xml('title', s) 346 | end 347 | 348 | function Note(s) 349 | return s 350 | end 351 | 352 | function CodeBlock(s, attr) 353 | -- If code block has class 'dot', pipe the contents through dot 354 | -- and base64, and include the base64-encoded png as a data: URL. 355 | if attr.class and string.match(' ' .. attr.class .. ' ',' dot ') then 356 | local png = pipe("base64", pipe("dot -Tpng", s)) 357 | return '' 358 | -- otherwise treat as code (one could pipe through a highlighter) 359 | else 360 | return "
" .. escape(s) ..
361 |            "
" 362 | end 363 | end 364 | 365 | function BlockQuote(s) 366 | xml('boxed-text', s) 367 | end 368 | 369 | -- Caption is a string, aligns is an array of strings, 370 | -- widths is an array of floats, headers is an array of 371 | -- strings, rows is an array of arrays of strings. 372 | function Table(caption, aligns, widths, headers, rows) 373 | local buffer = {} 374 | local function add(s) 375 | table.insert(buffer, s) 376 | end 377 | table.insert(buffer, '') 378 | if caption ~= '' then 379 | -- if caption begins with text, make it the 380 | caption = string.gsub('<p>' .. caption, "^<p><bold>(.-)</bold>%s", "<title>%1\n

") 381 | add(xml('caption>', caption)) 382 | end 383 | add("") 384 | if widths and widths[1] ~= 0 then 385 | for _, w in pairs(widths) do 386 | add('') 387 | end 388 | end 389 | local header_row = {} 390 | local empty_header = true 391 | for i, h in pairs(headers) do 392 | local align = html_align(aligns[i]) 393 | 394 | -- remove

tag 395 | h = h:gsub("^

(.-)

", "%1") 396 | 397 | table.insert(header_row,'') 398 | empty_header = empty_header and h == "" 399 | end 400 | if empty_header then 401 | head = "" 402 | else 403 | add('') 404 | for _,h in pairs(header_row) do 405 | add(h) 406 | end 407 | add('') 408 | end 409 | for _, row in pairs(rows) do 410 | add('') 411 | for i,c in pairs(row) do 412 | -- remove

tag 413 | c = c:gsub("^

(.-)

", "%1") 414 | add('') 415 | end 416 | add('') 417 | end 418 | add('
' .. h .. '
' .. c .. '
\n') 419 | return table.concat(buffer,'\n') 420 | end 421 | 422 | function BulletList(items) 423 | local attr = { ['list-type'] = 'bullet' } 424 | return List(items, attr) 425 | end 426 | 427 | function OrderedList(items) 428 | local attr = { ['list-type'] = 'order' } 429 | return List(items, attr) 430 | end 431 | 432 | function List(items, attr) 433 | local buffer = {} 434 | for _, item in pairs(items) do 435 | table.insert(buffer, xml('list-item', item)) 436 | end 437 | return xml('list', '\n' .. table.concat(buffer, '\n') .. '\n', attr) 438 | end 439 | 440 | -- Revisit association list StackValue instance. 441 | -- items is a table of tables 442 | function DefinitionList(items) 443 | local buffer = {} 444 | for _,item in pairs(items) do 445 | for k, v in pairs(item) do 446 | local term = xml('term', k) 447 | local def = xml('def', table.concat(v,'')) 448 | table.insert(buffer, xml('def-item', term .. def)) 449 | end 450 | end 451 | return xml('def-list', '\n' .. table.concat(buffer, '\n') .. '\n') 452 | end 453 | 454 | function Div(s, attr) 455 | return s 456 | end 457 | 458 | -- custom block elements for JATS 459 | 460 | -- section is generated after header to allow reordering 461 | function Section(lev, s, title, attr) 462 | local last = headers[#headers] 463 | local h = last and last.h or {} 464 | h[lev] = (h[lev] or 0) + 1 465 | for i = lev + 1, #headers do 466 | table.remove(h, i) 467 | end 468 | 469 | local header = { ['h'] = h, 470 | ['title'] = title, 471 | ['id'] = 'sec-' .. table.concat(h,'.'), 472 | ['sec-type'] = attr['sec-type'] } 473 | 474 | table.insert(headers, header) 475 | 476 | attr = { ['id'] = header['id'], ['sec-type'] = header['sec-type'] } 477 | title = xml('title', title ~= '' and title or nil) 478 | return xml('sec', '\n' .. title .. s, attr) 479 | end 480 | 481 | function SupplementaryMaterial(s, title, attr) 482 | attr = {} 483 | title = xml('title', title) 484 | local caption = xml('caption', title .. 
s) 485 | return xml('supplementary-material', '\n' .. caption .. '\n', attr) 486 | end 487 | 488 | function Ack(s, title) 489 | title = title and '\n' .. xml('title', title) or '' 490 | return xml('ack', title .. s) 491 | end 492 | 493 | function Glossary(s, title, attr) 494 | title = xml('title', title) 495 | return xml('glossary', title .. s, attr) 496 | end 497 | 498 | function RefList(s, title) 499 | s = fix_citeproc(s) 500 | 501 | -- format ids 502 | s = string.gsub(s, '', function (r) 503 | local attr = { ['id'] = string.format('r%03d', tonumber(r)) } 504 | return '' 505 | end) 506 | 507 | for ref in string.gmatch(s, '()') do 508 | Ref(ref) 509 | end 510 | 511 | if #references > 0 then 512 | title = xml('title', title) 513 | return xml('ref-list', title .. table.concat(references, '\n'), attr) 514 | else 515 | return '' 516 | end 517 | end 518 | 519 | function Ref(s) 520 | table.insert(references, s) 521 | return #references 522 | end 523 | 524 | -- inline elements 525 | 526 | function Str(s) 527 | return s 528 | end 529 | 530 | function Space() 531 | return ' ' 532 | end 533 | 534 | function SoftBreak() 535 | return '' 536 | end 537 | 538 | function Emph(s) 539 | return xml('italic', s) 540 | end 541 | 542 | function Strong(s) 543 | return xml('bold', s) 544 | end 545 | 546 | function Strikeout(s) 547 | return xml('strike', s) 548 | end 549 | 550 | function Superscript(s) 551 | return xml('sup', s) 552 | end 553 | 554 | function Subscript(s) 555 | return xml('sub', s) 556 | end 557 | 558 | function SmallCaps(s) 559 | return xml('sc', s) 560 | end 561 | 562 | function SingleQuoted(s) 563 | return "'" .. s .. "'" 564 | end 565 | 566 | function DoubleQuoted(s) 567 | return '"' .. s .. 
'"' 568 | end 569 | 570 | -- format in-text citation 571 | function Cite(s) 572 | local ids = {} 573 | for id in string.gmatch(s, '(%d+)') do 574 | id = tonumber(id) 575 | -- workaround to discard year mistakenly taken for key 576 | if id and id < 1000 then 577 | local attr = { ['ref-type'] = 'bibr', 578 | ['rid'] = string.format("r%03d", id) } 579 | table.insert(ids, xml('xref', '[' .. id .. ']', attr)) 580 | end 581 | end 582 | if #ids > 0 then 583 | return table.concat(ids) 584 | else 585 | -- return original key for backwards compatibility 586 | return s 587 | end 588 | end 589 | 590 | function Code(s, attr) 591 | return xml('preformat', s, attr) 592 | end 593 | 594 | function DisplayMath(s) 595 | return xml('disp-formula', s) 596 | end 597 | 598 | function InlineMath(s) 599 | return xml('inline-formula', s) 600 | end 601 | 602 | function RawInline(s) 603 | return xml('preformat', s) 604 | end 605 | 606 | function LineBreak() 607 | return ' ' 608 | end 609 | 610 | function Link(s, src, title) 611 | if src ~= '' and s ~= '' then 612 | attr = { ['ext-link-type'] = 'uri', 613 | ['xlink:href'] = escape(src), 614 | ['xlink:title'] = escape(title), 615 | ['xlink:type'] = 'simple' } 616 | 617 | return xml('ext-link', s, attr) 618 | else 619 | return s 620 | end 621 | end 622 | 623 | function CaptionedImage(s, src, title) 624 | -- if title begins with text, make it the 625 | title = string.gsub(title, "^<bold>(.-)</bold>%s", function(t) xml('title', t) end) 626 | local num = #figures + 1 627 | local attr = { ['id'] = string.format("g%03d", num) } 628 | local caption = xml('caption', s) 629 | local fig = xml('fig', caption .. 
Image(nil, src, title), attr) 630 | 631 | table.insert(figures, fig) 632 | return fig 633 | end 634 | 635 | function Image(s, src, title) 636 | local attr = { ['mimetype'] = 'image', 637 | ['xlink:href'] = escape(src), 638 | ['xlink:title'] = escape(title), 639 | ['xlink:type'] = 'simple' } 640 | 641 | return xml('graphic', s, attr) 642 | end 643 | 644 | -- handle bold and italic 645 | function Span(s, attr) 646 | if attr.style == "font-weight:bold" then 647 | return Strong(s) 648 | elseif attr.style == "font-style:italic" then 649 | return Emph(s) 650 | elseif attr.style == "font-variant: small-caps" then 651 | return SmallCaps(s) 652 | else 653 | return s 654 | end 655 | end 656 | 657 | -- The following code will produce runtime warnings when you haven't defined 658 | -- all of the functions you need for the custom writer, so it's useful 659 | -- to include when you're working on a writer. 660 | local meta = {} 661 | meta.__index = 662 | function(_, key) 663 | io.stderr:write(string.format("WARNING: Undefined function '%s'\n",key)) 664 | return function() return "" end 665 | end 666 | setmetatable(_G, meta) 667 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/> 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. 
This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 
49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pandoc-jats 2 | =========== 3 | 4 | A Lua [custom writer for Pandoc](http://johnmacfarlane.net/pandoc/README.html#custom-writers) generating **JATS XML** - specifically [Journal Publishing Tag Library NISO JATS Version 1.0](http://jats.nlm.nih.gov/publishing/tag-library/1.0/index.html). 5 | 6 | ### Installation 7 | Just download the file `JATS.lua` and put it in a convenient location. Pandoc includes a lua interpreter, so lua need not be installed separately. You need at least Pandoc version 1.13, released August 2014 (this release adds `--template` support for custom writers). 8 | 9 | ### Usage 10 | To convert the markdown file `example1.md` into the JATS XML file `example1.xml`, use the following command: 11 | 12 | ``` 13 | pandoc examples/example1.md --filter pandoc-citeproc -t JATS.lua -o example1.xml --template=default.jats --bibliography=examples/example.bib --csl=jats.csl 14 | ``` 15 | 16 | ### Template 17 | `pandoc-jats` uses the template `default.jats` - the template uses the same format as other [Pandoc templates](https://github.com/jgm/pandoc-templates) (e.g. if/end conditions, for/end loops, and a dot can be used to select a field of a variable that takes an object), 18 | but is more complex because of the extensive metadata in JATS. Templates are parsed by Pandoc, not the custom Lua writer.
19 | 20 | ``` 21 | <?xml version="1.0" encoding="utf-8" ?> 22 | <!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.0 20120330//EN" 23 | "JATS-journalpublishing1.dtd"> 24 | $if(article.type)$ 25 | <article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.0" article-type="$article.type$"> 26 | $else$ 27 | <article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.0" article-type="other"> 28 | $endif$ 29 | <front> 30 | <journal-meta> 31 | $if(journal.publisher-id)$ 32 | <journal-id journal-id-type="publisher-id">$journal.publisher-id$</journal-id> 33 | $endif$ 34 | $if(journal.nlm-ta)$ 35 | <journal-id journal-id-type="nlm-ta">$journal.nlm-ta$</journal-id> 36 | $endif$ 37 | $if(journal.pmc)$ 38 | <journal-id journal-id-type="pmc">$journal.pmc$</journal-id> 39 | $endif$ 40 | <journal-title-group> 41 | $if(journal.title)$ 42 | <journal-title>$journal.title$</journal-title> 43 | $endif$ 44 | $if(journal.abbrev-title)$ 45 | <abbrev-journal-title>$journal.abbrev-title$</abbrev-journal-title> 46 | $endif$ 47 | </journal-title-group> 48 | $if(journal.pissn)$ 49 | <issn pub-type="ppub">$journal.pissn$</issn> 50 | $endif$ 51 | $if(journal.eissn)$ 52 | <issn pub-type="epub">$journal.eissn$</issn> 53 | $endif$ 54 | <publisher> 55 | <publisher-name>$journal.publisher-name$</publisher-name> 56 | $if(journal.publisher-loc)$ 57 | <publisher-loc>$journal.publisher-loc$</publisher-loc> 58 | $endif$ 59 | </publisher> 60 | </journal-meta> 61 | <article-meta> 62 | $if(article.publisher-id)$ 63 | <article-id pub-id-type="publisher-id">$article.publisher-id$</article-id> 64 | $endif$ 65 | $if(article.doi)$ 66 | <article-id pub-id-type="doi">$article.doi$</article-id> 67 | $endif$ 68 | $if(article.pmid)$ 69 | <article-id pub-id-type="pmid">$article.pmid$</article-id> 70 | $endif$ 71 | $if(article.pmcid)$ 72 | <article-id 
pub-id-type="pmcid">$article.pmcid$</article-id> 73 | $endif$ 74 | $if(article.art-access-id)$ 75 | <article-id pub-id-type="art-access-id">$article.art-access-id$</article-id> 76 | $endif$ 77 | $if(article.heading)$ 78 | <article-categories> 79 | <subj-group subj-group-type="heading"> 80 | <subject>$article.heading$</subject> 81 | </subj-group> 82 | $if(article.categories)$ 83 | <subj-group subj-group-type="categories"> 84 | $for(article.categories)$ 85 | <subject>$article.categories$</subject> 86 | $endfor$ 87 | </subj-group> 88 | $endif$ 89 | </article-categories> 90 | $endif$ 91 | $if(title)$ 92 | <title-group> 93 | <article-title>$title$</article-title> 94 | </title-group> 95 | $endif$ 96 | $if(author)$ 97 | <contrib-group> 98 | $for(author)$ 99 | <contrib contrib-type="author"> 100 | $if(author.orcid)$ 101 | <contrib-id contrib-id-type="orcid">$author.orcid$</contrib-id> 102 | $endif$ 103 | <name> 104 | $if(author.surname)$ 105 | <surname>$author.surname$</surname> 106 | <given-names>$author.given-names$</given-names> 107 | $else$ 108 | <string-name>$author$</string-name> 109 | $endif$ 110 | </name> 111 | $if(author.email)$ 112 | <email>$author.email$</email> 113 | $endif$ 114 | $if(author.aff-id)$ 115 | <xref ref-type="aff" rid="aff-$author.aff-id$"/> 116 | $endif$ 117 | $if(author.cor-id)$ 118 | <xref ref-type="corresp" rid="cor-$author.cor-id$"><sup>*</sup></xref> 119 | $endif$ 120 | </contrib> 121 | $endfor$ 122 | </contrib-group> 123 | $endif$ 124 | $if(article.author-notes)$ 125 | <author-notes> 126 | $if(article.author-notes.corresp)$ 127 | $for(article.author-notes.corresp)$ 128 | <corresp id="cor-$article.author-notes.corresp.id$">* E-mail: <email>$article.author-notes.corresp.email$</email></corresp> 129 | $endfor$ 130 | $endif$ 131 | $if(article.author-notes.conflict)$ 132 | <fn fn-type="conflict"><p>$article.author-notes.conflict$</p></fn> 133 | $endif$ 134 | $if(article.author-notes.con)$ 135 | <fn
fn-type="con"><p>$article.author-notes.con$</p></fn> 136 | $endif$ 137 | </author-notes> 138 | $endif$ 139 | $if(date)$ 140 | $if(date.iso-8601)$ 141 | <pub-date pub-type="epub" iso-8601-date="$date.iso-8601$"> 142 | $else$ 143 | <pub-date pub-type="epub"> 144 | $endif$ 145 | $if(date.day)$ 146 | <day>$date.day$</day> 147 | $endif$ 148 | $if(date.month)$ 149 | <month>$date.month$</month> 150 | $endif$ 151 | $if(date.year)$ 152 | <year>$date.year$</year> 153 | $else$ 154 | <string-date>$date$</string-date> 155 | $endif$ 156 | </pub-date> 157 | $endif$ 158 | $if(article.volume)$ 159 | <volume>$article.volume$</volume> 160 | $endif$ 161 | $if(article.issue)$ 162 | <issue>$article.issue$</issue> 163 | $endif$ 164 | $if(article.fpage)$ 165 | <fpage>$article.fpage$</fpage> 166 | $endif$ 167 | $if(article.lpage)$ 168 | <lpage>$article.lpage$</lpage> 169 | $endif$ 170 | $if(article.elocation-id)$ 171 | <elocation-id>$article.elocation-id$</elocation-id> 172 | $endif$ 173 | $if(history)$ 174 | <history> 175 | </history> 176 | $endif$ 177 | $if(copyright)$ 178 | <permissions> 179 | $if(copyright.statement)$ 180 | <copyright-statement>$copyright.statement$</copyright-statement> 181 | $endif$ 182 | $if(copyright.year)$ 183 | <copyright-year>$copyright.year$</copyright-year> 184 | $endif$ 185 | $if(copyright.holder)$ 186 | <copyright-holder>$copyright.holder$</copyright-holder> 187 | $endif$ 188 | $if(copyright.text)$ 189 | <license license-type="$copyright.type$" xlink:href="$copyright.link$"> 190 | <license-p>$copyright.text$</license-p> 191 | </license> 192 | $endif$ 193 | </permissions> 194 | $endif$ 195 | $if(tags)$ 196 | <kwd-group kwd-group-type="author"> 197 | $for(tags)$ 198 | <kwd>$tags$</kwd> 199 | $endfor$ 200 | </kwd-group> 201 | $endif$ 202 | $if(article.funding-statement)$ 203 | <funding-group> 204 | <funding-statement>$article.funding-statement$</funding-statement> 205 | </funding-group> 206 | $endif$ 207 | </article-meta> 208 | $if(notes)$ 209 |
<notes>$notes$</notes> 210 | $endif$ 211 | </front> 212 | $body$ 213 | </article> 214 | ``` 215 | 216 | ### Citation Style (CSL) 217 | `pandoc-jats` uses the `jats.csl` citation style that is included. This style generates XML in JATS format, which is an interim solution as CSL is not really intended to generate XML. 218 | 219 | ### Metadata 220 | The metadata required for JATS can be stored in a YAML metadata block (new in Pandoc 1.12; the same format is also used by the Jekyll static blog generator). An example [from a recent blog post](http://blog.martinfenner.org/2013/12/11/what-can-article-level-metrics-do-for-you/) is below: 221 | 222 | ``` 223 | --- 224 | layout: post 225 | title: "What Can Article Level Metrics Do for You?" 226 | date: 2013-10-22 227 | tags: [example, markdown, article-level metrics, reproducibility] 228 | bibliography: examples/example.bib 229 | csl: examples/jats.csl 230 | article: 231 | type: research-article 232 | publisher-id: PBIOLOGY-D-13-03338 233 | doi: 10.1371/journal.pbio.1001687 234 | heading: Essay 235 | journal: 236 | publisher-id: plos 237 | publisher-name: Public Library of Science 238 | publisher-loc: San Francisco, USA 239 | nlm-ta: PLoS Biol 240 | pmc: plosbiol 241 | title: PLoS Biology 242 | eissn: 1545-7885 243 | pissn: 1544-9173 244 | author: 245 | - surname: Fenner 246 | given-names: Martin 247 | orcid: http://orcid.org/0000-0003-1419-2405 248 | email: mfenner@plos.org 249 | corresp: true 250 | copyright: 251 | holder: Martin Fenner 252 | year: 2013 253 | text: "This is an open-access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited." 254 | type: open-access 255 | link: http://creativecommons.org/licenses/by/3.0/ 256 | --- 257 | ``` 258 | 259 | The `article` and `journal` sections correspond to `<article-meta>` and `<journal-meta>` in JATS.
The standard Pandoc metadata `title`, `author` and `date` are supported. 260 | 261 | ### Validation 262 | The JATS XML should be validated, for example with the excellent [jats-conversion](https://github.com/PeerJ/jats-conversion) tools written by Alf Eaton. 263 | 264 | ### To Do 265 | * support for the latest JATS version (1.1d3 in April 2015) 266 | * parsing for references in Lua instead of using a CSL style 267 | * more testing 268 | 269 | ### Feedback 270 | This tool needs extensive testing with as many markdown documents as possible. Please open an issue in the [Issue Tracker](https://github.com/mfenner/pandoc-jats/issues) if you find a conversion problem. 271 | 272 | ### Testing 273 | 274 | You'll need `busted` and `inspect` to run tests (you might need to install `luarocks` first): 275 | 276 | ``` 277 | luarocks install busted 278 | luarocks install inspect 279 | ``` 280 | 281 | Then run all tests from the root directory (the same directory as `JATS.lua`): 282 | 283 | ``` 284 | busted 285 | ``` 286 | 287 | The tests in `spec/input_spec.lua` are generic and can be reused for other custom Lua writers. They currently test whether all functions called by Pandoc exist. To run these tests against the `sample.lua` custom writer included with Pandoc, do `busted spec/sample_spec.lua`. 288 | 289 | ### License 290 | © 2013-2015 Martin Fenner. Released under the [GPL](http://www.gnu.org/copyleft/gpl.html), version 2 or greater. See LICENSE for more info.
291 | -------------------------------------------------------------------------------- /default.jats: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8" ?> 2 | <!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.0 20120330//EN" 3 | "JATS-journalpublishing1.dtd"> 4 | $if(article.type)$ 5 | <article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.0" article-type="$article.type$"> 6 | $else$ 7 | <article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.0" article-type="other"> 8 | $endif$ 9 | <front> 10 | <journal-meta> 11 | $if(journal.publisher-id)$ 12 | <journal-id journal-id-type="publisher-id">$journal.publisher-id$</journal-id> 13 | $endif$ 14 | $if(journal.nlm-ta)$ 15 | <journal-id journal-id-type="nlm-ta">$journal.nlm-ta$</journal-id> 16 | $endif$ 17 | $if(journal.pmc)$ 18 | <journal-id journal-id-type="pmc">$journal.pmc$</journal-id> 19 | $endif$ 20 | <journal-title-group> 21 | $if(journal.title)$ 22 | <journal-title>$journal.title$</journal-title> 23 | $endif$ 24 | $if(journal.abbrev-title)$ 25 | <abbrev-journal-title>$journal.abbrev-title$</abbrev-journal-title> 26 | $endif$ 27 | </journal-title-group> 28 | $if(journal.pissn)$ 29 | <issn pub-type="ppub">$journal.pissn$</issn> 30 | $endif$ 31 | $if(journal.eissn)$ 32 | <issn pub-type="epub">$journal.eissn$</issn> 33 | $endif$ 34 | <publisher> 35 | <publisher-name>$journal.publisher-name$</publisher-name> 36 | $if(journal.publisher-loc)$ 37 | <publisher-loc>$journal.publisher-loc$</publisher-loc> 38 | $endif$ 39 | </publisher> 40 | </journal-meta> 41 | <article-meta> 42 | $if(article.publisher-id)$ 43 | <article-id pub-id-type="publisher-id">$article.publisher-id$</article-id> 44 | $endif$ 45 | $if(article.doi)$ 46 | <article-id pub-id-type="doi">$article.doi$</article-id> 47 | $endif$ 48 | 
$if(article.pmid)$ 49 | <article-id pub-id-type="pmid">$article.pmid$</article-id> 50 | $endif$ 51 | $if(article.pmcid)$ 52 | <article-id pub-id-type="pmcid">$article.pmcid$</article-id> 53 | $endif$ 54 | $if(article.art-access-id)$ 55 | <article-id pub-id-type="art-access-id">$article.art-access-id$</article-id> 56 | $endif$ 57 | $if(article.heading)$ 58 | <article-categories> 59 | <subj-group subj-group-type="heading"> 60 | <subject>$article.heading$</subject> 61 | </subj-group> 62 | $if(article.categories)$ 63 | <subj-group subj-group-type="categories"> 64 | $for(article.categories)$ 65 | <subject>$article.categories$</subject> 66 | $endfor$ 67 | </subj-group> 68 | $endif$ 69 | </article-categories> 70 | $endif$ 71 | $if(title)$ 72 | <title-group> 73 | <article-title>$title$</article-title> 74 | </title-group> 75 | $endif$ 76 | $if(author)$ 77 | <contrib-group> 78 | $for(author)$ 79 | <contrib contrib-type="author"> 80 | $if(author.orcid)$ 81 | <contrib-id contrib-id-type="orcid">$author.orcid$</contrib-id> 82 | $endif$ 83 | <name> 84 | $if(author.surname)$ 85 | <surname>$author.surname$</surname> 86 | <given-names>$author.given-names$</given-names> 87 | $else$ 88 | <string-name>$author$</string-name> 89 | $endif$ 90 | </name> 91 | $if(author.email)$ 92 | <email>$author.email$</email> 93 | $endif$ 94 | $if(author.aff-id)$ 95 | <xref ref-type="aff" rid="aff-$author.aff-id$"/> 96 | $endif$ 97 | $if(author.cor-id)$ 98 | <xref ref-type="corresp" rid="cor-$author.cor-id$"><sup>*</sup></xref> 99 | $endif$ 100 | </contrib> 101 | $endfor$ 102 | </contrib-group> 103 | $endif$ 104 | $if(article.author-notes)$ 105 | <author-notes> 106 | $if(article.author-notes.corresp)$ 107 | $for(article.author-notes.corresp)$ 108 | <corresp id="cor-$article.author-notes.corresp.id$">* E-mail: <email>$article.author-notes.corresp.email$</email></corresp> 109 | $endfor$ 110 | $endif$ 111 | $if(article.author-notes.conflict)$ 112 | <fn
fn-type="conflict"><p>$article.author-notes.conflict$</p></fn> 113 | $endif$ 114 | $if(article.author-notes.con)$ 115 | <fn fn-type="con"><p>$article.author-notes.con$</p></fn> 116 | $endif$ 117 | </author-notes> 118 | $endif$ 119 | $if(date)$ 120 | $if(date.iso-8601)$ 121 | <pub-date pub-type="epub" iso-8601-date="$date.iso-8601$"> 122 | $else$ 123 | <pub-date pub-type="epub"> 124 | $endif$ 125 | $if(date.day)$ 126 | <day>$date.day$</day> 127 | $endif$ 128 | $if(date.month)$ 129 | <month>$date.month$</month> 130 | $endif$ 131 | $if(date.year)$ 132 | <year>$date.year$</year> 133 | $else$ 134 | <string-date>$date$</string-date> 135 | $endif$ 136 | </pub-date> 137 | $endif$ 138 | $if(article.volume)$ 139 | <volume>$article.volume$</volume> 140 | $endif$ 141 | $if(article.issue)$ 142 | <issue>$article.issue$</issue> 143 | $endif$ 144 | $if(article.fpage)$ 145 | <fpage>$article.fpage$</fpage> 146 | $endif$ 147 | $if(article.lpage)$ 148 | <lpage>$article.lpage$</lpage> 149 | $endif$ 150 | $if(article.elocation-id)$ 151 | <elocation-id>$article.elocation-id$</elocation-id> 152 | $endif$ 153 | $if(history)$ 154 | <history> 155 | </history> 156 | $endif$ 157 | $if(copyright)$ 158 | <permissions> 159 | $if(copyright.statement)$ 160 | <copyright-statement>$copyright.statement$</copyright-statement> 161 | $endif$ 162 | $if(copyright.year)$ 163 | <copyright-year>$copyright.year$</copyright-year> 164 | $endif$ 165 | $if(copyright.holder)$ 166 | <copyright-holder>$copyright.holder$</copyright-holder> 167 | $endif$ 168 | $if(copyright.text)$ 169 | <license license-type="$copyright.type$" xlink:href="$copyright.link$"> 170 | <license-p>$copyright.text$</license-p> 171 | </license> 172 | $endif$ 173 | </permissions> 174 | $endif$ 175 | $if(tags)$ 176 | <kwd-group kwd-group-type="author"> 177 | $for(tags)$ 178 | <kwd>$tags$</kwd> 179 | $endfor$ 180 | </kwd-group> 181 | $endif$ 182 | $if(article.funding-statement)$ 183 | <funding-group> 184 |
<funding-statement>$article.funding-statement$</funding-statement> 185 | </funding-group> 186 | $endif$ 187 | </article-meta> 188 | $if(notes)$ 189 | <notes>$notes$</notes> 190 | $endif$ 191 | </front> 192 | $body$ 193 | </article> 194 | -------------------------------------------------------------------------------- /examples/example.bib: -------------------------------------------------------------------------------- 1 | @article{Kafkas:2013fp, 2 | author = {Kafkas, {\c S}enay and Kim, Jee-Hyub and McEntyre, Johanna R}, 3 | title = {{Database Citation in Full Text Biomedical Articles}}, 4 | journal = {PLoS ONE}, 5 | year = {2013}, 6 | url = {http://dx.doi.org/10.1371/journal.pone.0063184}, 7 | } 8 | 9 | @article{borer2009, title={Some Simple Guidelines for Effective Data Management}, volume={90}, url={http://dx.doi.org/10.1890/0012-9623-90.2.205}, number={2}, journal={Bulletin of the Ecological Society of America}, publisher={Ecological Society of America}, author={Borer, Elizabeth T. and Seabloom, Eric W. and Jones, Matthew B. 
and Schildhauer, Mark}, year={2009}, month={Apr}, pages={205-214}} 10 | 11 | @inproceedings{brunt2002, 12 | title={The future of ecoinformatics in long term ecological research}, 13 | author={Brunt, James W and McCartney, Peter and Baker, Karen and Stafford, Susan G}, 14 | booktitle={Proceedings of the 6th World Multiconference on Systemics, Cybernetics and Informatics: SCI}, 15 | pages={14--18}, 16 | year={2002} 17 | } 18 | 19 | @article{darcy2009, 20 | title={Can the Relationship between Doctors and Drug Companies Ever Be a Healthy One?}, 21 | volume={6}, 22 | url={http://dx.doi.org/10.1371/journal.pmed.1000075}, 23 | number={7}, 24 | journal={PLoS Medicine}, 25 | publisher={Public Library of Science}, 26 | author={D'Arcy, Emma and Moynihan, Ray}, 27 | year={2009}, 28 | month={Jul}, 29 | pages={e1000075} 30 | } 31 | 32 | @article{fenner2011, title={Key Issue: Collective Action for the Open Researcher & Contributor ID (ORCID)}, 33 | volume={24}, 34 | url={http://dx.doi.org/10.1629/24277}, 35 | number={3}, 36 | journal={Serials: The Journal for the Serials Community}, 37 | publisher={UKSG}, 38 | author={Fenner, Martin and G{\'o}mez, Consol Garcia and Thorisson, Gudmundur A}, 39 | year={2011}, 40 | month={Nov}, 41 | pages={277-279}} 42 | 43 | @article{fenner2013, 44 | author = {Fenner, Martin}, 45 | journal = {PLoS Biol}, 46 | publisher = {Public Library of Science}, 47 | title = {What Can Article-Level Metrics Do for You?}, 48 | year = {2013}, 49 | month = {10}, 50 | volume = {11}, 51 | url = {http://dx.doi.org/10.1371%2Fjournal.pbio.1001687}, 52 | pages = {e1001687}, 53 | number = {10}, 54 | doi = {10.1371/journal.pbio.1001687} 55 | } 56 | 57 | @book{fienberg1985, 58 | title={Sharing research data}, 59 | author={Fienberg, Stephen E and Martin, Margaret E}, 60 | year={1985}, 61 | publisher={Natl Academy Pr} 62 | } 63 | 64 | @article{fraser1999, title={Usability of Geospatial Metadata or Space-Time Matters}, volume={25}, url={http://dx.doi.org/10.1002/bult.134},
number={6}, journal={Bulletin of the American Society for Information Science and Technology}, publisher={Wiley Blackwell (John Wiley & Sons)}, author={Fraser, Bruce and Gluck, Myke}, year={1999}, month={Aug}, pages={24-28}} 65 | 66 | @article{jones2006, title={The New Bioinformatics: Integrating Ecological Data from the Gene to the Biosphere}, volume={37}, url={http://dx.doi.org/10.1146/annurev.ecolsys.37.091305.110031}, number={1}, journal={Annual Review of Ecology, Evolution, and Systematics}, publisher={Annual Reviews}, author={Jones, Matthew B. and Schildhauer, Mark P. and Reichman, O.J. and Bowers, Shawn}, year={2006}, month={Dec}, pages={519-54}} 67 | 68 | @article{hampton2012, 69 | title={Ecological data in the Information Age}, 70 | author={Hampton, Stephanie E and Tewksbury, Joshua J and Strasser, Carly A}, 71 | journal={Frontiers in Ecology and the Environment}, 72 | volume={10}, 73 | number={2}, 74 | pages={59--59}, 75 | year={2012}, 76 | publisher={Eco Soc America} 77 | } 78 | 79 | @article{hampton2013, title={Big data and the future of ecology}, volume={11}, url={http://dx.doi.org/10.1890/120103}, number={3}, journal={Frontiers in Ecology and the Environment}, publisher={Ecological Society of America}, author={Hampton, Stephanie E and Strasser, Carly A and Tewksbury, Joshua J and Gram, Wendy K and Budden, Amber E and Batcheller, Archer L and Duke, Clifford S and Porter, John H}, year={2013}, month={Apr}, pages={156-162}} 80 | 81 | @article {kowalczyk2011, 82 | author = {Kowalczyk, Stacy and Shankar, Kalpana}, 83 | title = {Data sharing in the sciences}, 84 | journal = {Annual Review of Information Science and Technology}, 85 | volume = {45}, 86 | number = {1}, 87 | publisher = {Wiley Subscription Services, Inc., A Wiley Company}, 88 | issn = {1550-8382}, 89 | url = {http://dx.doi.org/10.1002/aris.2011.1440450113}, 90 | pages = {247--294}, 91 | year = {2011}, 92 | } 93 | 94 | @article{lampe1998, 95 | title={Data capture from the sponsors' and 
investigators' perspectives: Balancing quality, speed, and cost}, 96 | author={Lampe, AJ and Weiler, JM}, 97 | journal={Drug Information Journal}, 98 | volume={32}, 99 | number={4}, 100 | pages={871--886}, 101 | year={1998}, 102 | publisher={Drug Information Association} 103 | } 104 | 105 | @article{lord2013, url = {http://www.russet.org.uk/blog/2071}, title = {Greycite: Citing the Web}, journal = {An Exercise in Irrelevance}, YEAR = {2012}, howpublished = {\url{http://www.russet.org.uk/blog/2071}}, author = {Lord, Phillip and Marshall, Lindsay}, archived = {http://greycite.knowledgeblog.org/?uri=http%3A%2F%2Fwww.russet.org.uk%2Fblog%2F2071, http://wayback.archive.org/web/http://www.russet.org.uk/blog/2071, http://www.webarchive.org.uk/wayback/archive/http://www.russet.org.uk/blog/2071, http://webcitation.org/query.php?url=http%3A%2F%2Fwww.russet.org.uk%2Fblog%2F2071}, } 106 | 107 | @article{michener1997, 108 | title={Nongeospatial metadata for the ecological sciences}, 109 | author={Michener, William K and Brunt, James W and Helly, John J and Kirchner, Thomas B and Stafford, Susan G}, 110 | journal={Ecological Applications}, 111 | volume={7}, 112 | number={1}, 113 | pages={330--342}, 114 | year={1997}, 115 | publisher={Eco Soc America}, 116 | url={http://dx.doi.org/10.1890/1051-0761(1997)007[0330:nmftes]2.0.co;2} 117 | } 118 | 119 | @book{michener2009, 120 | title={Ecological data: Design, management and processing}, 121 | author={Michener, William K and Brunt, James W}, 122 | year={2009}, 123 | publisher={Wiley-Blackwell} 124 | } 125 | 126 | @article{moher2009, 127 | title={Preferred Reporting Items for Systematic Reviews and Meta-Analyses: The PRISMA Statement}, 128 | volume={6}, 129 | url={http://dx.doi.org/10.1371/journal.pmed.1000097}, 130 | number={7}, 131 | journal={PLoS Medicine}, 132 | publisher={Public Library of Science}, 133 | author={Moher, David and Liberati, Alessandro and Tetzlaff, Jennifer and Altman, Douglas G.}, 134 | year={2009}, 135 | 
month={Jul}, 136 | pages={e1000097} 137 | } 138 | 139 | @article{palmer2004, 140 | title={Ecological Science and Sustainability for a Crowded Planet}, 141 | author={Palmer, Margaret A. and Bernhardt, Emily S. and Chornesky, Elizabeth A. and Collins, Scott L. and Dobson, Andrew P. and Duke, Clifford S. and Gold, Barry D. and Jacobson, Robert and Kingsland, Sharon and Kranz, Rhonda and Mappin, Michael J. and Martinez, M. Luisa and Micheli, Fiorenza and Morse, Jennifer L. and Pace, Michael L. and Pascual, Mercedes and Palumbi, Stephen and Reichman, O.J. and Townsend, Alan and Turner, Monica G.}, 142 | year={2004}, 143 | url={http://www.esa.org/ecovisions/ppfiles/EcologicalVisionsReport.pdf} 144 | } 145 | 146 | @article{parr2005, title={Data sharing in ecology and evolution}, volume={20}, url={http://dx.doi.org/10.1016/j.tree.2005.04.023}, number={7}, journal={Trends in Ecology & Evolution}, publisher={Elsevier}, author={Parr, C and Cummings, M}, year={2005}, month={Jul}, pages={362-363}} 147 | 148 | @article{paulsen2012, 149 | title={Quality of Data Entry Using Single Entry, Double Entry and Automated Forms Processing--An Example Based on a Study of Patient-Reported Outcomes}, 150 | author={Paulsen, Aksel and Overgaard, S{\o}ren and Lauritsen, Jens Martin}, 151 | journal={PloS ONE}, 152 | volume={7}, 153 | number={4}, 154 | pages={e35087}, 155 | year={2012}, 156 | url={http://dx.doi.org/10.1371/journal.pone.0035087}, 157 | publisher={Public Library of Science} 158 | } 159 | 160 | @article{piwowar2007, title={Sharing Detailed Research Data Is Associated with Increased Citation Rate}, volume={2}, url={http://dx.doi.org/10.1371/journal.pone.0000308}, number={3}, journal={PLoS ONE}, publisher={Public Library of Science}, author={Piwowar, Heather A. and Day, Roger S. 
and Fridsma, Douglas B.}, year={2007}, month={Mar}, pages={e308}} 161 | 162 | @inproceedings{piwowar2008, 163 | title={A review of journal policies for sharing research data}, 164 | author={Piwowar, Heather A and Chapman, Wendy W}, 165 | booktitle={ELPUB2008}, 166 | year={2008} 167 | } 168 | 169 | @article{piwowar2013, 170 | title={Data reuse and the open data citation advantage}, 171 | author={Piwowar, Heather A and Vision, Todd J}, 172 | journal={PeerJ PrePrints}, 173 | volume={1}, 174 | pages={e1}, 175 | year={2013}, 176 | url={http://dx.doi.org/10.7287/peerj.preprints.1} 177 | } 178 | 179 | @article{poisot2013, 180 | title={Moving toward a sustainable ecological science: don't let data go to waste!}, 181 | author={Poisot, Timoth{\'e}e and Mounce, Ross and Gravel, Dominique}, 182 | year={2013}, 183 | url={https://github.com/tpoisot/DataSharingPaper/blob/master/DataSharing-MS.md} 184 | } 185 | 186 | @article{reichman2011, title={Challenges and Opportunities of Open Data in Ecology}, volume={331}, url={http://dx.doi.org/10.1126/science.1197962}, number={6018}, journal={Science}, publisher={American Association for the Advancement of Science}, author={Reichman, O. J. and Jones, M. B. and Schildhauer, M. P.}, year={2011}, month={Feb}, pages={703-705}} 187 | 188 | @article{schofield2009, title={Post-publication sharing of data and tools}, volume={461}, url={http://dx.doi.org/10.1038/461171a}, number={7261}, journal={Nature}, publisher={Nature Publishing Group}, author={Schofield, Paul N. and Bubela, Tania and Weaver, Thomas and Portilla, Lili and Brown, Stephen D. and Hancock, John M.
and Einhorn, David and Tocchini-Valentini, Glauco and Hrabe de Angelis, Martin and Rosenthal, Nadia}, year={2009}, month={Sep}, pages={171-173}} 189 | 190 | @inproceedings{strasser2011, 191 | title={Promoting Data Stewardship Through Best Practices}, 192 | author={Strasser, Carly A and Cook, Robert B and Michener, William K and Budden, Amber and Koskela, Rebecca}, 193 | booktitle={Proceedings of the Environmental Information Management Conference 2011 (EIM 2011)}, 194 | year={2011}, 195 | organization={Oak Ridge National Laboratory (ORNL)} 196 | } 197 | 198 | @data{strasser2012, 199 | url = {http://dx.doi.org/10.5060/D2251G48}, 200 | author={Strasser, Carly A and Cook, Robert and Michener, William K and Budden, Amber}, 201 | publisher = {DataONE}, 202 | title = {Primer on Data Management: What you always wanted to know}, 203 | year = {2012} 204 | } 205 | 206 | @article{white2013, url = {http://blog.martinfenner.org/2013/06/25/nine-simple-ways-to-make-it-easier-to-reuse-your-data}, title = {Nine simple ways to make it easier to (re)use your data}, journal = {Gobbledygook}, YEAR = {2013}, howpublished = {\url{http://blog.martinfenner.org/2013/06/25/nine-simple-ways-to-make-it-easier-to-reuse-your-data}}, author = {Ethan P. White, Elita Baldrige, Zachary T. Brym, Kenneth J. Locey, Daniel J. McGlinn, Sarah R. Supp}, archived = {http://greycite.knowledgeblog.org/?uri=http%3A%2F%2Fblog.martinfenner.org%2F2013%2F06%2F25%2Fnine-simple-ways-to-make-it-easier-to-reuse-your-data%2F, http://webcitation.org/query.php?url=http%3A%2F%2Fblog.martinfenner.org%2F2013%2F06%2F25%2Fnine-simple-ways-to-make-it-easier-to-reuse-your-data%2F}, } 207 | 208 | @article{whitlock2010, title={Data Archiving}, volume={175}, doi={10.1086/650340}, url={http://dx.doi.org/10.1086/650340}, number={2}, journal={The American Naturalist}, publisher={The University of Chicago Press}, author={Whitlock, Michael C. and McPeek, Mark A. and Rausher, Mark D. 
and Rieseberg, Loren and Moore, Allen J.}, year={2010}, month={Feb}, pages={145-146}} 209 | 210 | @article{Whitlock2011, title={Data archiving in ecology and evolution: best practices}, volume={26}, url={http://dx.doi.org/10.1016/j.tree.2010.11.006}, number={2}, journal={Trends in Ecology & Evolution}, publisher={Elsevier}, author={Whitlock, Michael C.}, year={2011}, month={Feb}, pages={61-65}} 211 | 212 | @article{wickham2007, 213 | author = {Wickham, Hadley}, 214 | journal = {Journal of Statistical Software}, 215 | number = {12}, 216 | title = {Reshaping data with the reshape package}, 217 | url = {http://www.jstatsoft.org/v21/i12/paper}, 218 | volume = {21}, 219 | year = {2007} 220 | } 221 | 222 | @article{wieczorek2012, title={Darwin Core: An Evolving Community-Developed Biodiversity Data Standard}, volume={7}, doi={10.1371/journal.pone.0029715}, url={http://dx.doi.org/10.1371/journal.pone.0029715}, number={1}, journal={PLoS ONE}, publisher={Public Library of Science}, author={Wieczorek, John and Bloom, David and Guralnick, Robert and Blum, Stan and D{\"o}ring, Markus and Giovanni, Renato and Robertson, Tim and Vieglais, David}, year={2012}, month={Jan}, pages={e29715}} 223 | 224 | @phdthesis{zimmerman2003, 225 | title={Data sharing and secondary use of scientific data: Experiences of ecologists}, 226 | author={Zimmerman, Ann S}, 227 | year={2003}, 228 | school={The University of Michigan} 229 | } 230 | 231 | @article{zimmerman2007, title={Not by metadata alone: the use of diverse forms of knowledge to locate data for reuse}, volume={7}, doi={10.1007/s00799-007-0015-8}, url={http://dx.doi.org/10.1007/s00799-007-0015-8}, number={1-2}, journal={International Journal on Digital Libraries}, publisher={Springer-Verlag}, author={Zimmerman, Ann}, year={2007}, month={Oct}, pages={5-16}} 232 | 233 | @article{Dickson:2010ix, 234 | author = {Dickson, Samuel P and Wang, Kai and Krantz, Ian and Hakonarson, Hakon and Goldstein, David B}, 235 | doi = 
{10.1371/journal.pbio.1000294}, 236 | issn = {1545-7885}, 237 | journal = {PLOS Biology}, 238 | keywords = {Anemia,Genetic Predisposition to Disease,Genome-Wide Association Study,Genotype,Hearing Loss,Hearing Loss: genetics,Humans,Polymorphism,Probability,Risk Factors,Sickle Cell,Sickle Cell: genetics,Single Nucleotide}, 239 | month = jan, 240 | number = {1}, 241 | pages = {e1000294}, 242 | pmid = {20126254}, 243 | title = {{Rare variants create synthetic genome-wide associations.}}, 244 | volume = {8}, 245 | year = {2010} 246 | } 247 | @article{Narendra:2010fw, 248 | author = {Narendra, Derek P and Jin, Seok Min and Tanaka, Atsushi and Suen, Der-Fen and Gautier, Clement A and Shen, Jie and Cookson, Mark R and Youle, Richard J}, 249 | doi = {10.1371/journal.pbio.1000298}, 250 | issn = {1545-7885}, 251 | journal = {PLOS Biology}, 252 | keywords = {Animals,Biological,HeLa Cells,Humans,Membrane Potential,Metalloproteases,Metalloproteases: metabolism,Mice,Mitochondria,Mitochondria: metabolism,Mitochondria: physiology,Mitochondrial,Mitochondrial Membranes,Mitochondrial Membranes: metabolism,Models,Parkinson Disease,Parkinson Disease: genetics,Protein Kinases,Protein Kinases: genetics,Protein Kinases: metabolism,Rats,Ubiquitin-Protein Ligases,Ubiquitin-Protein Ligases: genetics,Ubiquitin-Protein Ligases: metabolism}, 253 | month = jan, 254 | number = {1}, 255 | pages = {e1000298}, 256 | pmid = {20126261}, 257 | title = {{PINK1 is selectively stabilized on impaired mitochondria to activate Parkin.}}, 258 | volume = {8}, 259 | year = {2010} 260 | } 261 | @article{bollenEtal2009, 262 | author = {Bollen, Johan and de Sompel, Herbert and Hagberg, Aric and Chute, Ryan}, 263 | doi = {10.1371/journal.pone.0006022}, 264 | journal = {PLoS ONE}, 265 | number = {6}, 266 | pages = {e6022}, 267 | publisher = {Public Library of Science}, 268 | title = {{A Principal Component Analysis of 39 Scientific Impact Measures}}, 269 | volume = {4}, 270 | year = {2009} 271 | } 272 | 
@article{Schekman2013, 273 | author = {Schekman, Randy and Patterson, Mark}, 274 | doi = {10.7554/eLife.00855}, 275 | issn = {2050-084X}, 276 | journal = {eLife}, 277 | month = may, 278 | pages = {e00855}, 279 | title = {{Reforming research assessment}}, 280 | volume = {2}, 281 | year = {2013} 282 | } 283 | @article{Campbell2008, 284 | author = {Campbell, Philip}, 285 | doi = {10.3354/esep00078}, 286 | issn = {1863-5415}, 287 | journal = {Ethics in Science and Environmental Politics}, 288 | month = jun, 289 | pages = {5--7}, 290 | title = {{Escape from the impact factor}}, 291 | type = {Journal article}, 292 | volume = {8}, 293 | year = {2008} 294 | } 295 | @misc{Glanzel2013, 296 | author = {Gl\"{a}nzel, Wolfgang and Wouters, Paul}, 297 | title = {{The dos and don'ts in individual level bibliometrics}}, 298 | url = {http://de.slideshare.net/paulwouters1/issi2013-wg-pw}, 299 | year = {2013} 300 | } 301 | @article{VanEck2013, 302 | author = {van Eck, Nees Jan and Waltman, Ludo and van Raan, Anthony F J and Klautz, Robert J M and Peul, Wilco C}, 303 | doi = {10.1371/journal.pone.0062395}, 304 | editor = {Lovis, Christian}, 305 | issn = {1932-6203}, 306 | journal = {PLOS ONE}, 307 | keywords = {Academic medicine,Bibliometrics,Communications,Health informatics,Information science,Journalism,Mathematics,Medical journals,Medicine,Non-clinical medicine,Publication practices,Research Article,Research assessment,Science policy,Social and behavioral sciences,Statistics}, 308 | month = jan, 309 | number = {4}, 310 | pages = {e62395}, 311 | pmid = {23638064}, 312 | publisher = {Public Library of Science}, 313 | title = {{Citation analysis may severely underestimate the impact of clinical research as compared to basic research.}}, 314 | volume = {8}, 315 | year = {2013} 316 | } 317 | @article{Eysenbach2006, 318 | author = {Eysenbach, Gunther}, 319 | doi = {10.1371/journal.pbio.0040157}, 320 | editor = {Tenopir, Carol}, 321 | issn = {1545-7885}, 322 | journal = {PLOS Biology}, 
323 | keywords = {Bibliometrics,Biomedical Research,Biomedical Research: statistics \& numerical data,Biomedical Research: trends,Internet,Internet: statistics \& numerical data,Internet: trends,Internet: utilization,Linear Models,Odds Ratio,Periodicals as Topic,Periodicals as Topic: economics,Periodicals as Topic: statistics \& numerical data,Periodicals as Topic: trends,Public Sector,Publishing,Publishing: economics,Publishing: statistics \& numerical data,Publishing: trends}, 324 | month = may, 325 | number = {5}, 326 | pages = {e157}, 327 | pmid = {16683865}, 328 | publisher = {Public Library of Science}, 329 | title = {{Citation advantage of open access articles.}}, 330 | volume = {4}, 331 | year = {2006} 332 | } 333 | @misc{Plum2013, 334 | title = {{Plum Analytics}}, 335 | url = {http://www.plumanalytics.com/}, 336 | year = {2013} 337 | } 338 | @misc{Altmetric2013, 339 | title = {{Altmetric.com}}, 340 | url = {http://www.altmetric.com/}, 341 | year = {2013} 342 | } 343 | @misc{Impactstory2013, 344 | title = {{ImpactStory}}, 345 | url = {http://impactstory.org/}, 346 | year = {2013} 347 | } 348 | @misc{NISO2013, 349 | title = {{NISO Alternative Assessment Metrics (Altmetrics) Project}}, 350 | url = {http://www.niso.org/topics/tl/altmetrics/initiative}, 351 | year = {2013} 352 | } 353 | @article{Dalloul2010, 354 | author = {Dalloul, Rami A and Long, Julie A and Zimin, Aleksey V and Aslam, Luqman and Beal, Kathryn and Blomberg, Le Ann and Bouffard, Pascal and Burt, David W and Crasta, Oswald and Crooijmans, Richard P M A and Cooper, Kristal and Coulombe, Roger A and De, Supriyo and Delany, Mary E and Dodgson, Jerry B and Dong, Jennifer J and Evans, Clive and Frederickson, Karin M and Flicek, Paul and Florea, Liliana and Folkerts, Otto and Groenen, Martien A M and Harkins, Tim T and Herrero, Javier and Hoffmann, Steve and Megens, Hendrik-Jan and Jiang, Andrew and de Jong, Pieter and Kaiser, Pete and Kim, Heebal and Kim, Kyu-Won and Kim, Sungwon and Langenberger, 
David and Lee, Mi-Kyung and Lee, Taeheon and Mane, Shrinivasrao and Marcais, Guillaume and Marz, Manja and McElroy, Audrey P and Modise, Thero and Nefedov, Mikhail and Notredame, C\'{e}dric and Paton, Ian R and Payne, William S and Pertea, Geo and Prickett, Dennis and Puiu, Daniela and Qioa, Dan and Raineri, Emanuele and Ruffier, Magali and Salzberg, Steven L and Schatz, Michael C and Scheuring, Chantel and Schmidt, Carl J and Schroeder, Steven and Searle, Stephen M J and Smith, Edward J and Smith, Jacqueline and Sonstegard, Tad S and Stadler, Peter F and Tafer, Hakim and Tu, Zhijian Jake and {Van Tassell}, Curtis P and Vilella, Albert J and Williams, Kelly P and Yorke, James A and Zhang, Liqing and Zhang, Hong-Bin and Zhang, Xiaojun and Zhang, Yang and Reed, Kent M}, 355 | doi = {10.1371/journal.pbio.1000475}, 356 | issn = {1545-7885}, 357 | journal = {PLOS Biology}, 358 | keywords = {Animals,Base Sequence,Chromosome Mapping,DNA,DNA: genetics,Genome,Polymorphism, Single Nucleotide,Sequence Analysis, DNA,Sequence Homology, Nucleic Acid,Species Specificity,Turkeys,Turkeys: genetics}, 359 | month = jan, 360 | number = {9}, 361 | pmid = {20838655}, 362 | title = {{Multi-platform next-generation sequencing of the domestic turkey (Meleagris gallopavo): genome assembly and analysis.}}, 363 | volume = {8}, 364 | year = {2010} 365 | } 366 | @article{Fausto2012, 367 | author = {Fausto, Sibele and Machado, Fabio A and Bento, Luiz Fernando J and Iamarino, Atila and Nahas, Tatiana R and Munger, David S}, 368 | doi = {10.1371/journal.pone.0050109}, 369 | editor = {Perc, Matjaz}, 370 | issn = {1932-6203}, 371 | journal = {PLOS ONE}, 372 | keywords = {Blogging,Communication,Humans,Information Dissemination,Information Dissemination: methods,Internet,Research,Science,Technology}, 373 | month = jan, 374 | number = {12}, 375 | pages = {e50109}, 376 | pmid = {23251358}, 377 | publisher = {Public Library of Science}, 378 | title = {{Research blogging: indexing and registering the 
change in science 2.0.}}, 379 | volume = {7}, 380 | year = {2012} 381 | } 382 | @misc{Video2012, 383 | booktitle = {The OpenHelix Blog}, 384 | title = {{Video Tip of the Week: Turkeys and their genomes}}, 385 | url = {http://blog.openhelix.eu/?p=14388}, 386 | year = {2012} 387 | } 388 | 389 | @article{Floreano2010, 390 | author = {Floreano, Dario and Keller, Laurent}, 391 | doi = {10.1371/journal.pbio.1000292}, 392 | issn = {1545-7885}, 393 | journal = {PLOS Biology}, 394 | keywords = {Artificial Intelligence,Biological Evolution,Genetic,Models,Neural Networks (Computer),Selection,Theoretical}, 395 | month = jan, 396 | number = {1}, 397 | pages = {e1000292}, 398 | pmid = {20126252}, 399 | publisher = {Public Library of Science}, 400 | title = {{Evolution of adaptive behaviour in robots by means of Darwinian selection.}}, 401 | volume = {8}, 402 | year = {2010} 403 | } 404 | @article{Lin2013, 405 | author = {Lin, Jennifer and Fenner, Martin}, 406 | doi = {10.3789/isqv25no2.2013.04}, 407 | issn = {1041-0031}, 408 | journal = {Information Standards Quarterly}, 409 | month = jan, 410 | number = {2}, 411 | pages = {20}, 412 | title = {{Altmetrics in Evolution: Defining and Redefining the Ontology of Article-Level Metrics}}, 413 | volume = {25}, 414 | year = {2013} 415 | } 416 | @article{Bik2013, 417 | author = {Bik, Holly M and Goldstein, Miriam C}, 418 | doi = {10.1371/journal.pbio.1001535}, 419 | issn = {1545-7885}, 420 | journal = {PLOS Biology}, 421 | keywords = {Congresses as Topic,Humans,Laboratory Personnel,Social Media,Social Media: statistics \& numerical data,Social Media: trends,Social Networking,Software}, 422 | month = jan, 423 | number = {4}, 424 | pages = {e1001535}, 425 | pmid = {23630451}, 426 | publisher = {Public Library of Science}, 427 | title = {{An introduction to social media for scientists.}}, 428 | volume = {11}, 429 | year = {2013} 430 | } 431 | @misc{Tasmanian2013, 432 | booktitle = {Wikipedia}, 433 | title = {{Tasmanian devil}}, 434 | url = 
{http://en.wikipedia.org/wiki/Tasmanian_devil}, 435 | year = {2013} 436 | } 437 | 438 | @misc{ALM2013, 439 | title = {{ALM Reports}}, 440 | url = {http://almreports.plos.org}, 441 | year = {2013} 442 | } 443 | 444 | @article{Nilsson2010, 445 | author = {Nilsson, Maria A and Churakov, Gennady and Sommer, Mirjam and Tran, Ngoc Van and Zemann, Anja and Brosius, J\"{u}rgen and Schmitz, J\"{u}rgen}, 446 | doi = {10.1371/journal.pbio.1000436}, 447 | editor = {Penny, David}, 448 | issn = {1545-7885}, 449 | journal = {PLOS Biology}, 450 | keywords = {Animals,Biological Evolution,Genetic Markers,Genome,Genome: genetics,Macropodidae,Macropodidae: genetics,Mutagenesis, Insertional,Mutagenesis, Insertional: genetics,Opossums,Opossums: genetics,Phylogeny,Retroelements,Retroelements: genetics,Short Interspersed Nucleotide Elements,Short Interspersed Nucleotide Elements: genetics}, 451 | month = jan, 452 | number = {7}, 453 | pages = {e1000436}, 454 | pmid = {20668664}, 455 | publisher = {Public Library of Science}, 456 | title = {{Tracking marsupial evolution using archaic genomic retroposon insertions.}}, 457 | volume = {8}, 458 | year = {2010} 459 | } -------------------------------------------------------------------------------- /examples/example1.html: -------------------------------------------------------------------------------- 1 | <p><em>Article-level metrics (ALMs) provide a wide range of metrics about the uptake of an individual journal article by the scientific community after publication. They include citations, usage statistics, discussions in online comments and social media, social bookmarking, and recommendations. 
In this essay, we describe why article-level metrics are an important extension of traditional citation-based journal metrics and provide a number of examples from ALM data collected for PLOS Biology.</em></p> 2 | <blockquote> 3 | <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License, authored by me and <a href="http://dx.doi.org/10.1371/journal.pbio.1001687">originally published Oct 22, 2013 in PLOS Biology</a>.</p> 4 | </blockquote> 5 | <p>The scientific impact of a particular piece of research is reflected in how this work is taken up by the scientific community. The first systematic approach that was used to assess impact, based on the technology available at the time, was to track citations and aggregate them by journal. This strategy is not only no longer necessary — since now we can easily track citations for individual articles — but also, and more importantly, journal-based metrics are now considered a poor performance measure for individual articles <span class="citation">[@Campbell2008; @Glanzel2013]</span>. One major problem with journal-based metrics is the variation in citations per article, which means that a small percentage of articles can skew, and are responsible for, the majority of the journal-based citation impact factor, as shown by Campbell <span class="citation">[-@Campbell2008]</span> for the 2004 <em>Nature</em> Journal Impact Factor. <strong>Figure 1</strong> further illustrates this point, showing the wide distribution of citation counts between <em>PLOS Biology</em> research articles published in 2010. <em>PLOS Biology</em> research articles published in 2010 have been cited a median 19 times to date in Scopus, but 10% of them have been cited 50 or more times, and two articles <span class="citation">[@Narendra:2010fw; @Dickson:2010ix]</span> more than 300 times. 
<em>PLOS Biology</em> metrics are used as examples throughout this essay, and the dataset is available in the supporting information (<strong>Data S1</strong>). Similar data are available for an increasing number of other publications and organizations.</p> 6 | <pre class="sourceCode r"><code class="sourceCode r"><span class="co"># code for figure 1: density plots for citation counts for PLOS Biology</span> 7 | <span class="co"># articles published in 2010</span> 8 | 9 | <span class="co"># load May 20, 2013 ALM report</span> 10 | alm <-<span class="st"> </span><span class="kw">read.csv</span>(<span class="st">"../data/alm_report_plos_biology_2013-05-20.csv"</span>, <span class="dt">stringsAsFactors =</span> <span class="ot">FALSE</span>) 11 | 12 | <span class="co"># only look at research articles</span> 13 | alm <-<span class="st"> </span><span class="kw">subset</span>(alm, alm$article_type ==<span class="st"> "Research Article"</span>) 14 | 15 | <span class="co"># only look at papers published in 2010</span> 16 | alm$publication_date <-<span class="st"> </span><span class="kw">as.Date</span>(alm$publication_date) 17 | alm <-<span class="st"> </span><span class="kw">subset</span>(alm, alm$publication_date ><span class="st"> "2010-01-01"</span> &<span class="st"> </span>alm$publication_date <= 18 | <span class="st"> "2010-12-31"</span>) 19 | 20 | <span class="co"># labels</span> 21 | colnames <-<span class="st"> </span><span class="kw">dimnames</span>(alm)[[<span class="dv">2</span>]] 22 | plos.color <-<span class="st"> "#1ebd21"</span> 23 | plos.source <-<span class="st"> "scopus"</span> 24 | 25 | plos.xlab <-<span class="st"> "Scopus Citations"</span> 26 | plos.ylab <-<span class="st"> "Probability"</span> 27 | 28 | quantile <-<span class="st"> </span><span class="kw">quantile</span>(alm[, plos.source], <span class="kw">c</span>(<span class="fl">0.1</span>, <span class="fl">0.5</span>, <span class="fl">0.9</span>), <span class="dt">na.rm =</span> <span 
class="ot">TRUE</span>) 29 | 30 | <span class="co"># plot the chart</span> 31 | opar <-<span class="st"> </span><span class="kw">par</span>(<span class="dt">mai =</span> <span class="kw">c</span>(<span class="fl">0.5</span>, <span class="fl">0.75</span>, <span class="fl">0.5</span>, <span class="fl">0.5</span>), <span class="dt">omi =</span> <span class="kw">c</span>(<span class="fl">0.25</span>, <span class="fl">0.1</span>, <span class="fl">0.25</span>, <span class="fl">0.1</span>), <span class="dt">mgp =</span> <span class="kw">c</span>(<span class="dv">3</span>, 32 | <span class="fl">0.5</span>, <span class="fl">0.5</span>), <span class="dt">fg =</span> <span class="st">"black"</span>, <span class="dt">cex.main =</span> <span class="dv">2</span>, <span class="dt">cex.lab =</span> <span class="fl">1.5</span>, <span class="dt">col =</span> plos.color, 33 | <span class="dt">col.main =</span> plos.color, <span class="dt">col.lab =</span> plos.color, <span class="dt">xaxs =</span> <span class="st">"i"</span>, <span class="dt">yaxs =</span> <span class="st">"i"</span>) 34 | 35 | d <-<span class="st"> </span><span class="kw">density</span>(alm[, plos.source], <span class="dt">from =</span> <span class="dv">0</span>, <span class="dt">to =</span> <span class="dv">100</span>) 36 | d$x <-<span class="st"> </span><span class="kw">append</span>(d$x, <span class="dv">0</span>) 37 | d$y <-<span class="st"> </span><span class="kw">append</span>(d$y, <span class="dv">0</span>) 38 | <span class="kw">plot</span>(d, <span class="dt">type =</span> <span class="st">"n"</span>, <span class="dt">main =</span> <span class="ot">NA</span>, <span class="dt">xlab =</span> <span class="ot">NA</span>, <span class="dt">ylab =</span> <span class="ot">NA</span>, <span class="dt">xlim =</span> <span class="kw">c</span>(<span class="dv">0</span>, <span class="dv">100</span>), <span class="dt">frame.plot =</span> <span class="ot">FALSE</span>) 39 | <span class="kw">polygon</span>(d, <span 
class="dt">col =</span> plos.color, <span class="dt">border =</span> <span class="ot">NA</span>) 40 | <span class="kw">mtext</span>(plos.xlab, <span class="dt">side =</span> <span class="dv">1</span>, <span class="dt">col =</span> plos.color, <span class="dt">cex =</span> <span class="fl">1.25</span>, <span class="dt">outer =</span> <span class="ot">TRUE</span>, <span class="dt">adj =</span> <span class="dv">1</span>, 41 | <span class="dt">at =</span> <span class="dv">1</span>) 42 | <span class="kw">mtext</span>(plos.ylab, <span class="dt">side =</span> <span class="dv">2</span>, <span class="dt">col =</span> plos.color, <span class="dt">cex =</span> <span class="fl">1.25</span>, <span class="dt">outer =</span> <span class="ot">TRUE</span>, <span class="dt">adj =</span> <span class="dv">0</span>, 43 | <span class="dt">at =</span> <span class="dv">1</span>, <span class="dt">las =</span> <span class="dv">1</span>) 44 | 45 | <span class="kw">par</span>(opar)</code></pre> 46 | <div class="figure"> 47 | <img src="/images/2013-12-11_figure_1.svg" alt="Figure 1. Citation counts for PLOS Biology articles published in 2010. Scopus citation counts plotted as a probability distribution for all 197 PLOS Biology research articles published in 2010. Data collected May 20, 2013. Median 19 citations; 10% of papers have at least 50 citations." /><p class="caption"><strong>Figure 1. Citation counts for PLOS Biology articles published in 2010.</strong> Scopus citation counts plotted as a probability distribution for all 197 <em>PLOS Biology</em> research articles published in 2010. Data collected May 20, 2013. Median 19 citations; 10% of papers have at least 50 citations.</p> 48 | </div> 49 | <p>Scientific impact is a multi-dimensional construct that can not be adequately measured by any single indicator <span class="citation">[@Glanzel2013; @bollenEtal2009; @Schekman2013]</span>. To this end, PLOS has collected and displayed a variety of metrics for all its articles since 2009. 
The array of different categorised article-level metrics (ALMs) used and provided by PLOS as of August 2013 are shown in <strong>Figure 2</strong>. In addition to citations and usage statistics, i.e., how often an article has been viewed and downloaded, PLOS also collects metrics about: how often an article has been saved in online reference managers, such as Mendeley; how often an article has been discussed in its comments section online, and also in science blogs or in social media; and how often an article has been recommended by other scientists. These additional metrics provide valuable information that we would miss if we only consider citations. Two important shortcomings of citation-based metrics are that (1) they take years to accumulate and (2) citation analysis is not always the best indicator of impact in more practical fields, such as clinical medicine <span class="citation">[@VanEck2013]</span>. Usage statistics often better reflect the impact of work in more practical fields, and they also sometimes better highlight articles of general interest (for example, the 2006 <em>PLOS Biology</em> article on the citation advantage of Open Access articles <span class="citation">[@Eysenbach2006]</span>, one of the 10 most-viewed articles published in <em>PLOS Biology</em>).</p> 50 | <div class="figure"> 51 | <img src="/images/2013-12-11_figure_2.png" alt="Figure 2. Article-level metrics used by PLOS in August 2013 and their categories. Taken from [@Lin2013] with permission by the authors." /><p class="caption"><strong>Figure 2. 
Article-level metrics used by PLOS in August 2013 and their categories.</strong> Taken from <span class="citation">[@Lin2013]</span> with permission by the authors.</p> 52 | </div> 53 | <p>A bubble chart showing all 2010 <em>PLOS Biology</em> articles (<strong>Figure 3</strong>) gives a good overview of the year's views and citations, plus it shows the influence that the article type (as indicated by dot color) has on an article's performance as measured by these metrics. The weekly <em>PLOS Biology</em> publication schedule is reflected in this figure, with articles published on the same day present in a vertical line. <strong>Figure 3</strong> also shows that the two most highly cited 2010 <em>PLOS Biology</em> research articles are also among the most viewed (indicated by the red arrows), but overall there isn't a strong correlation between citations and views. The most-viewed article published in 2010 in <em>PLOS Biology</em> is an essay on Darwinian selection in robots <span class="citation">[@Floreano2010]</span>. Detailed usage statistics also allow speculation about the different ways that readers access and make use of published literature; some articles are browsed or read online due to general interest while others that are downloaded (and perhaps also printed) may reflect the reader's intention to look at the data and results in detail and to return to the article more than once.</p> 54 | <pre class="sourceCode r"><code class="sourceCode r"><span class="co"># code for figure 3: Bubblechart views vs. 
citations for PLOS Biology</span> 55 | <span class="co"># articles published in 2010.</span> 56 | 57 | <span class="co"># Load required libraries</span> 58 | <span class="kw">library</span>(plyr) 59 | 60 | <span class="co"># load May 20, 2013 ALM report</span> 61 | alm <-<span class="st"> </span><span class="kw">read.csv</span>(<span class="st">"../data/alm_report_plos_biology_2013-05-20.csv"</span>, <span class="dt">stringsAsFactors =</span> <span class="ot">FALSE</span>, 62 | <span class="dt">na.strings =</span> <span class="kw">c</span>(<span class="st">"0"</span>)) 63 | 64 | <span class="co"># only look at papers published in 2010</span> 65 | alm$publication_date <-<span class="st"> </span><span class="kw">as.Date</span>(alm$publication_date) 66 | alm <-<span class="st"> </span><span class="kw">subset</span>(alm, alm$publication_date ><span class="st"> "2010-01-01"</span> &<span class="st"> </span>alm$publication_date <= 67 | <span class="st"> "2010-12-31"</span>) 68 | 69 | <span class="co"># make sure counter values are numbers</span> 70 | alm$counter_html <-<span class="st"> </span><span class="kw">as.numeric</span>(alm$counter_html) 71 | 72 | <span class="co"># lump all papers together that are not research articles</span> 73 | reassignType <-<span class="st"> </span>function(x) if (x ==<span class="st"> "Research Article"</span>) <span class="dv">1</span> else <span class="dv">0</span> 74 | alm$article_group <-<span class="st"> </span><span class="kw">aaply</span>(alm$article_type, <span class="dv">1</span>, reassignType) 75 | 76 | <span class="co"># calculate article age in months</span> 77 | alm$age_in_months <-<span class="st"> </span>(<span class="kw">Sys.Date</span>() -<span class="st"> </span>alm$publication_date)/<span class="fl">365.25</span> *<span class="st"> </span><span class="dv">12</span> 78 | start_age_in_months <-<span class="st"> </span><span class="kw">floor</span>(<span class="kw">as.numeric</span>(<span class="kw">Sys.Date</span>() 
-<span class="st"> </span><span class="kw">as.Date</span>(<span class="kw">strptime</span>(<span class="st">"2010-12-31"</span>, 79 | <span class="dt">format =</span> <span class="st">"%Y-%m-%d"</span>)))/<span class="fl">365.25</span> *<span class="st"> </span><span class="dv">12</span>) 80 | 81 | <span class="co"># chart variables</span> 82 | x <-<span class="st"> </span>alm$age_in_months 83 | y <-<span class="st"> </span>alm$counter 84 | z <-<span class="st"> </span>alm$scopus 85 | 86 | xlab <-<span class="st"> "Age in Months"</span> 87 | ylab <-<span class="st"> "Total Views"</span> 88 | 89 | labels <-<span class="st"> </span>alm$article_group 90 | col.main <-<span class="st"> "#1ebd21"</span> 91 | col <-<span class="st"> "#666358"</span> 92 | 93 | <span class="co"># calculate bubble diameter</span> 94 | z <-<span class="st"> </span><span class="kw">sqrt</span>(z/pi) 95 | 96 | <span class="co"># calculate bubble color</span> 97 | getColor <-<span class="st"> </span>function(x) <span class="kw">c</span>(<span class="st">"#c9c9c7"</span>, <span class="st">"#1ebd21"</span>)[x +<span class="st"> </span><span class="dv">1</span>] 98 | colors <-<span class="st"> </span><span class="kw">aaply</span>(labels, <span class="dv">1</span>, getColor) 99 | 100 | <span class="co"># plot the chart</span> 101 | opar <-<span class="st"> </span><span class="kw">par</span>(<span class="dt">mai =</span> <span class="kw">c</span>(<span class="fl">0.5</span>, <span class="fl">0.75</span>, <span class="fl">0.5</span>, <span class="fl">0.5</span>), <span class="dt">omi =</span> <span class="kw">c</span>(<span class="fl">0.25</span>, <span class="fl">0.1</span>, <span class="fl">0.25</span>, <span class="fl">0.1</span>), <span class="dt">mgp =</span> <span class="kw">c</span>(<span class="dv">3</span>, 102 | <span class="fl">0.5</span>, <span class="fl">0.5</span>), <span class="dt">fg =</span> <span class="st">"black"</span>, <span class="dt">cex =</span> <span class="dv">1</span>, 
<span class="dt">cex.main =</span> <span class="dv">2</span>, <span class="dt">cex.lab =</span> <span class="fl">1.5</span>, <span class="dt">col =</span> <span class="st">"white"</span>, 103 | <span class="dt">col.main =</span> col.main, <span class="dt">col.lab =</span> col) 104 | 105 | <span class="kw">plot</span>(x, y, <span class="dt">type =</span> <span class="st">"n"</span>, <span class="dt">xlim =</span> <span class="kw">c</span>(start_age_in_months, start_age_in_months +<span class="st"> </span><span class="dv">13</span>), 106 | <span class="dt">ylim =</span> <span class="kw">c</span>(<span class="dv">0</span>, <span class="dv">60000</span>), <span class="dt">xlab =</span> <span class="ot">NA</span>, <span class="dt">ylab =</span> <span class="ot">NA</span>, <span class="dt">las =</span> <span class="dv">1</span>) 107 | <span class="kw">symbols</span>(x, y, <span class="dt">circles =</span> z, <span class="dt">inches =</span> <span class="kw">exp</span>(<span class="fl">1.3</span>)/<span class="dv">15</span>, <span class="dt">bg =</span> colors, <span class="dt">xlim =</span> <span class="kw">c</span>(start_age_in_months, 108 | start_age_in_months +<span class="st"> </span><span class="dv">13</span>), <span class="dt">ylim =</span> <span class="kw">c</span>(<span class="dv">0</span>, ymax), <span class="dt">xlab =</span> <span class="ot">NA</span>, <span class="dt">ylab =</span> <span class="ot">NA</span>, <span class="dt">las =</span> <span class="dv">1</span>, 109 | <span class="dt">add =</span> <span class="ot">TRUE</span>) 110 | <span class="kw">mtext</span>(xlab, <span class="dt">side =</span> <span class="dv">1</span>, <span class="dt">col =</span> col.main, <span class="dt">cex =</span> <span class="fl">1.25</span>, <span class="dt">outer =</span> <span class="ot">TRUE</span>, <span class="dt">adj =</span> <span class="dv">1</span>, <span class="dt">at =</span> <span class="dv">1</span>) 111 | <span class="kw">mtext</span>(ylab, <span 
class="dt">side =</span> <span class="dv">2</span>, <span class="dt">col =</span> col.main, <span class="dt">cex =</span> <span class="fl">1.25</span>, <span class="dt">outer =</span> <span class="ot">TRUE</span>, <span class="dt">adj =</span> <span class="dv">0</span>, <span class="dt">at =</span> <span class="dv">1</span>, 112 | <span class="dt">las =</span> <span class="dv">1</span>) 113 | 114 | <span class="kw">par</span>(opar)</code></pre> 115 | <div class="figure"> 116 | <img src="/images/2013-12-11_figure_3.svg" alt="Figure 3. Views vs. citations for PLOS Biology articles published in 2010. All 304 PLOS Biology articles published in 2010. Bubble size correlates with number of Scopus citations. Research articles are labeled green; all other articles are grey. Red arrows indicate the two most highly cited papers. Data collected May 20, 2013." /><p class="caption"><strong>Figure 3. Views vs. citations for PLOS Biology articles published in 2010.</strong> All 304 <em>PLOS Biology</em> articles published in 2010. Bubble size correlates with number of Scopus citations. Research articles are labeled green; all other articles are grey. Red arrows indicate the two most highly cited papers. Data collected May 20, 2013.</p> 117 | </div> 118 | <p>When readers first see an interesting article, their response is often to view or download it. By contrast, a citation may be one of the last outcomes of their interest, occurring only about 1 in 300 times a PLOS paper is viewed online. A lot of things happen in between these potential responses, ranging from discussions in comments, social media, and blogs, to bookmarking, to linking from websites. These activities are usually subsumed under the term “altmetrics,” and their variety can be overwhelming. 
Therefore, it helps to group them together into categories, and several organizations, including PLOS, are using the category labels of Viewed, Cited, Saved, Discussed, and Recommended (<strong>Figures 2 and 4</strong>, see also <span class="citation">[@Lin2013]</span>).</p> 119 | <pre class="sourceCode r"><code class="sourceCode r"><span class="co"># code for figure 4: bar plot for Article-level metrics for PLOS Biology</span> 120 | 121 | <span class="co"># Load required libraries</span> 122 | <span class="kw">library</span>(reshape2) 123 | 124 | <span class="co"># load May 20, 2013 ALM report</span> 125 | alm <-<span class="st"> </span><span class="kw">read.csv</span>(<span class="st">"../data/alm_report_plos_biology_2013-05-20.csv"</span>, <span class="dt">stringsAsFactors =</span> <span class="ot">FALSE</span>, 126 | <span class="dt">na.strings =</span> <span class="kw">c</span>(<span class="dv">0</span>, <span class="st">"0"</span>)) 127 | 128 | <span class="co"># only look at research articles</span> 129 | alm <-<span class="st"> </span><span class="kw">subset</span>(alm, alm$article_type ==<span class="st"> "Research Article"</span>) 130 | 131 | <span class="co"># make sure columns are in the right format</span> 132 | alm$counter_html <-<span class="st"> </span><span class="kw">as.numeric</span>(alm$counter_html) 133 | alm$mendeley <-<span class="st"> </span><span class="kw">as.numeric</span>(alm$mendeley) 134 | 135 | <span class="co"># options</span> 136 | plos.color <-<span class="st"> "#1ebd21"</span> 137 | plos.colors <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"#a17f78"</span>, <span class="st">"#ad9a27"</span>, <span class="st">"#ad9a27"</span>, <span class="st">"#ad9a27"</span>, <span class="st">"#ad9a27"</span>, <span class="st">"#ad9a27"</span>, 138 | <span class="st">"#dcebdd"</span>, <span class="st">"#dcebdd"</span>, <span class="st">"#789aa1"</span>, <span class="st">"#789aa1"</span>, <span class="st">"#789aa1"</span>, 
<span class="st">"#304345"</span>, <span class="st">"#304345"</span>) 139 | 140 | <span class="co"># use subset of columns</span> 141 | alm <-<span class="st"> </span><span class="kw">subset</span>(alm, <span class="dt">select =</span> <span class="kw">c</span>(<span class="st">"f1000"</span>, <span class="st">"wikipedia"</span>, <span class="st">"researchblogging"</span>, <span class="st">"comments"</span>, 142 | <span class="st">"facebook"</span>, <span class="st">"twitter"</span>, <span class="st">"citeulike"</span>, <span class="st">"mendeley"</span>, <span class="st">"pubmed"</span>, <span class="st">"crossref"</span>, <span class="st">"scopus"</span>, 143 | <span class="st">"pmc_html"</span>, <span class="st">"counter_html"</span>)) 144 | 145 | <span class="co"># calculate percentage of values that are not missing (i.e. have a count of</span> 146 | <span class="co"># at least 1)</span> 147 | colSums <-<span class="st"> </span><span class="kw">colSums</span>(!<span class="kw">is.na</span>(alm)) *<span class="st"> </span><span class="dv">100</span>/<span class="kw">length</span>(alm$counter_html) 148 | exactSums <-<span class="st"> </span><span class="kw">sum</span>(<span class="kw">as.numeric</span>(alm$pmc_html), <span class="dt">na.rm =</span> <span class="ot">TRUE</span>) 149 | 150 | <span class="co"># plot the chart</span> 151 | opar <-<span class="st"> </span><span class="kw">par</span>(<span class="dt">mar =</span> <span class="kw">c</span>(<span class="fl">0.1</span>, <span class="fl">7.25</span>, <span class="fl">0.1</span>, <span class="fl">0.1</span>) +<span class="st"> </span><span class="fl">0.1</span>, <span class="dt">omi =</span> <span class="kw">c</span>(<span class="fl">0.1</span>, <span class="fl">0.25</span>, <span class="fl">0.1</span>, <span class="fl">0.1</span>), 152 | <span class="dt">col.main =</span> plos.color) 153 | 154 | plos.names <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"F1000Prime"</span>, <span 
class="st">"Wikipedia"</span>, <span class="st">"Research Blogging"</span>, <span class="st">"PLOS Comments"</span>, 155 | <span class="st">"Facebook"</span>, <span class="st">"Twitter"</span>, <span class="st">"CiteULike"</span>, <span class="st">"Mendeley"</span>, <span class="st">"PubMed Citations"</span>, <span class="st">"CrossRef"</span>, 156 | <span class="st">"Scopus"</span>, <span class="st">"PMC HTML Views"</span>, <span class="st">"PLOS HTML Views"</span>) 157 | y <-<span class="st"> </span><span class="kw">barplot</span>(colSums, <span class="dt">horiz =</span> <span class="ot">TRUE</span>, <span class="dt">col =</span> plos.colors, <span class="dt">border =</span> <span class="ot">NA</span>, <span class="dt">xlab =</span> plos.names, 158 | <span class="dt">xlim =</span> <span class="kw">c</span>(<span class="dv">0</span>, <span class="dv">120</span>), <span class="dt">axes =</span> <span class="ot">FALSE</span>, <span class="dt">names.arg =</span> plos.names, <span class="dt">las =</span> <span class="dv">1</span>, <span class="dt">adj =</span> <span class="dv">0</span>) 159 | <span class="kw">text</span>(colSums +<span class="st"> </span><span class="dv">6</span>, y, <span class="dt">labels =</span> <span class="kw">sprintf</span>(<span class="st">"%1.0f%%"</span>, colSums)) 160 | 161 | <span class="kw">par</span>(opar)</code></pre> 162 | <div class="figure"> 163 | <img src="/images/2013-12-11_figure_4.svg" alt="Figure 4. Article-level metrics for PLOS Biology. Proportion of all 1,706 PLOS Biology research articles published up to May 20, 2013 mentioned by particular article-level metrics source. Colors indicate categories (Viewed, Cited, Saved, Discussed, Recommended), as used on the PLOS website." /><p class="caption"><strong>Figure 4. Article-level metrics for PLOS Biology.</strong> Proportion of all 1,706 <em>PLOS Biology</em> research articles published up to May 20, 2013 mentioned by particular article-level metrics source. 
Colors indicate categories (Viewed, Cited, Saved, Discussed, Recommended), as used on the PLOS website.</p> 164 | </div> 165 | <p>All <em>PLOS Biology</em> articles are viewed and downloaded, and almost all of them (all research articles and nearly all front matter) will be cited sooner or later. Almost all of them will also be bookmarked in online reference managers, such as Mendeley, but the percentage of articles that are discussed online is much smaller. Some of these percentages are time dependent; the use of social media discussion platforms, such as Twitter and Facebook for example, has increased in recent years (93% of <em>PLOS Biology</em> research articles published since June 2012 have been discussed on Twitter, and 63% mentioned on Facebook). These are the locations where most of the online discussion around published articles currently seems to take place; the percentage of papers with comments on the PLOS website or that have science blog posts written about them is much smaller. Not all of this online discussion is about research articles, and perhaps, not surprisingly, the most-tweeted PLOS article overall (with more than 1,100 tweets) is a <em>PLOS Biology</em> perspective on the use of social media for scientists <span class="citation">[@Bik2013]</span>.</p> 166 | <p>Some metrics are not so much indicators of a broad online discussion, but rather focus on highlighting articles of particular interest. For example, science blogs allow a more detailed discussion of an article as compared to comments or tweets, and journals themselves sometimes choose to highlight a paper on their own blogs, allowing for a more digestible explanation of the science for the non-expert reader <span class="citation">[@Fausto2012]</span>. 
Coverage by other bloggers also serves the same purpose; a good example of this is one recent post on the OpenHelix Blog <span class="citation">[@Video2012]</span> that contains video footage of the second author of a 2010 <em>PLOS Biology</em> article <span class="citation">[@Dalloul2010]</span> discussing the turkey genome.</p> 167 | <p>F1000Prime, a commercial service of recommendations by expert scientists, was added to the PLOS Article-Level Metrics in August 2013. We now highlight on the PLOS website when any articles have received at least one recommendation within F1000Prime. We also monitor when an article has been cited within the widely used modern-day online encyclopedia, Wikipedia. A good example of the latter is the Tasmanian devil Wikipedia page <span class="citation">[@Tasmanian2013]</span> that links to a <em>PLOS Biology</em> research article published in 2010 <span class="citation">[@Nilsson2010]</span>. While a F1000Prime recommendation is a strong endorsement from peer(s) in the scientific community, being included in a Wikipedia page is akin to making it into a textbook about the subject area and being read by a much wider audience that goes beyond the scientific community.</p> 168 | <p><em>PLOS Biology</em> is the PLOS journal with the highest percentage of articles recommended in F1000Prime and mentioned in Wikipedia, but there is only partial overlap between the two groups of articles because they focus on different audiences (<strong>Figure 5</strong>). These recommendations and mentions in turn show correlations with other metrics, but not simple ones; you can't assume, for example, that highly cited articles are more likely to be recommended by F1000Prime, so it will be interesting to monitor these trends now that we include this information.</p> 169 | <pre class="sourceCode r"><code class="sourceCode r"><span class="co"># code for figure 5: Venn diagram F1000 vs. 
Wikipedia for PLOS Biology</span> 170 | <span class="co"># articles</span> 171 | 172 | <span class="co"># load required libraries</span> 173 | <span class="kw">library</span>(<span class="st">"plyr"</span>) 174 | <span class="kw">library</span>(<span class="st">"VennDiagram"</span>) 175 | 176 | <span class="co"># load May 20, 2013 ALM report</span> 177 | alm <-<span class="st"> </span><span class="kw">read.csv</span>(<span class="st">"../data/alm_report_plos_biology_2013-05-20.csv"</span>, <span class="dt">stringsAsFactors =</span> <span class="ot">FALSE</span>) 178 | 179 | <span class="co"># only look at research articles</span> 180 | alm <-<span class="st"> </span><span class="kw">subset</span>(alm, alm$article_type ==<span class="st"> "Research Article"</span>) 181 | 182 | <span class="co"># group articles based on values in Wikipedia and F1000</span> 183 | reassignWikipedia <-<span class="st"> </span>function(x) if (x ><span class="st"> </span><span class="dv">0</span>) <span class="dv">1</span> else <span class="dv">0</span> 184 | alm$wikipedia_bin <-<span class="st"> </span><span class="kw">aaply</span>(alm$wikipedia, <span class="dv">1</span>, reassignWikipedia) 185 | reassignF1000 <-<span class="st"> </span>function(x) if (x ><span class="st"> </span><span class="dv">0</span>) <span class="dv">2</span> else <span class="dv">0</span> 186 | alm$f1000_bin <-<span class="st"> </span><span class="kw">aaply</span>(alm$f1000, <span class="dv">1</span>, reassignF1000) 187 | alm$article_group =<span class="st"> </span>alm$wikipedia_bin +<span class="st"> </span>alm$f1000_bin 188 | reassignCombined <-<span class="st"> </span>function(x) if (x ==<span class="st"> </span><span class="dv">3</span>) <span class="dv">1</span> else <span class="dv">0</span> 189 | alm$combined_bin <-<span class="st"> </span><span class="kw">aaply</span>(alm$article_group, <span class="dv">1</span>, reassignCombined) 190 | reassignNo <-<span class="st"> </span>function(x) if (x ==<span 
class="st"> </span><span class="dv">0</span>) <span class="dv">1</span> else <span class="dv">0</span> 191 | alm$no_bin <-<span class="st"> </span><span class="kw">aaply</span>(alm$article_group, <span class="dv">1</span>, reassignNo) 192 | 193 | <span class="co"># remember to divide f1000_bin by 2, as this is the default value</span> 194 | summary <-<span class="st"> </span><span class="kw">colSums</span>(<span class="kw">subset</span>(alm, <span class="dt">select =</span> <span class="kw">c</span>(<span class="st">"wikipedia_bin"</span>, <span class="st">"f1000_bin"</span>, <span class="st">"combined_bin"</span>, 195 | <span class="st">"no_bin"</span>)), <span class="dt">na.rm =</span> <span class="ot">TRUE</span>) 196 | rows <-<span class="st"> </span><span class="kw">nrow</span>(alm) 197 | 198 | <span class="co"># options</span> 199 | plos.colors <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"#c9c9c7"</span>, <span class="st">"#0000ff"</span>, <span class="st">"#ff0000"</span>) 200 | 201 | <span class="co"># plot the chart</span> 202 | opar <-<span class="st"> </span><span class="kw">par</span>(<span class="dt">mai =</span> <span class="kw">c</span>(<span class="fl">0.5</span>, <span class="fl">0.75</span>, <span class="fl">3.5</span>, <span class="fl">0.5</span>), <span class="dt">omi =</span> <span class="kw">c</span>(<span class="fl">0.5</span>, <span class="fl">0.5</span>, <span class="fl">1.5</span>, <span class="fl">0.5</span>), <span class="dt">mgp =</span> <span class="kw">c</span>(<span class="dv">3</span>, 203 | <span class="fl">0.5</span>, <span class="fl">0.5</span>), <span class="dt">fg =</span> <span class="st">"black"</span>, <span class="dt">cex.main =</span> <span class="dv">2</span>, <span class="dt">cex.lab =</span> <span class="fl">1.5</span>, <span class="dt">col =</span> plos.color, 204 | <span class="dt">col.main =</span> plos.color, <span class="dt">col.lab =</span> plos.color, <span class="dt">xaxs =</span> <span 
class="st">"i"</span>, <span class="dt">yaxs =</span> <span class="st">"i"</span>) 205 | 206 | venn.plot <-<span class="st"> </span><span class="kw">draw.triple.venn</span>(<span class="dt">area1 =</span> rows, <span class="dt">area2 =</span> summary[<span class="dv">1</span>], <span class="dt">area3 =</span> summary[<span class="dv">2</span>]/<span class="dv">2</span>, 207 | <span class="dt">n12 =</span> summary[<span class="dv">1</span>], <span class="dt">n23 =</span> summary[<span class="dv">3</span>], <span class="dt">n13 =</span> summary[<span class="dv">2</span>]/<span class="dv">2</span>, <span class="dt">n123 =</span> summary[<span class="dv">3</span>], 208 | <span class="dt">euler.d =</span> <span class="ot">TRUE</span>, <span class="dt">scaled =</span> <span class="ot">TRUE</span>, <span class="dt">fill =</span> plos.colors, <span class="dt">cex =</span> <span class="dv">2</span>, <span class="dt">fontfamily =</span> <span class="kw">rep</span>(<span class="st">"sans"</span>, 209 | <span class="dv">7</span>)) 210 | 211 | <span class="kw">par</span>(opar)</code></pre> 212 | <div class="figure"> 213 | <img src="/images/2013-12-11_figure_5.svg" alt="Figure 5. PLOS Biology articles: sites of recommendation and discussion. Number of PLOS Biology research articles published up to May 20, 2013 that have been recommended by F1000Prime (red) or mentioned in Wikipedia (blue)." /><p class="caption"><strong>Figure 5. PLOS Biology articles: sites of recommendation and discussion.</strong> Number of <em>PLOS Biology</em> research articles published up to May 20, 2013 that have been recommended by F1000Prime (red) or mentioned in Wikipedia (blue).</p> 214 | </div> 215 | <p>With the increasing availability of ALM data, there comes a growing need to provide tools that will allow the community to interrogate them. 
A good first step for researchers, research administrators, and others interested in looking at the metrics of a larger set of PLOS articles is the recently launched ALM Reports tool <span class="citation">[@ALM2013]</span>. There are also a growing number of service providers, including Altmetric.com <span class="citation">[@Altmetric2013]</span>, ImpactStory <span class="citation">[@Impactstory2013]</span>, and Plum Analytics <span class="citation">[@Plum2013]</span> that provide similar services for articles from other publishers.</p> 216 | <p>As article-level metrics become increasingly used by publishers, funders, universities, and researchers, one of the major challenges to overcome is ensuring that standards and best practices are widely adopted and understood. The National Information Standards Organization (NISO) was recently awarded a grant by the Alfred P. Sloan Foundation to work on this <span class="citation">[@NISO2013]</span>, and PLOS is actively involved in this project. We look forward to further developing our article-level metrics and to having them adopted by other publishers, which hopefully will pave the way to their wide incorporation into research and researcher assessments.</p> 217 | <h3 id="supporting-information">Supporting Information</h3> 218 | <p><strong><a href="http://dx.doi.org/10.1371/journal.pbio.1001687.s001">Data S1</a>. Dataset of ALM for PLOS Biology articles used in the text, and R scripts that were used to produce figures.</strong> The data were collected on May 20, 2013 and include all <em>PLOS Biology</em> articles published up to that day. Data for F1000Prime were collected on August 15, 2013. All charts were produced with R version 3.0.0.</p> 219 | -------------------------------------------------------------------------------- /examples/example1.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "What Can Article Level Metrics Do for You?" 
4 | date: 2013-10-22 5 | tags: [molecular biology, cancer] 6 | bibliography: examples/example.bib 7 | csl: jats.csl 8 | article: 9 | type: research-article 10 | publisher-id: PBIOLOGY-D-13-03338 11 | doi: 10.1371/journal.pbio.1001687 12 | elocation-id: e1001687 13 | heading: Essay 14 | conflict: Martin Fenner is the technical lead for the PLOS Article-Level Metrics project. 15 | funding-statement: The author received no specific funding for this work. 16 | journal: 17 | publisher-id: plos 18 | publisher-name: Public Library of Science 19 | publisher-loc: San Francisco, USA 20 | nlm-ta: PLoS Biol 21 | pmc: plosbiol 22 | title: PLoS Biology 23 | eissn: 1545-7885 24 | pissn: 1544-9173 25 | author: 26 | - surname: Fenner 27 | given-names: Martin 28 | orcid: http://orcid.org/0000-0003-1419-2405 29 | email: mfenner@plos.org 30 | affiliation: Article-Level Metrics Project, Public Library of Science, San Francisco, California, United States of America 31 | corresp: yes 32 | copyright: 33 | holder: Martin Fenner 34 | year: 2013 35 | text: "This is an open-access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited." 36 | type: open-access 37 | link: http://creativecommons.org/licenses/by/3.0/ 38 | --- 39 | The scientific impact of a particular piece of research is reflected in 40 | how this work is taken up by the scientific community. The first 41 | systematic approach that was used to assess impact, based on the 42 | technology available at the time, was to track citations and aggregate 43 | them by journal. This strategy is not only no longer necessary — since now 44 | we can easily track citations for individual articles — but also, and more 45 | importantly, journal-based metrics are now considered a poor performance 46 | measure for individual articles [@Campbell2008; @Glanzel2013]. 
One major 47 | problem with journal-based metrics is the variation in citations per 48 | article, which means that a small percentage of articles can skew, and 49 | are responsible for, the majority of the journal-based citation impact 50 | factor, as shown by Campbell [-@Campbell2008] for the 2004 51 | *Nature* Journal Impact Factor. **Figure 1** further 52 | illustrates this point, showing the wide distribution of citation counts 53 | between *PLOS Biology* research articles published in 2010. *PLOS 54 | Biology* research articles published in 2010 have been cited a median 19 55 | times to date in Scopus, but 10% of them have been cited 50 or more 56 | times, and two articles [@Narendra:2010fw; @Dickson:2010ix] more than 57 | 300 times. *PLOS Biology* metrics are used as examples throughout this 58 | essay, and the dataset is available in the supporting information (**Data 59 | S1**). Similar data are available for an increasing 60 | number of other publications and organizations. 61 | 62 | ```r 63 | # code for figure 1: density plots for citation counts for PLOS Biology 64 | # articles published in 2010 65 | 66 | # load May 20, 2013 ALM report 67 | alm <- read.csv("../data/alm_report_plos_biology_2013-05-20.csv", stringsAsFactors = FALSE) 68 | 69 | # only look at research articles 70 | alm <- subset(alm, alm$article_type == "Research Article") 71 | 72 | # only look at papers published in 2010 73 | alm$publication_date <- as.Date(alm$publication_date) 74 | alm <- subset(alm, alm$publication_date > "2010-01-01" & alm$publication_date <= 75 | "2010-12-31") 76 | 77 | # labels 78 | colnames <- dimnames(alm)[[2]] 79 | plos.color <- "#1ebd21" 80 | plos.source <- "scopus" 81 | 82 | plos.xlab <- "Scopus Citations" 83 | plos.ylab <- "Probability" 84 | 85 | quantile <- quantile(alm[, plos.source], c(0.1, 0.5, 0.9), na.rm = TRUE) 86 | 87 | # plot the chart 88 | opar <- par(mai = c(0.5, 0.75, 0.5, 0.5), omi = c(0.25, 0.1, 0.25, 0.1), mgp = c(3, 89 | 0.5, 0.5), fg = "black", 
cex.main = 2, cex.lab = 1.5, col = plos.color, 90 | col.main = plos.color, col.lab = plos.color, xaxs = "i", yaxs = "i") 91 | 92 | d <- density(alm[, plos.source], from = 0, to = 100) 93 | d$x <- append(d$x, 0) 94 | d$y <- append(d$y, 0) 95 | plot(d, type = "n", main = NA, xlab = NA, ylab = NA, xlim = c(0, 100), frame.plot = FALSE) 96 | polygon(d, col = plos.color, border = NA) 97 | mtext(plos.xlab, side = 1, col = plos.color, cex = 1.25, outer = TRUE, adj = 1, 98 | at = 1) 99 | mtext(plos.ylab, side = 2, col = plos.color, cex = 1.25, outer = TRUE, adj = 0, 100 | at = 1, las = 1) 101 | 102 | par(opar) 103 | ``` 104 | 105 | ![**Figure 1. Citation counts for PLOS Biology articles published in 2010.** Scopus citation counts plotted as a probability distribution for all 197 *PLOS Biology* research articles published in 2010. Data collected May 20, 2013. Median 19 citations; 10% of papers have at least 50 citations.](/images/2013-12-11_figure_1.svg) 106 | 107 | 108 | Scientific impact is a multi-dimensional construct that can not be 109 | adequately measured by any single indicator 110 | [@Glanzel2013; @bollenEtal2009; @Schekman2013]. 111 | To this end, PLOS has collected and displayed a variety of metrics for 112 | all its articles since 2009. The array of different categorised 113 | article-level metrics (ALMs) used and provided by PLOS as of August 2013 114 | are shown in **Figure 2**. In addition to citations 115 | and usage statistics, i.e., how often an article has been viewed and 116 | downloaded, PLOS also collects metrics about: how often an article has 117 | been saved in online reference managers, such as Mendeley; how often an 118 | article has been discussed in its comments section online, and also in 119 | science blogs or in social media; and how often an article has been 120 | recommended by other scientists. These additional metrics provide 121 | valuable information that we would miss if we only consider citations. 
122 | Two important shortcomings of citation-based metrics are that (1) they 123 | take years to accumulate and (2) citation analysis is not always the 124 | best indicator of impact in more practical fields, such as clinical 125 | medicine [@VanEck2013]. Usage statistics often better 126 | reflect the impact of work in more practical fields, and they also 127 | sometimes better highlight articles of general interest (for example, 128 | the 2006 *PLOS Biology* article on the citation advantage of Open Access 129 | articles [@Eysenbach2006], one of the 10 most-viewed 130 | articles published in *PLOS Biology*). 131 | 132 | ![**Figure 2. Article-level metrics used by PLOS in August 2013 and their 133 | categories.** Taken from [@Lin2013] with permission by the authors.](/images/2013-12-11_figure_2.png) 134 | 135 | A bubble chart showing all 2010 *PLOS Biology* articles (**Figure 136 | 3**) gives a good overview of the year's views and 137 | citations, plus it shows the influence that the article type (as 138 | indicated by dot color) has on an article's performance as measured by 139 | these metrics. The weekly *PLOS Biology* publication schedule is 140 | reflected in this figure, with articles published on the same day 141 | present in a vertical line. **Figure 3** also shows 142 | that the two most highly cited 2010 *PLOS Biology* research articles are 143 | also among the most viewed (indicated by the red arrows), but overall 144 | there isn't a strong correlation between citations and views. The 145 | most-viewed article published in 2010 in *PLOS Biology* is an essay on 146 | Darwinian selection in robots [@Floreano2010]. 
Detailed 147 | usage statistics also allow speculation about the different ways 148 | that readers access and make use of published literature; some articles 149 | are browsed or read online due to general interest while others that are 150 | downloaded (and perhaps also printed) may reflect the reader's intention 151 | to look at the data and results in detail and to return to the article 152 | more than once. 153 | 154 | 155 | ```r 156 | # code for figure 3: Bubblechart views vs. citations for PLOS Biology 157 | # articles published in 2010. 158 | 159 | # Load required libraries 160 | library(plyr) 161 | 162 | # load May 20, 2013 ALM report 163 | alm <- read.csv("../data/alm_report_plos_biology_2013-05-20.csv", stringsAsFactors = FALSE, 164 | na.strings = c("0")) 165 | 166 | # only look at papers published in 2010 167 | alm$publication_date <- as.Date(alm$publication_date) 168 | alm <- subset(alm, alm$publication_date > "2010-01-01" & alm$publication_date <= 169 | "2010-12-31") 170 | 171 | # make sure counter values are numbers 172 | alm$counter_html <- as.numeric(alm$counter_html) 173 | 174 | # lump all papers together that are not research articles 175 | reassignType <- function(x) if (x == "Research Article") 1 else 0 176 | alm$article_group <- aaply(alm$article_type, 1, reassignType) 177 | 178 | # calculate article age in months 179 | alm$age_in_months <- (Sys.Date() - alm$publication_date)/365.25 * 12 180 | start_age_in_months <- floor(as.numeric(Sys.Date() - as.Date(strptime("2010-12-31", 181 | format = "%Y-%m-%d")))/365.25 * 12) 182 | 183 | # chart variables 184 | x <- alm$age_in_months 185 | y <- alm$counter 186 | z <- alm$scopus 187 | 188 | xlab <- "Age in Months" 189 | ylab <- "Total Views" 190 | 191 | labels <- alm$article_group 192 | col.main <- "#1ebd21" 193 | col <- "#666358" 194 | 195 | # calculate bubble diameter 196 | z <- sqrt(z/pi) 197 | 198 | # calculate bubble color 199 | getColor <- function(x) c("#c9c9c7", "#1ebd21")[x + 1] 200 | colors 
<- aaply(labels, 1, getColor) 201 | 202 | # plot the chart 203 | opar <- par(mai = c(0.5, 0.75, 0.5, 0.5), omi = c(0.25, 0.1, 0.25, 0.1), mgp = c(3, 204 | 0.5, 0.5), fg = "black", cex = 1, cex.main = 2, cex.lab = 1.5, col = "white", 205 | col.main = col.main, col.lab = col) 206 | 207 | plot(x, y, type = "n", xlim = c(start_age_in_months, start_age_in_months + 13), 208 | ylim = c(0, 60000), xlab = NA, ylab = NA, las = 1) 209 | symbols(x, y, circles = z, inches = exp(1.3)/15, bg = colors, xlim = c(start_age_in_months, 210 | start_age_in_months + 13), ylim = c(0, ymax), xlab = NA, ylab = NA, las = 1, 211 | add = TRUE) 212 | mtext(xlab, side = 1, col = col.main, cex = 1.25, outer = TRUE, adj = 1, at = 1) 213 | mtext(ylab, side = 2, col = col.main, cex = 1.25, outer = TRUE, adj = 0, at = 1, 214 | las = 1) 215 | 216 | par(opar) 217 | ``` 218 | 219 | ![**Figure 3. Views vs. citations for PLOS Biology articles published in 2010.** All 304 *PLOS Biology* articles published in 2010. Bubble size correlates with number of Scopus citations. Research articles are labeled green; all other articles are grey. Red arrows indicate the two most highly cited papers. Data collected May 20, 2013.](/images/2013-12-11_figure_3.svg) 220 | 221 | 222 | When readers first see an interesting article, their response is often 223 | to view or download it. By contrast, a citation may be one of the last 224 | outcomes of their interest, occurring only about 1 in 300 times a PLOS 225 | paper is viewed online. A lot of things happen in between these 226 | potential responses, ranging from discussions in comments, social media, 227 | and blogs, to bookmarking, to linking from websites. These activities 228 | are usually subsumed under the term “altmetrics,” and their variety can 229 | be overwhelming. 
Therefore, it helps to group them together into 230 | categories, and several organizations, including PLOS, are using the 231 | category labels of Viewed, Cited, Saved, Discussed, and Recommended 232 | (**Figures 2 and 4**, see also [@Lin2013]). 233 | 234 | 235 | ```r 236 | # code for figure 4: bar plot for Article-level metrics for PLOS Biology 237 | 238 | # Load required libraries 239 | library(reshape2) 240 | 241 | # load May 20, 2013 ALM report 242 | alm <- read.csv("../data/alm_report_plos_biology_2013-05-20.csv", stringsAsFactors = FALSE, 243 | na.strings = c(0, "0")) 244 | 245 | # only look at research articles 246 | alm <- subset(alm, alm$article_type == "Research Article") 247 | 248 | # make sure columns are in the right format 249 | alm$counter_html <- as.numeric(alm$counter_html) 250 | alm$mendeley <- as.numeric(alm$mendeley) 251 | 252 | # options 253 | plos.color <- "#1ebd21" 254 | plos.colors <- c("#a17f78", "#ad9a27", "#ad9a27", "#ad9a27", "#ad9a27", "#ad9a27", 255 | "#dcebdd", "#dcebdd", "#789aa1", "#789aa1", "#789aa1", "#304345", "#304345") 256 | 257 | # use subset of columns 258 | alm <- subset(alm, select = c("f1000", "wikipedia", "researchblogging", "comments", 259 | "facebook", "twitter", "citeulike", "mendeley", "pubmed", "crossref", "scopus", 260 | "pmc_html", "counter_html")) 261 | 262 | # calculate percentage of values that are not missing (i.e. 
have a count of 263 | # at least 1) 264 | colSums <- colSums(!is.na(alm)) * 100/length(alm$counter_html) 265 | exactSums <- sum(as.numeric(alm$pmc_html), na.rm = TRUE) 266 | 267 | # plot the chart 268 | opar <- par(mar = c(0.1, 7.25, 0.1, 0.1) + 0.1, omi = c(0.1, 0.25, 0.1, 0.1), 269 | col.main = plos.color) 270 | 271 | plos.names <- c("F1000Prime", "Wikipedia", "Research Blogging", "PLOS Comments", 272 | "Facebook", "Twitter", "CiteULike", "Mendeley", "PubMed Citations", "CrossRef", 273 | "Scopus", "PMC HTML Views", "PLOS HTML Views") 274 | y <- barplot(colSums, horiz = TRUE, col = plos.colors, border = NA, xlab = plos.names, 275 | xlim = c(0, 120), axes = FALSE, names.arg = plos.names, las = 1, adj = 0) 276 | text(colSums + 6, y, labels = sprintf("%1.0f%%", colSums)) 277 | 278 | par(opar) 279 | ``` 280 | 281 | ![**Figure 4. Article-level metrics for PLOS Biology.** Proportion of all 1,706 *PLOS Biology* research articles published up to May 20, 2013 mentioned by particular article-level metrics source. Colors indicate categories (Viewed, Cited, Saved, Discussed, Recommended), as used on the PLOS website.](/images/2013-12-11_figure_4.svg) 282 | 283 | 284 | All *PLOS Biology* articles are viewed and downloaded, and almost all of 285 | them (all research articles and nearly all front matter) will be cited 286 | sooner or later. Almost all of them will also be bookmarked in online 287 | reference managers, such as Mendeley, but the percentage of articles 288 | that are discussed online is much smaller. Some of these percentages are 289 | time dependent; the use of social media discussion platforms, such as 290 | Twitter and Facebook for example, has increased in recent years (93% of 291 | *PLOS Biology* research articles published since June 2012 have been 292 | discussed on Twitter, and 63% mentioned on Facebook). 
These are the 293 | locations where most of the online discussion around published articles 294 | currently seems to take place; the percentage of papers with comments on 295 | the PLOS website or that have science blog posts written about them is 296 | much smaller. Not all of this online discussion is about research 297 | articles, and perhaps, not surprisingly, the most-tweeted PLOS article 298 | overall (with more than 1,100 tweets) is a *PLOS Biology* perspective on 299 | the use of social media for scientists [@Bik2013]. 300 | 301 | Some metrics are not so much indicators of a broad online discussion, 302 | but rather focus on highlighting articles of particular interest. For 303 | example, science blogs allow a more detailed discussion of an article as 304 | compared to comments or tweets, and journals themselves sometimes choose 305 | to highlight a paper on their own blogs, allowing for a more digestible 306 | explanation of the science for the non-expert reader 307 | [@Fausto2012]. Coverage by other bloggers also serves 308 | the same purpose; a good example of this is one recent post on the 309 | OpenHelix Blog [@Video2012] that contains video footage 310 | of the second author of a 2010 *PLOS Biology* article 311 | [@Dalloul2010] discussing the turkey genome. 312 | 313 | F1000Prime, a commercial service of recommendations by expert 314 | scientists, was added to the PLOS Article-Level Metrics in August 2013. 315 | We now highlight on the PLOS website when any articles have received at 316 | least one recommendation within F1000Prime. We also monitor when an 317 | article has been cited within the widely used modern-day online 318 | encyclopedia, Wikipedia. A good example of the latter is the Tasmanian 319 | devil Wikipedia page [@Tasmanian2013] that links to a 320 | *PLOS Biology* research article published in 2010 321 | [@Nilsson2010]. 
While an F1000Prime recommendation is a 322 | strong endorsement from peer(s) in the scientific community, being 323 | included in a Wikipedia page is akin to making it into a textbook about 324 | the subject area and being read by a much wider audience that goes 325 | beyond the scientific community. 326 | 327 | *PLOS Biology* is the PLOS journal with the highest percentage of 328 | articles recommended in F1000Prime and mentioned in Wikipedia, but there 329 | is only partial overlap between the two groups of articles because they 330 | focus on different audiences (**Figure 5**). These 331 | recommendations and mentions in turn show correlations with other 332 | metrics, but not simple ones; you can't assume, for example, that highly 333 | cited articles are more likely to be recommended by F1000Prime, so it 334 | will be interesting to monitor these trends now that we include this 335 | information. 336 | 337 | 338 | ```r 339 | # code for figure 5: Venn diagram F1000 vs. Wikipedia for PLOS Biology 340 | # articles 341 | 342 | # load required libraries 343 | library("plyr") 344 | library("VennDiagram") 345 | 346 | # load May 20, 2013 ALM report 347 | alm <- read.csv("../data/alm_report_plos_biology_2013-05-20.csv", stringsAsFactors = FALSE) 348 | 349 | # only look at research articles 350 | alm <- subset(alm, alm$article_type == "Research Article") 351 | 352 | # group articles based on values in Wikipedia and F1000 353 | reassignWikipedia <- function(x) if (x > 0) 1 else 0 354 | alm$wikipedia_bin <- aaply(alm$wikipedia, 1, reassignWikipedia) 355 | reassignF1000 <- function(x) if (x > 0) 2 else 0 356 | alm$f1000_bin <- aaply(alm$f1000, 1, reassignF1000) 357 | alm$article_group = alm$wikipedia_bin + alm$f1000_bin 358 | reassignCombined <- function(x) if (x == 3) 1 else 0 359 | alm$combined_bin <- aaply(alm$article_group, 1, reassignCombined) 360 | reassignNo <- function(x) if (x == 0) 1 else 0 361 | alm$no_bin <- aaply(alm$article_group, 1, reassignNo) 362 | 363 | # 
remember to divide f1000_bin by 2, as this is the default value 364 | summary <- colSums(subset(alm, select = c("wikipedia_bin", "f1000_bin", "combined_bin", 365 | "no_bin")), na.rm = TRUE) 366 | rows <- nrow(alm) 367 | 368 | # options 369 | plos.colors <- c("#c9c9c7", "#0000ff", "#ff0000") 370 | 371 | # plot the chart 372 | opar <- par(mai = c(0.5, 0.75, 3.5, 0.5), omi = c(0.5, 0.5, 1.5, 0.5), mgp = c(3, 373 | 0.5, 0.5), fg = "black", cex.main = 2, cex.lab = 1.5, col = plos.color, 374 | col.main = plos.color, col.lab = plos.color, xaxs = "i", yaxs = "i") 375 | 376 | venn.plot <- draw.triple.venn(area1 = rows, area2 = summary[1], area3 = summary[2]/2, 377 | n12 = summary[1], n23 = summary[3], n13 = summary[2]/2, n123 = summary[3], 378 | euler.d = TRUE, scaled = TRUE, fill = plos.colors, cex = 2, fontfamily = rep("sans", 379 | 7)) 380 | 381 | par(opar) 382 | ``` 383 | 384 | ![**Figure 5. PLOS Biology articles: sites of recommendation and discussion.** Number of *PLOS Biology* research articles published up to May 20, 2013 that have been recommended by F1000Prime (red) or mentioned in Wikipedia (blue).](/images/2013-12-11_figure_5.svg) 385 | 386 | 387 | With the increasing availability of ALM data, there comes a growing need 388 | to provide tools that will allow the community to interrogate them. A 389 | good first step for researchers, research administrators, and others 390 | interested in looking at the metrics of a larger set of PLOS articles is 391 | the recently launched ALM Reports tool [@ALM2013]. There 392 | are also a growing number of service providers, including Altmetric.com 393 | [@Altmetric2013], ImpactStory [@Impactstory2013], and Plum Analytics 394 | [@Plum2013] that provide similar services for articles 395 | from other publishers. 
396 | 397 | As article-level metrics become increasingly used by publishers, 398 | funders, universities, and researchers, one of the major challenges to 399 | overcome is ensuring that standards and best practices are widely 400 | adopted and understood. The National Information Standards Organization 401 | (NISO) was recently awarded a grant by the Alfred P. Sloan Foundation to 402 | work on this [@NISO2013], and PLOS is actively involved 403 | in this project. We look forward to further developing our article-level 404 | metrics and to having them adopted by other publishers, which hopefully 405 | will pave the way to their wide incorporation into research and 406 | researcher assessments. 407 | 408 | ### Supporting Information 409 | 410 | **[Data S1](http://dx.doi.org/10.1371/journal.pbio.1001687.s001). 411 | Dataset of ALM for PLOS Biology articles used in the text, and 412 | R scripts that were used to produce figures.** The data were collected 413 | on May 20, 2013 and include all *PLOS Biology* articles published up to 414 | that day. Data for F1000Prime were collected on August 15, 2013. All 415 | charts were produced with R version 3.0.0. 416 | -------------------------------------------------------------------------------- /examples/example1.tex: -------------------------------------------------------------------------------- 1 | \emph{Article-level metrics (ALMs) provide a wide range of metrics about 2 | the uptake of an individual journal article by the scientific community 3 | after publication. They include citations, usage statistics, discussions 4 | in online comments and social media, social bookmarking, and 5 | recommendations. 
In this essay, we describe why article-level metrics 6 | are an important extension of traditional citation-based journal metrics 7 | and provide a number of examples from ALM data collected for PLOS 8 | Biology.} 9 | 10 | \begin{quote} 11 | This is an open-access article distributed under the terms of the 12 | Creative Commons Attribution License, authored by me and 13 | \href{http://dx.doi.org/10.1371/journal.pbio.1001687}{originally 14 | published Oct 22, 2013 in PLOS Biology}. 15 | \end{quote} 16 | 17 | The scientific impact of a particular piece of research is reflected in 18 | how this work is taken up by the scientific community. The first 19 | systematic approach that was used to assess impact, based on the 20 | technology available at the time, was to track citations and aggregate 21 | them by journal. This strategy is not only no longer necessary --- since 22 | now we can easily track citations for individual articles --- but also, 23 | and more importantly, journal-based metrics are now considered a poor 24 | performance measure for individual articles {[}@Campbell2008; 25 | @Glanzel2013{]}. One major problem with journal-based metrics is the 26 | variation in citations per article, which means that a small percentage 27 | of articles can skew, and are responsible for, the majority of the 28 | journal-based citation impact factor, as shown by Campbell 29 | {[}-@Campbell2008{]} for the 2004 \emph{Nature} Journal Impact Factor. 30 | \textbf{Figure 1} further illustrates this point, showing the wide 31 | distribution of citation counts between \emph{PLOS Biology} research 32 | articles published in 2010. \emph{PLOS Biology} research articles 33 | published in 2010 have been cited a median 19 times to date in Scopus, 34 | but 10\% of them have been cited 50 or more times, and two articles 35 | {[}@Narendra:2010fw; @Dickson:2010ix{]} more than 300 times. 
\emph{PLOS 36 | Biology} metrics are used as examples throughout this essay, and the 37 | dataset is available in the supporting information (\textbf{Data S1}). 38 | Similar data are available for an increasing number of other 39 | publications and organizations. 40 | 41 | \begin{Shaded} 42 | \begin{Highlighting}[] 43 | \CommentTok{# code for figure 1: density plots for citation counts for PLOS Biology} 44 | \CommentTok{# articles published in 2010} 45 | 46 | \CommentTok{# load May 20, 2013 ALM report} 47 | \NormalTok{alm <-}\StringTok{ }\KeywordTok{read.csv}\NormalTok{(}\StringTok{"../data/alm_report_plos_biology_2013-05-20.csv"}\NormalTok{, }\DataTypeTok{stringsAsFactors =} \OtherTok{FALSE}\NormalTok{)} 48 | 49 | \CommentTok{# only look at research articles} 50 | \NormalTok{alm <-}\StringTok{ }\KeywordTok{subset}\NormalTok{(alm, alm$article_type ==}\StringTok{ "Research Article"}\NormalTok{)} 51 | 52 | \CommentTok{# only look at papers published in 2010} 53 | \NormalTok{alm$publication_date <-}\StringTok{ }\KeywordTok{as.Date}\NormalTok{(alm$publication_date)} 54 | \NormalTok{alm <-}\StringTok{ }\KeywordTok{subset}\NormalTok{(alm, alm$publication_date >}\StringTok{ "2010-01-01"} \NormalTok{&}\StringTok{ }\NormalTok{alm$publication_date <=} 55 | \StringTok{ "2010-12-31"}\NormalTok{)} 56 | 57 | \CommentTok{# labels} 58 | \NormalTok{colnames <-}\StringTok{ }\KeywordTok{dimnames}\NormalTok{(alm)[[}\DecValTok{2}\NormalTok{]]} 59 | \NormalTok{plos.color <-}\StringTok{ "#1ebd21"} 60 | \NormalTok{plos.source <-}\StringTok{ "scopus"} 61 | 62 | \NormalTok{plos.xlab <-}\StringTok{ "Scopus Citations"} 63 | \NormalTok{plos.ylab <-}\StringTok{ "Probability"} 64 | 65 | \NormalTok{quantile <-}\StringTok{ }\KeywordTok{quantile}\NormalTok{(alm[, plos.source], }\KeywordTok{c}\NormalTok{(}\FloatTok{0.1}\NormalTok{, }\FloatTok{0.5}\NormalTok{, }\FloatTok{0.9}\NormalTok{), }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{)} 66 | 67 | \CommentTok{# plot the chart} 68 | \NormalTok{opar 
<-}\StringTok{ }\KeywordTok{par}\NormalTok{(}\DataTypeTok{mai =} \KeywordTok{c}\NormalTok{(}\FloatTok{0.5}\NormalTok{, }\FloatTok{0.75}\NormalTok{, }\FloatTok{0.5}\NormalTok{, }\FloatTok{0.5}\NormalTok{), }\DataTypeTok{omi =} \KeywordTok{c}\NormalTok{(}\FloatTok{0.25}\NormalTok{, }\FloatTok{0.1}\NormalTok{, }\FloatTok{0.25}\NormalTok{, }\FloatTok{0.1}\NormalTok{), }\DataTypeTok{mgp =} \KeywordTok{c}\NormalTok{(}\DecValTok{3}\NormalTok{,} 69 | \FloatTok{0.5}\NormalTok{, }\FloatTok{0.5}\NormalTok{), }\DataTypeTok{fg =} \StringTok{"black"}\NormalTok{, }\DataTypeTok{cex.main =} \DecValTok{2}\NormalTok{, }\DataTypeTok{cex.lab =} \FloatTok{1.5}\NormalTok{, }\DataTypeTok{col =} \NormalTok{plos.color,} 70 | \DataTypeTok{col.main =} \NormalTok{plos.color, }\DataTypeTok{col.lab =} \NormalTok{plos.color, }\DataTypeTok{xaxs =} \StringTok{"i"}\NormalTok{, }\DataTypeTok{yaxs =} \StringTok{"i"}\NormalTok{)} 71 | 72 | \NormalTok{d <-}\StringTok{ }\KeywordTok{density}\NormalTok{(alm[, plos.source], }\DataTypeTok{from =} \DecValTok{0}\NormalTok{, }\DataTypeTok{to =} \DecValTok{100}\NormalTok{)} 73 | \NormalTok{d$x <-}\StringTok{ }\KeywordTok{append}\NormalTok{(d$x, }\DecValTok{0}\NormalTok{)} 74 | \NormalTok{d$y <-}\StringTok{ }\KeywordTok{append}\NormalTok{(d$y, }\DecValTok{0}\NormalTok{)} 75 | \KeywordTok{plot}\NormalTok{(d, }\DataTypeTok{type =} \StringTok{"n"}\NormalTok{, }\DataTypeTok{main =} \OtherTok{NA}\NormalTok{, }\DataTypeTok{xlab =} \OtherTok{NA}\NormalTok{, }\DataTypeTok{ylab =} \OtherTok{NA}\NormalTok{, }\DataTypeTok{xlim =} \KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{, }\DecValTok{100}\NormalTok{), }\DataTypeTok{frame.plot =} \OtherTok{FALSE}\NormalTok{)} 76 | \KeywordTok{polygon}\NormalTok{(d, }\DataTypeTok{col =} \NormalTok{plos.color, }\DataTypeTok{border =} \OtherTok{NA}\NormalTok{)} 77 | \KeywordTok{mtext}\NormalTok{(plos.xlab, }\DataTypeTok{side =} \DecValTok{1}\NormalTok{, }\DataTypeTok{col =} \NormalTok{plos.color, }\DataTypeTok{cex =} 
\FloatTok{1.25}\NormalTok{, }\DataTypeTok{outer =} \OtherTok{TRUE}\NormalTok{, }\DataTypeTok{adj =} \DecValTok{1}\NormalTok{,} 78 | \DataTypeTok{at =} \DecValTok{1}\NormalTok{)} 79 | \KeywordTok{mtext}\NormalTok{(plos.ylab, }\DataTypeTok{side =} \DecValTok{2}\NormalTok{, }\DataTypeTok{col =} \NormalTok{plos.color, }\DataTypeTok{cex =} \FloatTok{1.25}\NormalTok{, }\DataTypeTok{outer =} \OtherTok{TRUE}\NormalTok{, }\DataTypeTok{adj =} \DecValTok{0}\NormalTok{,} 80 | \DataTypeTok{at =} \DecValTok{1}\NormalTok{, }\DataTypeTok{las =} \DecValTok{1}\NormalTok{)} 81 | 82 | \KeywordTok{par}\NormalTok{(opar)} 83 | \end{Highlighting} 84 | \end{Shaded} 85 | 86 | \begin{figure}[htbp] 87 | \centering 88 | \includegraphics{/images/2013-12-11_figure_1.svg} 89 | \caption{\textbf{Figure 1. Citation counts for PLOS Biology articles 90 | published in 2010.} Scopus citation counts plotted as a probability 91 | distribution for all 197 \emph{PLOS Biology} research articles published 92 | in 2010. Data collected May 20, 2013. Median 19 citations; 10\% of 93 | papers have at least 50 citations.} 94 | \end{figure} 95 | 96 | Scientific impact is a multi-dimensional construct that can not be 97 | adequately measured by any single indicator {[}@Glanzel2013; 98 | @bollenEtal2009; @Schekman2013{]}. To this end, PLOS has collected and 99 | displayed a variety of metrics for all its articles since 2009. The 100 | array of different categorised article-level metrics (ALMs) used and 101 | provided by PLOS as of August 2013 are shown in \textbf{Figure 2}. In 102 | addition to citations and usage statistics, i.e., how often an article 103 | has been viewed and downloaded, PLOS also collects metrics about: how 104 | often an article has been saved in online reference managers, such as 105 | Mendeley; how often an article has been discussed in its comments 106 | section online, and also in science blogs or in social media; and how 107 | often an article has been recommended by other scientists. 
These 108 | additional metrics provide valuable information that we would miss if we 109 | only consider citations. Two important shortcomings of citation-based 110 | metrics are that (1) they take years to accumulate and (2) citation 111 | analysis is not always the best indicator of impact in more practical 112 | fields, such as clinical medicine {[}@VanEck2013{]}. Usage statistics 113 | often better reflect the impact of work in more practical fields, and 114 | they also sometimes better highlight articles of general interest (for 115 | example, the 2006 \emph{PLOS Biology} article on the citation advantage 116 | of Open Access articles {[}@Eysenbach2006{]}, one of the 10 most-viewed 117 | articles published in \emph{PLOS Biology}). 118 | 119 | \begin{figure}[htbp] 120 | \centering 121 | \includegraphics{/images/2013-12-11_figure_2.png} 122 | \caption{\textbf{Figure 2. Article-level metrics used by PLOS in August 123 | 2013 and their categories.} Taken from {[}@Lin2013{]} with permission by 124 | the authors.} 125 | \end{figure} 126 | 127 | A bubble chart showing all 2010 \emph{PLOS Biology} articles 128 | (\textbf{Figure 3}) gives a good overview of the year's views and 129 | citations, plus it shows the influence that the article type (as 130 | indicated by dot color) has on an article's performance as measured by 131 | these metrics. The weekly \emph{PLOS Biology} publication schedule is 132 | reflected in this figure, with articles published on the same day 133 | present in a vertical line. \textbf{Figure 3} also shows that the two 134 | most highly cited 2010 \emph{PLOS Biology} research articles are also 135 | among the most viewed (indicated by the red arrows), but overall there 136 | isn't a strong correlation between citations and views. The most-viewed 137 | article published in 2010 in \emph{PLOS Biology} is an essay on 138 | Darwinian selection in robots {[}@Floreano2010{]}. 
Detailed usage 139 | statistics also allow speculation about the different ways that 140 | readers access and make use of published literature; some articles are 141 | browsed or read online due to general interest while others that are 142 | downloaded (and perhaps also printed) may reflect the reader's intention 143 | to look at the data and results in detail and to return to the article 144 | more than once. 145 | 146 | \begin{Shaded} 147 | \begin{Highlighting}[] 148 | \CommentTok{# code for figure 3: Bubblechart views vs. citations for PLOS Biology} 149 | \CommentTok{# articles published in 2010.} 150 | 151 | \CommentTok{# Load required libraries} 152 | \KeywordTok{library}\NormalTok{(plyr)} 153 | 154 | \CommentTok{# load May 20, 2013 ALM report} 155 | \NormalTok{alm <-}\StringTok{ }\KeywordTok{read.csv}\NormalTok{(}\StringTok{"../data/alm_report_plos_biology_2013-05-20.csv"}\NormalTok{, }\DataTypeTok{stringsAsFactors =} \OtherTok{FALSE}\NormalTok{,} 156 | \DataTypeTok{na.strings =} \KeywordTok{c}\NormalTok{(}\StringTok{"0"}\NormalTok{))} 157 | 158 | \CommentTok{# only look at papers published in 2010} 159 | \NormalTok{alm$publication_date <-}\StringTok{ }\KeywordTok{as.Date}\NormalTok{(alm$publication_date)} 160 | \NormalTok{alm <-}\StringTok{ }\KeywordTok{subset}\NormalTok{(alm, alm$publication_date >}\StringTok{ "2010-01-01"} \NormalTok{&}\StringTok{ }\NormalTok{alm$publication_date <=} 161 | \StringTok{ "2010-12-31"}\NormalTok{)} 162 | 163 | \CommentTok{# make sure counter values are numbers} 164 | \NormalTok{alm$counter_html <-}\StringTok{ }\KeywordTok{as.numeric}\NormalTok{(alm$counter_html)} 165 | 166 | \CommentTok{# lump all papers together that are not research articles} 167 | \NormalTok{reassignType <-}\StringTok{ }\NormalTok{function(x) if (x ==}\StringTok{ "Research Article"}\NormalTok{) }\DecValTok{1} \NormalTok{else }\DecValTok{0} 168 | \NormalTok{alm$article_group <-}\StringTok{ }\KeywordTok{aaply}\NormalTok{(alm$article_type, 
}\DecValTok{1}\NormalTok{, reassignType)} 169 | 170 | \CommentTok{# calculate article age in months} 171 | \NormalTok{alm$age_in_months <-}\StringTok{ }\NormalTok{(}\KeywordTok{Sys.Date}\NormalTok{() -}\StringTok{ }\NormalTok{alm$publication_date)/}\FloatTok{365.25} \NormalTok{*}\StringTok{ }\DecValTok{12} 172 | \NormalTok{start_age_in_months <-}\StringTok{ }\KeywordTok{floor}\NormalTok{(}\KeywordTok{as.numeric}\NormalTok{(}\KeywordTok{Sys.Date}\NormalTok{() -}\StringTok{ }\KeywordTok{as.Date}\NormalTok{(}\KeywordTok{strptime}\NormalTok{(}\StringTok{"2010-12-31"}\NormalTok{,} 173 | \DataTypeTok{format =} \StringTok{"%Y-%m-%d"}\NormalTok{)))/}\FloatTok{365.25} \NormalTok{*}\StringTok{ }\DecValTok{12}\NormalTok{)} 174 | 175 | \CommentTok{# chart variables} 176 | \NormalTok{x <-}\StringTok{ }\NormalTok{alm$age_in_months} 177 | \NormalTok{y <-}\StringTok{ }\NormalTok{alm$counter} 178 | \NormalTok{z <-}\StringTok{ }\NormalTok{alm$scopus} 179 | 180 | \NormalTok{xlab <-}\StringTok{ "Age in Months"} 181 | \NormalTok{ylab <-}\StringTok{ "Total Views"} 182 | 183 | \NormalTok{labels <-}\StringTok{ }\NormalTok{alm$article_group} 184 | \NormalTok{col.main <-}\StringTok{ "#1ebd21"} 185 | \NormalTok{col <-}\StringTok{ "#666358"} 186 | 187 | \CommentTok{# calculate bubble diameter} 188 | \NormalTok{z <-}\StringTok{ }\KeywordTok{sqrt}\NormalTok{(z/pi)} 189 | 190 | \CommentTok{# calculate bubble color} 191 | \NormalTok{getColor <-}\StringTok{ }\NormalTok{function(x) }\KeywordTok{c}\NormalTok{(}\StringTok{"#c9c9c7"}\NormalTok{, }\StringTok{"#1ebd21"}\NormalTok{)[x +}\StringTok{ }\DecValTok{1}\NormalTok{]} 192 | \NormalTok{colors <-}\StringTok{ }\KeywordTok{aaply}\NormalTok{(labels, }\DecValTok{1}\NormalTok{, getColor)} 193 | 194 | \CommentTok{# plot the chart} 195 | \NormalTok{opar <-}\StringTok{ }\KeywordTok{par}\NormalTok{(}\DataTypeTok{mai =} \KeywordTok{c}\NormalTok{(}\FloatTok{0.5}\NormalTok{, }\FloatTok{0.75}\NormalTok{, }\FloatTok{0.5}\NormalTok{, }\FloatTok{0.5}\NormalTok{), 
}\DataTypeTok{omi =} \KeywordTok{c}\NormalTok{(}\FloatTok{0.25}\NormalTok{, }\FloatTok{0.1}\NormalTok{, }\FloatTok{0.25}\NormalTok{, }\FloatTok{0.1}\NormalTok{), }\DataTypeTok{mgp =} \KeywordTok{c}\NormalTok{(}\DecValTok{3}\NormalTok{,} 196 | \FloatTok{0.5}\NormalTok{, }\FloatTok{0.5}\NormalTok{), }\DataTypeTok{fg =} \StringTok{"black"}\NormalTok{, }\DataTypeTok{cex =} \DecValTok{1}\NormalTok{, }\DataTypeTok{cex.main =} \DecValTok{2}\NormalTok{, }\DataTypeTok{cex.lab =} \FloatTok{1.5}\NormalTok{, }\DataTypeTok{col =} \StringTok{"white"}\NormalTok{,} 197 | \DataTypeTok{col.main =} \NormalTok{col.main, }\DataTypeTok{col.lab =} \NormalTok{col)} 198 | 199 | \KeywordTok{plot}\NormalTok{(x, y, }\DataTypeTok{type =} \StringTok{"n"}\NormalTok{, }\DataTypeTok{xlim =} \KeywordTok{c}\NormalTok{(start_age_in_months, start_age_in_months +}\StringTok{ }\DecValTok{13}\NormalTok{),} 200 | \DataTypeTok{ylim =} \KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{, }\DecValTok{60000}\NormalTok{), }\DataTypeTok{xlab =} \OtherTok{NA}\NormalTok{, }\DataTypeTok{ylab =} \OtherTok{NA}\NormalTok{, }\DataTypeTok{las =} \DecValTok{1}\NormalTok{)} 201 | \KeywordTok{symbols}\NormalTok{(x, y, }\DataTypeTok{circles =} \NormalTok{z, }\DataTypeTok{inches =} \KeywordTok{exp}\NormalTok{(}\FloatTok{1.3}\NormalTok{)/}\DecValTok{15}\NormalTok{, }\DataTypeTok{bg =} \NormalTok{colors, }\DataTypeTok{xlim =} \KeywordTok{c}\NormalTok{(start_age_in_months,} 202 | \NormalTok{start_age_in_months +}\StringTok{ }\DecValTok{13}\NormalTok{), }\DataTypeTok{ylim =} \KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{, ymax), }\DataTypeTok{xlab =} \OtherTok{NA}\NormalTok{, }\DataTypeTok{ylab =} \OtherTok{NA}\NormalTok{, }\DataTypeTok{las =} \DecValTok{1}\NormalTok{,} 203 | \DataTypeTok{add =} \OtherTok{TRUE}\NormalTok{)} 204 | \KeywordTok{mtext}\NormalTok{(xlab, }\DataTypeTok{side =} \DecValTok{1}\NormalTok{, }\DataTypeTok{col =} \NormalTok{col.main, }\DataTypeTok{cex =} \FloatTok{1.25}\NormalTok{, }\DataTypeTok{outer 
=} \OtherTok{TRUE}\NormalTok{, }\DataTypeTok{adj =} \DecValTok{1}\NormalTok{, }\DataTypeTok{at =} \DecValTok{1}\NormalTok{)} 205 | \KeywordTok{mtext}\NormalTok{(ylab, }\DataTypeTok{side =} \DecValTok{2}\NormalTok{, }\DataTypeTok{col =} \NormalTok{col.main, }\DataTypeTok{cex =} \FloatTok{1.25}\NormalTok{, }\DataTypeTok{outer =} \OtherTok{TRUE}\NormalTok{, }\DataTypeTok{adj =} \DecValTok{0}\NormalTok{, }\DataTypeTok{at =} \DecValTok{1}\NormalTok{,} 206 | \DataTypeTok{las =} \DecValTok{1}\NormalTok{)} 207 | 208 | \KeywordTok{par}\NormalTok{(opar)} 209 | \end{Highlighting} 210 | \end{Shaded} 211 | 212 | \begin{figure}[htbp] 213 | \centering 214 | \includegraphics{/images/2013-12-11_figure_3.svg} 215 | \caption{\textbf{Figure 3. Views vs.~citations for PLOS Biology articles 216 | published in 2010.} All 304 \emph{PLOS Biology} articles published in 217 | 2010. Bubble size correlates with number of Scopus citations. Research 218 | articles are labeled green; all other articles are grey. Red arrows 219 | indicate the two most highly cited papers. Data collected May 20, 2013.} 220 | \end{figure} 221 | 222 | When readers first see an interesting article, their response is often 223 | to view or download it. By contrast, a citation may be one of the last 224 | outcomes of their interest, occurring only about 1 in 300 times a PLOS 225 | paper is viewed online. A lot of things happen in between these 226 | potential responses, ranging from discussions in comments, social media, 227 | and blogs, to bookmarking, to linking from websites. These activities 228 | are usually subsumed under the term ``altmetrics,'' and their variety 229 | can be overwhelming. Therefore, it helps to group them together into 230 | categories, and several organizations, including PLOS, are using the 231 | category labels of Viewed, Cited, Saved, Discussed, and Recommended 232 | (\textbf{Figures 2 and 4}, see also {[}@Lin2013{]}). 
233 | 234 | \begin{Shaded} 235 | \begin{Highlighting}[] 236 | \CommentTok{# code for figure 4: bar plot for Article-level metrics for PLOS Biology} 237 | 238 | \CommentTok{# Load required libraries} 239 | \KeywordTok{library}\NormalTok{(reshape2)} 240 | 241 | \CommentTok{# load May 20, 2013 ALM report} 242 | \NormalTok{alm <-}\StringTok{ }\KeywordTok{read.csv}\NormalTok{(}\StringTok{"../data/alm_report_plos_biology_2013-05-20.csv"}\NormalTok{, }\DataTypeTok{stringsAsFactors =} \OtherTok{FALSE}\NormalTok{,} 243 | \DataTypeTok{na.strings =} \KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{, }\StringTok{"0"}\NormalTok{))} 244 | 245 | \CommentTok{# only look at research articles} 246 | \NormalTok{alm <-}\StringTok{ }\KeywordTok{subset}\NormalTok{(alm, alm$article_type ==}\StringTok{ "Research Article"}\NormalTok{)} 247 | 248 | \CommentTok{# make sure columns are in the right format} 249 | \NormalTok{alm$counter_html <-}\StringTok{ }\KeywordTok{as.numeric}\NormalTok{(alm$counter_html)} 250 | \NormalTok{alm$mendeley <-}\StringTok{ }\KeywordTok{as.numeric}\NormalTok{(alm$mendeley)} 251 | 252 | \CommentTok{# options} 253 | \NormalTok{plos.color <-}\StringTok{ "#1ebd21"} 254 | \NormalTok{plos.colors <-}\StringTok{ }\KeywordTok{c}\NormalTok{(}\StringTok{"#a17f78"}\NormalTok{, }\StringTok{"#ad9a27"}\NormalTok{, }\StringTok{"#ad9a27"}\NormalTok{, }\StringTok{"#ad9a27"}\NormalTok{, }\StringTok{"#ad9a27"}\NormalTok{, }\StringTok{"#ad9a27"}\NormalTok{,} 255 | \StringTok{"#dcebdd"}\NormalTok{, }\StringTok{"#dcebdd"}\NormalTok{, }\StringTok{"#789aa1"}\NormalTok{, }\StringTok{"#789aa1"}\NormalTok{, }\StringTok{"#789aa1"}\NormalTok{, }\StringTok{"#304345"}\NormalTok{, }\StringTok{"#304345"}\NormalTok{)} 256 | 257 | \CommentTok{# use subset of columns} 258 | \NormalTok{alm <-}\StringTok{ }\KeywordTok{subset}\NormalTok{(alm, }\DataTypeTok{select =} \KeywordTok{c}\NormalTok{(}\StringTok{"f1000"}\NormalTok{, }\StringTok{"wikipedia"}\NormalTok{, 
}\StringTok{"researchblogging"}\NormalTok{, }\StringTok{"comments"}\NormalTok{,} 259 | \StringTok{"facebook"}\NormalTok{, }\StringTok{"twitter"}\NormalTok{, }\StringTok{"citeulike"}\NormalTok{, }\StringTok{"mendeley"}\NormalTok{, }\StringTok{"pubmed"}\NormalTok{, }\StringTok{"crossref"}\NormalTok{, }\StringTok{"scopus"}\NormalTok{,} 260 | \StringTok{"pmc_html"}\NormalTok{, }\StringTok{"counter_html"}\NormalTok{))} 261 | 262 | \CommentTok{# calculate percentage of values that are not missing (i.e. have a count of} 263 | \CommentTok{# at least 1)} 264 | \NormalTok{colSums <-}\StringTok{ }\KeywordTok{colSums}\NormalTok{(!}\KeywordTok{is.na}\NormalTok{(alm)) *}\StringTok{ }\DecValTok{100}\NormalTok{/}\KeywordTok{length}\NormalTok{(alm$counter_html)} 265 | \NormalTok{exactSums <-}\StringTok{ }\KeywordTok{sum}\NormalTok{(}\KeywordTok{as.numeric}\NormalTok{(alm$pmc_html), }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{)} 266 | 267 | \CommentTok{# plot the chart} 268 | \NormalTok{opar <-}\StringTok{ }\KeywordTok{par}\NormalTok{(}\DataTypeTok{mar =} \KeywordTok{c}\NormalTok{(}\FloatTok{0.1}\NormalTok{, }\FloatTok{7.25}\NormalTok{, }\FloatTok{0.1}\NormalTok{, }\FloatTok{0.1}\NormalTok{) +}\StringTok{ }\FloatTok{0.1}\NormalTok{, }\DataTypeTok{omi =} \KeywordTok{c}\NormalTok{(}\FloatTok{0.1}\NormalTok{, }\FloatTok{0.25}\NormalTok{, }\FloatTok{0.1}\NormalTok{, }\FloatTok{0.1}\NormalTok{),} 269 | \DataTypeTok{col.main =} \NormalTok{plos.color)} 270 | 271 | \NormalTok{plos.names <-}\StringTok{ }\KeywordTok{c}\NormalTok{(}\StringTok{"F1000Prime"}\NormalTok{, }\StringTok{"Wikipedia"}\NormalTok{, }\StringTok{"Research Blogging"}\NormalTok{, }\StringTok{"PLOS Comments"}\NormalTok{,} 272 | \StringTok{"Facebook"}\NormalTok{, }\StringTok{"Twitter"}\NormalTok{, }\StringTok{"CiteULike"}\NormalTok{, }\StringTok{"Mendeley"}\NormalTok{, }\StringTok{"PubMed Citations"}\NormalTok{, }\StringTok{"CrossRef"}\NormalTok{,} 273 | \StringTok{"Scopus"}\NormalTok{, }\StringTok{"PMC HTML 
Views"}\NormalTok{, }\StringTok{"PLOS HTML Views"}\NormalTok{)} 274 | \NormalTok{y <-}\StringTok{ }\KeywordTok{barplot}\NormalTok{(colSums, }\DataTypeTok{horiz =} \OtherTok{TRUE}\NormalTok{, }\DataTypeTok{col =} \NormalTok{plos.colors, }\DataTypeTok{border =} \OtherTok{NA}\NormalTok{, }\DataTypeTok{xlab =} \NormalTok{plos.names,} 275 | \DataTypeTok{xlim =} \KeywordTok{c}\NormalTok{(}\DecValTok{0}\NormalTok{, }\DecValTok{120}\NormalTok{), }\DataTypeTok{axes =} \OtherTok{FALSE}\NormalTok{, }\DataTypeTok{names.arg =} \NormalTok{plos.names, }\DataTypeTok{las =} \DecValTok{1}\NormalTok{, }\DataTypeTok{adj =} \DecValTok{0}\NormalTok{)} 276 | \KeywordTok{text}\NormalTok{(colSums +}\StringTok{ }\DecValTok{6}\NormalTok{, y, }\DataTypeTok{labels =} \KeywordTok{sprintf}\NormalTok{(}\StringTok{"%1.0f%%"}\NormalTok{, colSums))} 277 | 278 | \KeywordTok{par}\NormalTok{(opar)} 279 | \end{Highlighting} 280 | \end{Shaded} 281 | 282 | \begin{figure}[htbp] 283 | \centering 284 | \includegraphics{/images/2013-12-11_figure_4.svg} 285 | \caption{\textbf{Figure 4. Article-level metrics for PLOS Biology.} 286 | Proportion of all 1,706 \emph{PLOS Biology} research articles published 287 | up to May 20, 2013 mentioned by particular article-level metrics source. 288 | Colors indicate categories (Viewed, Cited, Saved, Discussed, 289 | Recommended), as used on the PLOS website.} 290 | \end{figure} 291 | 292 | All \emph{PLOS Biology} articles are viewed and downloaded, and almost 293 | all of them (all research articles and nearly all front matter) will be 294 | cited sooner or later. Almost all of them will also be bookmarked in 295 | online reference managers, such as Mendeley, but the percentage of 296 | articles that are discussed online is much smaller. 
Some of these 297 | percentages are time dependent; the use of social media discussion 298 | platforms, such as Twitter and Facebook for example, has increased in 299 | recent years (93\% of \emph{PLOS Biology} research articles published 300 | since June 2012 have been discussed on Twitter, and 63\% mentioned on 301 | Facebook). These are the locations where most of the online discussion 302 | around published articles currently seems to take place; the percentage 303 | of papers with comments on the PLOS website or that have science blog 304 | posts written about them is much smaller. Not all of this online 305 | discussion is about research articles, and perhaps, not surprisingly, 306 | the most-tweeted PLOS article overall (with more than 1,100 tweets) is a 307 | \emph{PLOS Biology} perspective on the use of social media for 308 | scientists {[}@Bik2013{]}. 309 | 310 | Some metrics are not so much indicators of a broad online discussion, 311 | but rather focus on highlighting articles of particular interest. For 312 | example, science blogs allow a more detailed discussion of an article as 313 | compared to comments or tweets, and journals themselves sometimes choose 314 | to highlight a paper on their own blogs, allowing for a more digestible 315 | explanation of the science for the non-expert reader {[}@Fausto2012{]}. 316 | Coverage by other bloggers also serves the same purpose; a good example 317 | of this is one recent post on the OpenHelix Blog {[}@Video2012{]} that 318 | contains video footage of the second author of a 2010 \emph{PLOS 319 | Biology} article {[}@Dalloul2010{]} discussing the turkey genome. 320 | 321 | F1000Prime, a commercial service of recommendations by expert 322 | scientists, was added to the PLOS Article-Level Metrics in August 2013. 323 | We now highlight on the PLOS website when any articles have received at 324 | least one recommendation within F1000Prime. 
We also monitor when an 325 | article has been cited within the widely used modern-day online 326 | encyclopedia, Wikipedia. A good example of the latter is the Tasmanian 327 | devil Wikipedia page {[}@Tasmanian2013{]} that links to a \emph{PLOS 328 | Biology} research article published in 2010 {[}@Nilsson2010{]}. While an 329 | F1000Prime recommendation is a strong endorsement from peer(s) in the 330 | scientific community, being included in a Wikipedia page is akin to 331 | making it into a textbook about the subject area and being read by a 332 | much wider audience that goes beyond the scientific community. 333 | 334 | \emph{PLOS Biology} is the PLOS journal with the highest percentage of 335 | articles recommended in F1000Prime and mentioned in Wikipedia, but there 336 | is only partial overlap between the two groups of articles because they 337 | focus on different audiences (\textbf{Figure 5}). These recommendations 338 | and mentions in turn show correlations with other metrics, but not 339 | simple ones; you can't assume, for example, that highly cited articles 340 | are more likely to be recommended by F1000Prime, so it will be 341 | interesting to monitor these trends now that we include this 342 | information. 343 | 344 | \begin{Shaded} 345 | \begin{Highlighting}[] 346 | \CommentTok{# code for figure 5: Venn diagram F1000 vs.
Wikipedia for PLOS Biology} 347 | \CommentTok{# articles} 348 | 349 | \CommentTok{# load required libraries} 350 | \KeywordTok{library}\NormalTok{(}\StringTok{"plyr"}\NormalTok{)} 351 | \KeywordTok{library}\NormalTok{(}\StringTok{"VennDiagram"}\NormalTok{)} 352 | 353 | \CommentTok{# load May 20, 2013 ALM report} 354 | \NormalTok{alm <-}\StringTok{ }\KeywordTok{read.csv}\NormalTok{(}\StringTok{"../data/alm_report_plos_biology_2013-05-20.csv"}\NormalTok{, }\DataTypeTok{stringsAsFactors =} \OtherTok{FALSE}\NormalTok{)} 355 | 356 | \CommentTok{# only look at research articles} 357 | \NormalTok{alm <-}\StringTok{ }\KeywordTok{subset}\NormalTok{(alm, alm$article_type ==}\StringTok{ "Research Article"}\NormalTok{)} 358 | 359 | \CommentTok{# group articles based on values in Wikipedia and F1000} 360 | \NormalTok{reassignWikipedia <-}\StringTok{ }\NormalTok{function(x) if (x >}\StringTok{ }\DecValTok{0}\NormalTok{) }\DecValTok{1} \NormalTok{else }\DecValTok{0} 361 | \NormalTok{alm$wikipedia_bin <-}\StringTok{ }\KeywordTok{aaply}\NormalTok{(alm$wikipedia, }\DecValTok{1}\NormalTok{, reassignWikipedia)} 362 | \NormalTok{reassignF1000 <-}\StringTok{ }\NormalTok{function(x) if (x >}\StringTok{ }\DecValTok{0}\NormalTok{) }\DecValTok{2} \NormalTok{else }\DecValTok{0} 363 | \NormalTok{alm$f1000_bin <-}\StringTok{ }\KeywordTok{aaply}\NormalTok{(alm$f1000, }\DecValTok{1}\NormalTok{, reassignF1000)} 364 | \NormalTok{alm$article_group =}\StringTok{ }\NormalTok{alm$wikipedia_bin +}\StringTok{ }\NormalTok{alm$f1000_bin} 365 | \NormalTok{reassignCombined <-}\StringTok{ }\NormalTok{function(x) if (x ==}\StringTok{ }\DecValTok{3}\NormalTok{) }\DecValTok{1} \NormalTok{else }\DecValTok{0} 366 | \NormalTok{alm$combined_bin <-}\StringTok{ }\KeywordTok{aaply}\NormalTok{(alm$article_group, }\DecValTok{1}\NormalTok{, reassignCombined)} 367 | \NormalTok{reassignNo <-}\StringTok{ }\NormalTok{function(x) if (x ==}\StringTok{ }\DecValTok{0}\NormalTok{) }\DecValTok{1} \NormalTok{else }\DecValTok{0} 368 
| \NormalTok{alm$no_bin <-}\StringTok{ }\KeywordTok{aaply}\NormalTok{(alm$article_group, }\DecValTok{1}\NormalTok{, reassignNo)} 369 | 370 | \CommentTok{# remember to divide f1000_bin by 2, as this is the default value} 371 | \NormalTok{summary <-}\StringTok{ }\KeywordTok{colSums}\NormalTok{(}\KeywordTok{subset}\NormalTok{(alm, }\DataTypeTok{select =} \KeywordTok{c}\NormalTok{(}\StringTok{"wikipedia_bin"}\NormalTok{, }\StringTok{"f1000_bin"}\NormalTok{, }\StringTok{"combined_bin"}\NormalTok{,} 372 | \StringTok{"no_bin"}\NormalTok{)), }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{)} 373 | \NormalTok{rows <-}\StringTok{ }\KeywordTok{nrow}\NormalTok{(alm)} 374 | 375 | \CommentTok{# options} 376 | \NormalTok{plos.colors <-}\StringTok{ }\KeywordTok{c}\NormalTok{(}\StringTok{"#c9c9c7"}\NormalTok{, }\StringTok{"#0000ff"}\NormalTok{, }\StringTok{"#ff0000"}\NormalTok{)} 377 | 378 | \CommentTok{# plot the chart} 379 | \NormalTok{opar <-}\StringTok{ }\KeywordTok{par}\NormalTok{(}\DataTypeTok{mai =} \KeywordTok{c}\NormalTok{(}\FloatTok{0.5}\NormalTok{, }\FloatTok{0.75}\NormalTok{, }\FloatTok{3.5}\NormalTok{, }\FloatTok{0.5}\NormalTok{), }\DataTypeTok{omi =} \KeywordTok{c}\NormalTok{(}\FloatTok{0.5}\NormalTok{, }\FloatTok{0.5}\NormalTok{, }\FloatTok{1.5}\NormalTok{, }\FloatTok{0.5}\NormalTok{), }\DataTypeTok{mgp =} \KeywordTok{c}\NormalTok{(}\DecValTok{3}\NormalTok{,} 380 | \FloatTok{0.5}\NormalTok{, }\FloatTok{0.5}\NormalTok{), }\DataTypeTok{fg =} \StringTok{"black"}\NormalTok{, }\DataTypeTok{cex.main =} \DecValTok{2}\NormalTok{, }\DataTypeTok{cex.lab =} \FloatTok{1.5}\NormalTok{, }\DataTypeTok{col =} \NormalTok{plos.color,} 381 | \DataTypeTok{col.main =} \NormalTok{plos.color, }\DataTypeTok{col.lab =} \NormalTok{plos.color, }\DataTypeTok{xaxs =} \StringTok{"i"}\NormalTok{, }\DataTypeTok{yaxs =} \StringTok{"i"}\NormalTok{)} 382 | 383 | \NormalTok{venn.plot <-}\StringTok{ }\KeywordTok{draw.triple.venn}\NormalTok{(}\DataTypeTok{area1 =} \NormalTok{rows, }\DataTypeTok{area2 =} 
\NormalTok{summary[}\DecValTok{1}\NormalTok{], }\DataTypeTok{area3 =} \NormalTok{summary[}\DecValTok{2}\NormalTok{]/}\DecValTok{2}\NormalTok{,} 384 | \DataTypeTok{n12 =} \NormalTok{summary[}\DecValTok{1}\NormalTok{], }\DataTypeTok{n23 =} \NormalTok{summary[}\DecValTok{3}\NormalTok{], }\DataTypeTok{n13 =} \NormalTok{summary[}\DecValTok{2}\NormalTok{]/}\DecValTok{2}\NormalTok{, }\DataTypeTok{n123 =} \NormalTok{summary[}\DecValTok{3}\NormalTok{],} 385 | \DataTypeTok{euler.d =} \OtherTok{TRUE}\NormalTok{, }\DataTypeTok{scaled =} \OtherTok{TRUE}\NormalTok{, }\DataTypeTok{fill =} \NormalTok{plos.colors, }\DataTypeTok{cex =} \DecValTok{2}\NormalTok{, }\DataTypeTok{fontfamily =} \KeywordTok{rep}\NormalTok{(}\StringTok{"sans"}\NormalTok{,} 386 | \DecValTok{7}\NormalTok{))} 387 | 388 | \KeywordTok{par}\NormalTok{(opar)} 389 | \end{Highlighting} 390 | \end{Shaded} 391 | 392 | \begin{figure}[htbp] 393 | \centering 394 | \includegraphics{/images/2013-12-11_figure_5.svg} 395 | \caption{\textbf{Figure 5. PLOS Biology articles: sites of 396 | recommendation and discussion.} Number of \emph{PLOS Biology} research 397 | articles published up to May 20, 2013 that have been recommended by 398 | F1000Prime (red) or mentioned in Wikipedia (blue).} 399 | \end{figure} 400 | 401 | With the increasing availability of ALM data, there comes a growing need 402 | to provide tools that will allow the community to interrogate them. A 403 | good first step for researchers, research administrators, and others 404 | interested in looking at the metrics of a larger set of PLOS articles is 405 | the recently launched ALM Reports tool {[}@ALM2013{]}. There are also a 406 | growing number of service providers, including Altmetric.com 407 | {[}@Altmetric2013{]}, ImpactStory {[}@Impactstory2013{]}, and Plum 408 | Analytics {[}@Plum2013{]} that provide similar services for articles 409 | from other publishers. 
410 | 411 | As article-level metrics become increasingly used by publishers, 412 | funders, universities, and researchers, one of the major challenges to 413 | overcome is ensuring that standards and best practices are widely 414 | adopted and understood. The National Information Standards Organization 415 | (NISO) was recently awarded a grant by the Alfred P. Sloan Foundation to 416 | work on this {[}@NISO2013{]}, and PLOS is actively involved in this 417 | project. We look forward to further developing our article-level metrics 418 | and to having them adopted by other publishers, which hopefully will 419 | pave the way to their wide incorporation into research and researcher 420 | assessments. 421 | 422 | \subsubsection{Supporting Information}\label{supporting-information} 423 | 424 | \textbf{\href{http://dx.doi.org/10.1371/journal.pbio.1001687.s001}{Data 425 | S1}. Dataset of ALM for PLOS Biology articles used in the text, and R 426 | scripts that were used to produce figures.} The data were collected on 427 | May 20, 2013 and include all \emph{PLOS Biology} articles published up 428 | to that day. Data for F1000Prime were collected on August 15, 2013. All 429 | charts were produced with R version 3.0.0. 430 | -------------------------------------------------------------------------------- /examples/example3.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "Additional Markdown we need in Scholarly Texts" 4 | author: "Martin Fenner" 5 | date: "2012-12-18" 6 | description: "" 7 | category: 8 | tags: [markdown] 9 | bibliography: examples/example.bib 10 | csl: jats.csl 11 | --- 12 | Following up from [my post last 13 | week](/2012/12/13/a-call-for-scholarly-markdown/), 14 | below is a suggested list of features that should be supported in 15 | documents written in scholarly markdown. 
Please provide feedback via the 16 | comments, or by editing the Wiki version I have set up 17 | [here](https://github.com/mfenner/scholarly-markdown/wiki). Listed are 18 | features that go beyond the [standard markdown 19 | syntax](http://daringfireball.net/projects/markdown/syntax). 20 | 21 | The goals of scholarly markdown are 22 | 23 | 1. to support writing of complete scholarly articles, 24 | 2. to avoid making the syntax more complicated than it is today, and 25 | 3. to avoid relying on HTML as the fallback mechanism. 26 | 27 | In practice this means that scholarly markdown should support most, but 28 | not all scholarly texts – documents that are heavy in math formulas, 29 | have complicated tables, etc. may be better written with LaTeX or 30 | Microsoft Word. It also means that scholarly markdown will probably 31 | contain only limited semantic markup, as this is difficult to do with a 32 | lightweight markup language and much easier with XML or a binary file 33 | format. 34 | 35 | Cover Page 36 | ---------- 37 | 38 | Optional metadata about a document. Typically used for title, authors 39 | (including affiliation), and publication date, but should be flexible 40 | enough to handle any kind of metadata (keywords, copyright, etc.). 41 | 42 | ```yaml 43 | --- 44 | layout: post 45 | title: "Additional Markdown we need in Scholarly Texts" 46 | tags: [markdown] 47 | authors: 48 | - name: Martin Fenner 49 | orcid: 0000-0003-1419-2405 50 | copyright: http://creativecommons.org/licenses/by/3.0/deed.en 51 | --- 52 | ``` 53 | 54 | Typography 55 | ---------- 56 | 57 | Scholarly markdown should support ^superscript^ and ~subscript~ text, and 58 | should provide an easy way to enter Greek letters such as $\zeta$. 59 | 60 | Tables 61 | ------ 62 | 63 | Tables should work as anchors (i.e. you can link to them) and table 64 | captions should support styled text.
Unless the table is very simple, 65 | tables are probably better written as CSV files with another tool, and 66 | then imported into the scholarly markdown document similar to figures. 67 | 68 | ----------------------------------------------------- 69 | Centered Right Left 70 | Header Aligned Aligned 71 | ----------- --------------- ------------------------- 72 | First 12.0 Example of a row that 73 | spans multiple lines. 74 | 75 | Second 5.0 Here's another one. Note 76 | the blank line between 77 | rows. 78 | ----------------------------------------------------- 79 | 80 | Table: **This is the table caption**. We can explain the table here. 81 | 82 | Figures 83 | ------- 84 | 85 | Figures in scholarly works are separated from the text, and have a 86 | figure caption (which can contain styled text). Figures should work as 87 | anchors (i.e. you can link to them). Figures can be in different file 88 | formats, including TIFF and PDF, and those formats have to be converted 89 | into web-friendly formats when exporting to HTML (e.g. PNG and SVG). 90 | 91 | ![**Set operations illustrated with Venn diagrams**. Example taken from [TeXample.net](http://www.texample.net/tikz/examples/set-operations-illustrated-with-venn-diagrams/).](/images/set-operations-illustrated-with-venn-diagrams.png) 92 | 93 | Citations and Links 94 | ------------------- 95 | 96 | Scholarly articles typically don’t have inline links, but rather 97 | citations. The external links (both scholarly identifiers such as DOIs 98 | and regular web URLs) are collected in a bibliography at the end of the 99 | document, and the citations in the text link to this bibliography. This 100 | functionality is similar to footnotes. 101 | 102 | Citations should include a citation key in the text, e.g. `[@kowalczyk2011]`, parsed as [@kowalczyk2011], and a separate bibliography file in BibTeX (or RIS) format that contains references for all citations. 
Inserting citations and creating the bibliography can best be done with a reference manager. 103 | 104 | Cross-links – i.e. links within a document – are important for scholarly 105 | texts. It should be possible to link to section headers (e.g. the 106 | beginning of the discussion section), figures and tables. 107 | 108 | Math 109 | ---- 110 | 111 | Complicated math is probably best done in a different authoring 112 | environment, but simple formulas, both inline $\sqrt2x$ and block elements 113 | 114 | > ${\frac {d}{dx}}\arctan(\sin({x}^{2}))=-2\,{\frac {\cos({x}^{2})x}{-2+\left (\cos({x}^{2})\right )^{2}}}$ 115 | 116 | should be supported by scholarly markdown. 117 | 118 | Comments 119 | -------- 120 | 121 | Comments are important for multi-author documents and if reviewer 122 | feedback should be included. Comments should be linked to a particular 123 | part of a document to provide context, or attached at the end of a 124 | document for general comments. It would also be helpful to “comment out” 125 | parts of a document, e.g. to indicate parts that are incomplete and need 126 | more work. Revisions of a markdown document are best handled using a 127 | version control system such as git. 128 | 129 | References 130 | ---------- 131 | -------------------------------------------------------------------------------- /jats.csl: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" default-locale="en-US"> 3 | <!-- This style was edited with the Visual CSL Editor (http://editor.citationstyles.org/visualEditor/) --> 4 | <info> 5 | <title>Journal Article Tag Suite 6 | JATS 7 | http://www.zotero.org/styles/journal-article-tag-suite 8 | 9 | 10 | 11 | Martin Fenner 12 | mfenner@plos.org 13 | 14 | 15 | 16 | 17 |

Use this style to generate bibliographic data in Journal Article Tagging Suite (JATS) 1.0 XML format 18 | 2015-04-26T17:02:43+00:00 19 | This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License 20 | 21 | 22 | 23 | <etal/> 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | -------------------------------------------------------------------------------- /sample.lua: -------------------------------------------------------------------------------- 1 | -- This is a sample custom writer for pandoc. It produces output 2 | -- that is very similar to that of pandoc's HTML writer. 3 | -- There is one new feature: code blocks marked with class 'dot' 4 | -- are piped through graphviz and images are included in the HTML 5 | -- output using 'data:' URLs. 6 | -- 7 | -- Invoke with: pandoc -t sample.lua 8 | -- 9 | -- Note: you need not have lua installed on your system to use this 10 | -- custom writer. However, if you do have lua installed, you can 11 | -- use it to test changes to the script. 'lua sample.lua' will 12 | -- produce informative error messages if your code contains 13 | -- syntax errors. 
14 | 15 | -- Character escaping 16 | local function escape(s, in_attribute) 17 | return s:gsub("[<>&\"']", 18 | function(x) 19 | if x == '<' then 20 | return '<' 21 | elseif x == '>' then 22 | return '>' 23 | elseif x == '&' then 24 | return '&' 25 | elseif x == '"' then 26 | return '"' 27 | elseif x == "'" then 28 | return ''' 29 | else 30 | return x 31 | end 32 | end) 33 | end 34 | 35 | -- Helper function to convert an attributes table into 36 | -- a string that can be put into HTML tags. 37 | local function attributes(attr) 38 | local attr_table = {} 39 | for x,y in pairs(attr) do 40 | if y and y ~= "" then 41 | table.insert(attr_table, ' ' .. x .. '="' .. escape(y,true) .. '"') 42 | end 43 | end 44 | return table.concat(attr_table) 45 | end 46 | 47 | -- Run cmd on a temporary file containing inp and return result. 48 | local function pipe(cmd, inp) 49 | local tmp = os.tmpname() 50 | local tmph = io.open(tmp, "w") 51 | tmph:write(inp) 52 | tmph:close() 53 | local outh = io.popen(cmd .. " " .. tmp,"r") 54 | local result = outh:read("*all") 55 | outh:close() 56 | os.remove(tmp) 57 | return result 58 | end 59 | 60 | -- Table to store footnotes, so they can be included at the end. 61 | local notes = {} 62 | 63 | -- Blocksep is used to separate block elements. 64 | function Blocksep() 65 | return "\n\n" 66 | end 67 | 68 | -- This function is called once for the whole document. Parameters: 69 | -- body is a string, metadata is a table, variables is a table. 70 | -- This gives you a fragment. You could use the metadata table to 71 | -- fill variables in a custom lua template. Or, pass `--template=...` 72 | -- to pandoc, and pandoc will do the template processing as 73 | -- usual. 74 | function Doc(body, metadata, variables) 75 | local buffer = {} 76 | local function add(s) 77 | table.insert(buffer, s) 78 | end 79 | add(body) 80 | if #notes > 0 then 81 | add('
    ') 82 | for _,note in pairs(notes) do 83 | add(note) 84 | end 85 | add('
') 86 | end 87 | return table.concat(buffer,'\n') 88 | end 89 | 90 | -- The functions that follow render corresponding pandoc elements. 91 | -- s is always a string, attr is always a table of attributes, and 92 | -- items is always an array of strings (the items in a list). 93 | -- Comments indicate the types of other variables. 94 | 95 | function Str(s) 96 | return escape(s) 97 | end 98 | 99 | function Space() 100 | return " " 101 | end 102 | 103 | function LineBreak() 104 | return "
" 105 | end 106 | 107 | function Emph(s) 108 | return "" .. s .. "" 109 | end 110 | 111 | function Strong(s) 112 | return "" .. s .. "" 113 | end 114 | 115 | function Subscript(s) 116 | return "" .. s .. "" 117 | end 118 | 119 | function Superscript(s) 120 | return "" .. s .. "" 121 | end 122 | 123 | function SmallCaps(s) 124 | return '' .. s .. '' 125 | end 126 | 127 | function Strikeout(s) 128 | return '' .. s .. '' 129 | end 130 | 131 | function Link(s, src, tit) 132 | return "" .. s .. "" 134 | end 135 | 136 | function Image(s, src, tit) 137 | return "" 139 | end 140 | 141 | function Code(s, attr) 142 | return "" .. escape(s) .. "" 143 | end 144 | 145 | function InlineMath(s) 146 | return "\\(" .. escape(s) .. "\\)" 147 | end 148 | 149 | function DisplayMath(s) 150 | return "\\[" .. escape(s) .. "\\]" 151 | end 152 | 153 | function Note(s) 154 | local num = #notes + 1 155 | -- insert the back reference right before the final closing tag. 156 | s = string.gsub(s, 157 | '(.*)' .. s .. '') 160 | -- return the footnote reference, linked to the note. 161 | return '' .. num .. '' 163 | end 164 | 165 | function Span(s, attr) 166 | return "" .. s .. "" 167 | end 168 | 169 | function Cite(s, cs) 170 | local ids = {} 171 | for _,cit in ipairs(cs) do 172 | table.insert(ids, cit.citationId) 173 | end 174 | return "" .. s .. "" 176 | end 177 | 178 | function Plain(s) 179 | return s 180 | end 181 | 182 | function Para(s) 183 | return "

" .. s .. "

" 184 | end 185 | 186 | -- lev is an integer, the header level. 187 | function Header(lev, s, attr) 188 | return "" .. s .. "" 189 | end 190 | 191 | function BlockQuote(s) 192 | return "
\n" .. s .. "\n
" 193 | end 194 | 195 | function HorizontalRule() 196 | return "
" 197 | end 198 | 199 | function CodeBlock(s, attr) 200 | -- If code block has class 'dot', pipe the contents through dot 201 | -- and base64, and include the base64-encoded png as a data: URL. 202 | if attr.class and string.match(' ' .. attr.class .. ' ',' dot ') then 203 | local png = pipe("base64", pipe("dot -Tpng", s)) 204 | return '' 205 | -- otherwise treat as code (one could pipe through a highlighter) 206 | else 207 | return "
" .. escape(s) ..
208 |            "
" 209 | end 210 | end 211 | 212 | function BulletList(items) 213 | local buffer = {} 214 | for _, item in pairs(items) do 215 | table.insert(buffer, "
  • " .. item .. "
  • ") 216 | end 217 | return "
      \n" .. table.concat(buffer, "\n") .. "\n
    " 218 | end 219 | 220 | function OrderedList(items) 221 | local buffer = {} 222 | for _, item in pairs(items) do 223 | table.insert(buffer, "
  • " .. item .. "
  • ") 224 | end 225 | return "
      \n" .. table.concat(buffer, "\n") .. "\n
    " 226 | end 227 | 228 | -- Revisit association list STackValue instance. 229 | function DefinitionList(items) 230 | local buffer = {} 231 | for _,item in pairs(items) do 232 | for k, v in pairs(item) do 233 | table.insert(buffer,"
    " .. k .. "
    \n
    " .. 234 | table.concat(v,"
    \n
    ") .. "
    ") 235 | end 236 | end 237 | return "
    \n" .. table.concat(buffer, "\n") .. "\n
    " 238 | end 239 | 240 | -- Convert pandoc alignment to something HTML can use. 241 | -- align is AlignLeft, AlignRight, AlignCenter, or AlignDefault. 242 | function html_align(align) 243 | if align == 'AlignLeft' then 244 | return 'left' 245 | elseif align == 'AlignRight' then 246 | return 'right' 247 | elseif align == 'AlignCenter' then 248 | return 'center' 249 | else 250 | return 'left' 251 | end 252 | end 253 | 254 | -- Caption is a string, aligns is an array of strings, 255 | -- widths is an array of floats, headers is an array of 256 | -- strings, rows is an array of arrays of strings. 257 | function Table(caption, aligns, widths, headers, rows) 258 | local buffer = {} 259 | local function add(s) 260 | table.insert(buffer, s) 261 | end 262 | add("") 263 | if caption ~= "" then 264 | add("") 265 | end 266 | if widths and widths[1] ~= 0 then 267 | for _, w in pairs(widths) do 268 | add('') 269 | end 270 | end 271 | local header_row = {} 272 | local empty_header = true 273 | for i, h in pairs(headers) do 274 | local align = html_align(aligns[i]) 275 | table.insert(header_row,'') 276 | empty_header = empty_header and h == "" 277 | end 278 | if empty_header then 279 | head = "" 280 | else 281 | add('') 282 | for _,h in pairs(header_row) do 283 | add(h) 284 | end 285 | add('') 286 | end 287 | local class = "even" 288 | for _, row in pairs(rows) do 289 | class = (class == "even" and "odd") or "even" 290 | add('') 291 | for i,c in pairs(row) do 292 | add('') 293 | end 294 | add('') 295 | end 296 | add('\n" .. s .. "" 302 | end 303 | 304 | -- The following code will produce runtime warnings when you haven't defined 305 | -- all of the functions you need for the custom writer, so it's useful 306 | -- to include when you're working on a writer. 
307 | local meta = {} 308 | meta.__index = 309 | function(_, key) 310 | io.stderr:write(string.format("WARNING: Undefined function '%s'\n",key)) 311 | return function() return "" end 312 | end 313 | setmetatable(_G, meta) 314 | -------------------------------------------------------------------------------- /spec/input_spec.lua: -------------------------------------------------------------------------------- 1 | require("../jats") 2 | 3 | describe("required custom writer functions", function() 4 | 5 | -- generic writer functions 6 | it("Doc", function() 7 | assert.is_true(type(Doc) == 'function') 8 | end) 9 | 10 | it("Blocksep", function() 11 | assert.is_true(type(Blocksep) == 'function') 12 | end) 13 | 14 | -- block elements 15 | it("Plain", function() 16 | assert.is_true(type(Plain) == 'function') 17 | end) 18 | 19 | it("CaptionedImage", function() 20 | assert.is_true(type(CaptionedImage) == 'function') 21 | end) 22 | 23 | it("Para", function() 24 | assert.is_true(type(Para) == 'function') 25 | end) 26 | 27 | it("RawBlock", function() 28 | assert.is_true(type(RawBlock) == 'function') 29 | end) 30 | 31 | it("HorizontalRule", function() 32 | assert.is_true(type(HorizontalRule) == 'function') 33 | end) 34 | 35 | it("Header", function() 36 | assert.is_true(type(Header) == 'function') 37 | end) 38 | 39 | it("CodeBlock", function() 40 | assert.is_true(type(CodeBlock) == 'function') 41 | end) 42 | 43 | it("BlockQuote", function() 44 | assert.is_true(type(BlockQuote) == 'function') 45 | end) 46 | 47 | it("Table", function() 48 | assert.is_true(type(Table) == 'function') 49 | end) 50 | 51 | it("BulletList", function() 52 | assert.is_true(type(BulletList) == 'function') 53 | end) 54 | 55 | it("OrderedList", function() 56 | assert.is_true(type(OrderedList) == 'function') 57 | end) 58 | 59 | it("DefinitionList", function() 60 | assert.is_true(type(DefinitionList) == 'function') 61 | end) 62 | 63 | it("Div", function() 64 | assert.is_true(type(Div) == 'function') 65 | end) 66 
| 67 | -- inline elements 68 | it("Str", function() 69 | assert.is_true(type(Str) == 'function') 70 | end) 71 | 72 | it("Space", function() 73 | assert.is_true(type(Space) == 'function') 74 | end) 75 | 76 | it("Emph", function() 77 | assert.is_true(type(Emph) == 'function') 78 | end) 79 | 80 | it("Strong", function() 81 | assert.is_true(type(Strong) == 'function') 82 | end) 83 | 84 | it("Strikeout", function() 85 | assert.is_true(type(Strikeout) == 'function') 86 | end) 87 | 88 | it("Superscript", function() 89 | assert.is_true(type(Superscript) == 'function') 90 | end) 91 | 92 | it("Subscript", function() 93 | assert.is_true(type(Subscript) == 'function') 94 | end) 95 | 96 | it("SmallCaps", function() 97 | assert.is_true(type(SmallCaps) == 'function') 98 | end) 99 | 100 | it("SingleQuoted", function() 101 | assert.is_true(type(SingleQuoted) == 'function') 102 | end) 103 | 104 | it("DoubleQuoted", function() 105 | assert.is_true(type(DoubleQuoted) == 'function') 106 | end) 107 | 108 | it("Cite", function() 109 | assert.is_true(type(Cite) == 'function') 110 | end) 111 | 112 | it("Code", function() 113 | assert.is_true(type(Code) == 'function') 114 | end) 115 | 116 | it("DisplayMath", function() 117 | assert.is_true(type(DisplayMath) == 'function') 118 | end) 119 | 120 | it("InlineMath", function() 121 | assert.is_true(type(InlineMath) == 'function') 122 | end) 123 | 124 | it("RawInline", function() 125 | assert.is_true(type(RawInline) == 'function') 126 | end) 127 | 128 | it("LineBreak", function() 129 | assert.is_true(type(LineBreak) == 'function') 130 | end) 131 | 132 | it("Link", function() 133 | assert.is_true(type(Link) == 'function') 134 | end) 135 | 136 | it("Image", function() 137 | assert.is_true(type(Image) == 'function') 138 | end) 139 | 140 | it("Note", function() 141 | assert.is_true(type(Note) == 'function') 142 | end) 143 | 144 | it("Span", function() 145 | assert.is_true(type(Span) == 'function') 146 | end) 147 | 148 | end) 
-------------------------------------------------------------------------------- /spec/output_spec.lua: -------------------------------------------------------------------------------- 1 | local inspect = require 'inspect' 2 | require("../jats") 3 | 4 | describe("custom xml functions", function() 5 | it("should escape XML entities", function() 6 | local result = escape('<') 7 | local expected = '<' 8 | assert.are.same(result, expected) 9 | end) 10 | 11 | it("should unescape XML entities", function() 12 | local result = unescape('<') 13 | expected = '<' 14 | assert.are.same(result, expected) 15 | end) 16 | 17 | it("should write attributes", function() 18 | local attr = { ['id'] = 'sec-1.4', ['sec-type'] = 'results' } 19 | local result = attributes(attr) 20 | expected = ' id="sec-1.4" sec-type="results"' 21 | assert.are.same(result, expected) 22 | end) 23 | 24 | it("should write empty attributes", function() 25 | local attr = {} 26 | local result = attributes(attr) 27 | expected = '' 28 | assert.are.same(result, expected) 29 | end) 30 | 31 | it("should build XML entities", function() 32 | local result = xml('p', 'Some text') 33 | expected = '

    Some text

    ' 34 | assert.are.same(result, expected) 35 | end) 36 | 37 | it("should build self closing elements", function() 38 | local result = xml('hr') 39 | expected = '
    ' 40 | assert.are.same(result, expected) 41 | end) 42 | 43 | it("should include attributes", function() 44 | local result = xml('div', 'Some text', { ['id'] = 'results' }) 45 | expected = '
    Some text
    ' 46 | assert.are.same(result, expected) 47 | end) 48 | end) 49 | 50 | describe("Doc", function() 51 | it("should build the body", function() 52 | local result = Doc('This is a test.') 53 | expected = '\n\n\nThis is a test.</sec>\n</body>' 54 | assert.are.same(result, expected) 55 | end) 56 | end) 57 | 58 | describe("Span", function() 59 | it("should understand bold", function() 60 | local result = Span('This is a test.', { ['style'] = "font-weight:bold" }) 61 | expected = '<bold>This is a test.</bold>' 62 | assert.are.same(result, expected) 63 | end) 64 | 65 | it("should understand italic", function() 66 | local result = Span('This is a test.', { ['style'] = "font-style:italic" }) 67 | expected = '<italic>This is a test.</italic>' 68 | assert.are.same(result, expected) 69 | end) 70 | 71 | it("should understand small caps", function() 72 | local result = Span('This is a test.', { ['style'] = "font-variant: small-caps" }) 73 | expected = '<sc>This is a test.</sc>' 74 | assert.are.same(result, expected) 75 | end) 76 | end) 77 | 78 | describe("sections", function() 79 | it("should find sections", function() 80 | local result = sec_type_helper('Discussion') 81 | expected = 'discussion' 82 | assert.are.same(result, expected) 83 | end) 84 | 85 | it("should ignore unknown sections", function() 86 | local result = sec_type_helper('Report') 87 | assert.is.falsy(result) 88 | end) 89 | 90 | it("should generate section", function() 91 | local result = section_helper(2, 'Some text in the discussion.', 'Discussion') 92 | expected = { ['sec-type'] = 'discussion' } 93 | assert.are.same(result, expected) 94 | end) 95 | 96 | it("should generate acknowledgements", function() 97 | local result = section_helper(2, 'We thank our sponsors.', 'Acknowledgements') 98 | expected = {} 99 | assert.are.same(result, expected) 100 | end) 101 | 102 | it("should build header", function() 103 | local result = Header(2, 'Discussion') 104 | expected = '</sec>\n<sec lev="2">\n<title>Discussion' 105 
| assert.are.same(result, expected) 106 | end) 107 | 108 | it("should build section", function() 109 | local result = Section(2, 'Some text in the discussion.', 'Discussion', { ['sec-type'] = 'discussion' }) 110 | expected = '\nDiscussionSome text in the discussion.' 111 | assert.are.same(result, expected) 112 | end) 113 | 114 | it("should build ack", function() 115 | local result = Ack('We thank our sponsors.') 116 | expected = 'We thank our sponsors.' 117 | assert.are.same(result, expected) 118 | end) 119 | 120 | it("should build supplementary material", function() 121 | local result = SupplementaryMaterial('Detailed information about the experiment.', 'S4') 122 | expected = '\n\n' 123 | assert.are.same(result, expected) 124 | end) 125 | end) 126 | 127 | describe("references", function() 128 | it("should insert references", function() 129 | local result = Ref('') 130 | expected = 1 131 | assert.are.same(result, expected) 132 | end) 133 | 134 | it("should build links", function() 135 | local result = Link('PubMed', 'http://www.ncbi.nlm.nih.gov/pubmed/', '24 million citations for biomedical literature') 136 | expected = 'PubMed' 137 | assert.are.same(result, expected) 138 | end) 139 | 140 | it("should build cite", function() 141 | local result = Cite('[7]') 142 | expected = '[7]' 143 | assert.are.same(result, expected) 144 | end) 145 | 146 | it("should build multiple cites", function() 147 | local result = Cite('[7,8]') 148 | expected = '[7][8]' 149 | assert.are.same(result, expected) 150 | end) 151 | 152 | it("should ignore cite keys", function() 153 | local result = Cite('[@thorisson2011]') 154 | expected = '[@thorisson2011]' 155 | assert.are.same(result, expected) 156 | end) 157 | end) 158 | 159 | describe("figures", function() 160 | it("should build graphic", function() 161 | local result = Image(nil, 'hello_world.png', 'A title') 162 | expected = '' 163 | assert.are.same(result, expected) 164 | end) 165 | 166 | it("should build graphic with object id", 
function() 167 | local object = xml('object_id', '12345') 168 | local result = Image(object, 'hello_world.png', 'A title') 169 | expected = '12345' 170 | assert.are.same(result, expected) 171 | end) 172 | 173 | it("should build figure", function() 174 | local title = xml('title', "Fig. 4") 175 | local result = CaptionedImage(title .. "Some describing text", 'hello_world.png', 'A title') 176 | expected = '' 177 | assert.are.same(result, expected) 178 | end) 179 | end) 180 | 181 | describe("custom tags", function() 182 | it("Section", function() 183 | assert.is_true(type(Section) == 'function') 184 | end) 185 | 186 | it("RefList", function() 187 | assert.is_true(type(RefList) == 'function') 188 | end) 189 | 190 | it("Ref", function() 191 | assert.is_true(type(Ref) == 'function') 192 | end) 193 | 194 | it("SupplementaryMaterial", function() 195 | assert.is_true(type(SupplementaryMaterial) == 'function') 196 | end) 197 | 198 | it("Ack", function() 199 | assert.is_true(type(Ack) == 'function') 200 | end) 201 | 202 | it("Glossary", function() 203 | assert.is_true(type(Glossary) == 'function') 204 | end) 205 | end) 206 | 207 | describe("lists", function() 208 | it("should build unordered list", function() 209 | local result = BulletList({ 'A', 'B', 'C' }) 210 | expected = '\nA\nB\nC\n' 211 | assert.are.same(result, expected) 212 | end) 213 | 214 | it("should build ordered list", function() 215 | local result = OrderedList({ 'A', 'B', 'C' }) 216 | expected = '\nA\nB\nC\n' 217 | assert.are.same(result, expected) 218 | end) 219 | 220 | it("should build definition list", function() 221 | local result = DefinitionList({ { ['A'] = { 'This is A.' }}, { ['B'] = {'This is B.', 'This is also B.' }}, { ['C'] = { 'This is C.' 
}} }) 222 | expected = '\nAThis is A.\nBThis is B.This is also B.\nCThis is C.\n' 223 | assert.are.same(result, expected) 224 | end) 225 | end) 226 | 227 | describe("flatten", function() 228 | it("flatten_table", function() 229 | assert.is_true(type(flatten_table) == 'function') 230 | end) 231 | 232 | it("nested table", function() 233 | local data = { body = { name = 'test', color = '#CCC' }} 234 | local expected = { ['body-name'] = 'test', ['body-color'] = '#CCC' } 235 | local result = flatten_table(data) 236 | assert.are.same(result, expected) 237 | end) 238 | 239 | it("nested table with array", function() 240 | local data = { body = { name = 'test', color = '#CCC' }, author = {{ name = 'Smith' }, { name = 'Baker' }}} 241 | local expected = { ['body-name'] = 'test', ['body-color'] = '#CCC', author = {{ name = 'Smith' }, { name = 'Baker' }}} 242 | local result = flatten_table(data) 243 | assert.are.same(result, expected) 244 | end) 245 | 246 | it("deeply nested table", function() 247 | local data = { body = { name = 'test', font = { color = { id = 1, value = '#CCC' }}}} 248 | local expected = { ['body-name'] = 'test', ['body-font-color-id'] = 1, ['body-font-color-value'] = '#CCC' } 249 | local result = flatten_table(data) 250 | assert.are.same(result, expected) 251 | end) 252 | 253 | it("regular table", function() 254 | local data = { body = 'test' } 255 | local result = flatten_table(data) 256 | assert.are.same(result, data) 257 | end) 258 | 259 | end) 260 | 261 | describe("date_helper", function() 262 | it("function exists", function() 263 | assert.is_true(type(date_helper) == 'function') 264 | end) 265 | 266 | it("generates iso8601 dates", function() 267 | local date = '2013-12-24' 268 | local expected = '2013-12-24' 269 | local result = date_helper(date).iso8601 270 | assert.are.same(result, expected) 271 | end) 272 | 273 | it("rejects malformed dates", function() 274 | local date = '12/24/13' 275 | local expected = nil 276 | local result = date_helper(date) 
277 | assert.are.same(result, expected) 278 | end) 279 | end) 280 | 281 | describe("handle files", function() 282 | it("function read_file exists", function() 283 | assert.is_true(type(read_file) == 'function') 284 | end) 285 | end) 286 | 287 | describe("parse_yaml", function() 288 | it("function exists", function() 289 | assert.is_true(type(parse_yaml) == 'function') 290 | end) 291 | 292 | it("parse variables", function() 293 | local config = [[ 294 | publisher-id: plos 295 | publisher-name: Public Library of Science 296 | ]] 297 | local expected = { ['publisher-id'] = 'plos', 298 | ['publisher-name'] = 'Public Library of Science' } 299 | local result = parse_yaml(config) 300 | assert.are.same(result, expected) 301 | end) 302 | 303 | it("strip quotes", function() 304 | local config = [[ 305 | title: "What Can Article Level Metrics Do for You?" 306 | alternate-title: 'What Can Article Level Metrics Do for You?' 307 | ]] 308 | local expected = { title = 'What Can Article Level Metrics Do for You?', 309 | ['alternate-title'] = 'What Can Article Level Metrics Do for You?' 
} 310 | local result = parse_yaml(config) 311 | assert.are.same(result, expected) 312 | end) 313 | 314 | it("parse nested variables", function() 315 | local config = [[ 316 | doi: 10.1371/journal.pbio.1001687 317 | journal: 318 | publisher-id: plos 319 | date: 2013-12-25 320 | publisher: 321 | publisher-name: Public Library of Science 322 | ]] 323 | local expected = { date = "2013-12-25", 324 | doi = "10.1371/journal.pbio.1001687", 325 | journal = { 326 | ["publisher-id"] = "plos" 327 | }, 328 | publisher = { 329 | ["publisher-name"] = "Public Library of Science" 330 | } 331 | } 332 | local result = parse_yaml(config) 333 | assert.are.same(result, expected) 334 | end) 335 | 336 | it("parse array variable", function() 337 | local config = [[ 338 | publisher-id: plos 339 | publisher-name: Public Library of Science 340 | tags: [molecular biology, cancer] 341 | ]] 342 | local expected = { ['publisher-id'] = 'plos', 343 | ['publisher-name'] = 'Public Library of Science', 344 | tags = { 'molecular biology', 'cancer' } } 345 | local result = parse_yaml(config) 346 | assert.are.same(result, expected) 347 | end) 348 | end) 349 | --------------------------------------------------------------------------------
    " .. caption .. "
    ' .. h .. '
    ' .. c .. '
    S4Detailed information about the experiment.Fig. 4Some describing text