├── .gitignore ├── lib ├── webloc │ └── version.rb └── webloc.rb ├── test ├── oldstyle.webloc ├── pliststyle.webloc └── webloc_test.rb ├── Gemfile ├── Rakefile ├── webloc.gemspec ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | pkg/* 2 | *.gem 3 | .bundle 4 | .DS_Store -------------------------------------------------------------------------------- /lib/webloc/version.rb: -------------------------------------------------------------------------------- 1 | class Webloc 2 | VERSION = "0.3.1" 3 | end 4 | -------------------------------------------------------------------------------- /test/oldstyle.webloc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterc/webloc/HEAD/test/oldstyle.webloc -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | # Specify your gem's dependencies in webloc.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler' 2 | Bundler::GemHelper.install_tasks 3 | 4 | require 'rake/testtask' 5 | Rake::TestTask.new(:test) do |test| 6 | test.libs << 'lib' << 'test' 7 | test.pattern = 'test/**/*_test.rb' 8 | test.verbose = true 9 | end 10 | 11 | task :default => :test -------------------------------------------------------------------------------- /test/pliststyle.webloc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | URL 6 | https://github.com/peterc/webloc 7 | 8 | 9 | -------------------------------------------------------------------------------- /webloc.gemspec: 
-------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | $:.push File.expand_path("../lib", __FILE__) 3 | require "webloc/version" 4 | 5 | Gem::Specification.new do |s| 6 | s.name = "webloc" 7 | s.version = Webloc::VERSION 8 | s.platform = Gem::Platform::RUBY 9 | s.authors = ["Peter Cooper"] 10 | s.email = ["git@peterc.org"] 11 | s.homepage = "https://github.com/peterc/webloc" 12 | s.summary = %q{Reads and writes .webloc files on macOS} 13 | s.description = %q{Webloc reads and writes .webloc files on macOS} 14 | 15 | s.rubyforge_project = "webloc" 16 | 17 | s.files = `git ls-files`.split("\n") 18 | s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") 19 | s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } 20 | s.require_paths = ["lib"] 21 | 22 | s.add_dependency 'plist' 23 | end 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2011-2024 Peter Cooper 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # webloc 2 | 3 | *webloc* is a Ruby library that can read from and write to .webloc files as used on macOS. These are a variant of 'plist' format files, specifically used for storing links to URLs. 4 | 5 | It works on Ruby 2.7 and up, including Ruby 3.x, and supports URLs of up to 2048 characters in length (and probably longer, but this is around the de facto limit for URLs in most systems). 6 | 7 | ## Installation 8 | 9 | gem install webloc 10 | 11 | ## Usage 12 | 13 | ### Basic Usage 14 | 15 | Reading a .webloc file: 16 | 17 | webloc = Webloc.load('bookmark.webloc') 18 | puts webloc.url 19 | # => "https://example.com" 20 | 21 | Writing to a .webloc file: 22 | 23 | Webloc.new('https://rubyweekly.com/').save('rubyweekly.webloc') 24 | 25 | ### Advanced Examples 26 | 27 | #### Processing multiple .webloc files 28 | 29 | ```ruby 30 | require 'webloc' 31 | 32 | Dir.glob('*.webloc').each do |file| 33 | webloc = Webloc.load(file) 34 | puts "#{file}: #{webloc.url}" 35 | end 36 | ``` 37 | 38 | #### Creating webloc files from a list of URLs 39 | 40 | ```ruby 41 | require 'webloc' 42 | 43 | urls = [ 44 | 'https://github.com', 45 | 'https://stackoverflow.com', 46 | 'https://ruby-lang.org' 47 | ] 48 | 49 | urls.each_with_index do |url, index| 50 | filename = "bookmark_#{index + 1}.webloc" 51 | Webloc.new(url).save(filename) 52 | puts "Created #{filename}" 53 | end 54 | ``` 55 | 56 | #### Error handling 57 | 58 | ```ruby 59 | require 'webloc' 60 | 61 | begin 62 | webloc = Webloc.load('suspicious.webloc') 63 | puts webloc.url 64 | 
rescue Webloc::FileNotFoundError => e 65 | puts "File not found: #{e.message}" 66 | rescue Webloc::CorruptedFileError => e 67 | puts "File is corrupted: #{e.message}" 68 | rescue Webloc::InvalidFormatError => e 69 | puts "Invalid file format: #{e.message}" 70 | rescue Webloc::WeblocError => e 71 | puts "General webloc error: #{e.message}" 72 | end 73 | ``` 74 | 75 | #### Validating URLs before creating webloc files 76 | 77 | ```ruby 78 | require 'webloc' 79 | require 'uri' 80 | 81 | def create_webloc_safely(url, filename) 82 | # Basic URL validation 83 | uri = URI.parse(url) 84 | unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS) 85 | puts "Invalid URL scheme: #{url}" 86 | return false 87 | end 88 | 89 | # Create the webloc file 90 | Webloc.new(url).save(filename) 91 | puts "Created #{filename} for #{url}" 92 | true 93 | rescue URI::InvalidURIError 94 | puts "Invalid URL format: #{url}" 95 | false 96 | rescue Webloc::WeblocError => e 97 | puts "Failed to create webloc: #{e.message}" 98 | false 99 | end 100 | 101 | create_webloc_safely('https://example.com', 'example.webloc') 102 | create_webloc_safely('invalid-url', 'invalid.webloc') 103 | ``` 104 | 105 | #### Converting between formats 106 | 107 | ```ruby 108 | require 'webloc' 109 | require 'json' 110 | 111 | # Convert webloc to JSON 112 | webloc = Webloc.load('bookmark.webloc') 113 | json_data = { url: webloc.url, title: File.basename('bookmark.webloc', '.webloc') } 114 | File.write('bookmark.json', JSON.pretty_generate(json_data)) 115 | 116 | # Convert JSON back to webloc 117 | json_content = JSON.parse(File.read('bookmark.json')) 118 | Webloc.new(json_content['url']).save('restored.webloc') 119 | ``` 120 | 121 | ## Thanks 122 | 123 | Thanks are due to Christos Karaiskos for [this article](https://medium.com/@karaiskc/understanding-apples-binary-property-list-format-281e6da00dbd) 124 | which helped me understand the plist format a bit more when fixing a bug in 2024.
125 | 126 | ## License 127 | 128 | Copyright (C) 2011-2025 Peter Cooper 129 | 130 | webloc is licensed under the terms of the MIT License -------------------------------------------------------------------------------- /test/webloc_test.rb: -------------------------------------------------------------------------------- 1 | require 'test/unit' 2 | require 'webloc' 3 | require 'tempfile' 4 | 5 | class WeblocTest < Test::Unit::TestCase 6 | def test_webloc_object_requires_url 7 | assert_raise(ArgumentError) { Webloc.new } 8 | end 9 | 10 | def test_webloc_object_rejects_nil_url 11 | error = assert_raise(ArgumentError) { Webloc.new(nil) } 12 | assert_equal "URL cannot be nil or empty", error.message 13 | end 14 | 15 | def test_webloc_object_rejects_empty_url 16 | error = assert_raise(ArgumentError) { Webloc.new("") } 17 | assert_equal "URL cannot be nil or empty", error.message 18 | end 19 | 20 | def test_webloc_object_created_with_url 21 | assert_equal 'http://example.com', Webloc.new('http://example.com').url 22 | end 23 | 24 | def test_webloc_object_loaded_from_old_style_file 25 | assert_equal 'https://github.com/peterc/webloc', Webloc.load(File.dirname(__FILE__) + '/oldstyle.webloc').url 26 | end 27 | 28 | def test_webloc_object_loaded_from_plist_file 29 | assert_equal 'https://github.com/peterc/webloc', Webloc.load(File.dirname(__FILE__) + '/pliststyle.webloc').url 30 | end 31 | 32 | def test_webloc_generates_valid_data 33 | data = File.read(File.dirname(__FILE__) + '/oldstyle.webloc').b 34 | assert_equal data, Webloc.new('https://github.com/peterc/webloc').data 35 | end 36 | 37 | def test_webloc_can_handle_long_urls 38 | url = "http://example.com/this-is-a-very-long-url-abcde" + ('a' * 2000) 39 | assert_nothing_raised { Webloc.new(url).data } 40 | file = Tempfile.new('test-long-webloc') 41 | begin 42 | Webloc.new(url).save(file.path) 43 | assert_equal url, Webloc.load(file.path).url 44 | ensure 45 | file.close 46 | file.unlink 47 | end 48 | end 49 | 50 | def 
test_webloc_can_write_file 51 | file = Tempfile.new('test-webloc') 52 | begin 53 | Webloc.new('https://github.com/peterc/webloc').save(file.path) 54 | assert_equal Webloc.new('https://github.com/peterc/webloc').data, File.read(file.path).b 55 | ensure 56 | file.close 57 | file.unlink 58 | end 59 | end 60 | 61 | def test_load_nonexistent_file_raises_file_not_found_error 62 | error = assert_raise(Webloc::FileNotFoundError) { Webloc.load('nonexistent.webloc') } 63 | assert_match(/File not found: nonexistent\.webloc/, error.message) 64 | end 65 | 66 | def test_load_empty_file_raises_empty_file_error 67 | file = Tempfile.new('empty-webloc') 68 | begin 69 | file.close 70 | error = assert_raise(Webloc::EmptyFileError) { Webloc.load(file.path) } 71 | assert_match(/File is empty:/, error.message) 72 | ensure 73 | file.unlink 74 | end 75 | end 76 | 77 | def test_load_corrupted_binary_file_raises_invalid_format_error 78 | file = Tempfile.new('corrupted-webloc') 79 | begin 80 | file.write("corrupted binary data without SURL marker") 81 | file.close 82 | error = assert_raise(Webloc::InvalidFormatError) { Webloc.load(file.path) } 83 | assert_match(/Invalid binary webloc format - missing SURL marker/, error.message) 84 | ensure 85 | file.unlink 86 | end 87 | end 88 | 89 | def test_load_invalid_xml_file_raises_invalid_format_error 90 | file = Tempfile.new('invalid-xml-webloc') 91 | begin 92 | file.write("xml") 93 | file.close 94 | error = assert_raise(Webloc::InvalidFormatError) { Webloc.load(file.path) } 95 | assert_match(/Invalid XML plist format/, error.message) 96 | ensure 97 | file.unlink 98 | end 99 | end 100 | 101 | def test_load_xml_without_url_key_raises_invalid_format_error 102 | file = Tempfile.new('no-url-xml-webloc') 103 | begin 104 | file.write('NotURLvalue') 105 | file.close 106 | error = assert_raise(Webloc::InvalidFormatError) { Webloc.load(file.path) } 107 | assert_match(/No 'URL' key found in plist file/, error.message) 108 | ensure 109 | file.unlink 110 | end 
111 | end 112 | 113 | def test_save_with_nil_filename_raises_argument_error 114 | webloc = Webloc.new('http://example.com') 115 | error = assert_raise(ArgumentError) { webloc.save(nil) } 116 | assert_equal "Filename cannot be nil or empty", error.message 117 | end 118 | 119 | def test_save_with_empty_filename_raises_argument_error 120 | webloc = Webloc.new('http://example.com') 121 | error = assert_raise(ArgumentError) { webloc.save("") } 122 | assert_equal "Filename cannot be nil or empty", error.message 123 | end 124 | 125 | def test_save_to_invalid_path_raises_webloc_error 126 | webloc = Webloc.new('http://example.com') 127 | error = assert_raise(Webloc::WeblocError) { webloc.save('/invalid/path/that/does/not/exist/file.webloc') } 128 | assert_match(/Failed to save webloc file/, error.message) 129 | end 130 | end -------------------------------------------------------------------------------- /lib/webloc.rb: -------------------------------------------------------------------------------- 1 | require 'plist' 2 | 3 | class Webloc 4 | class WeblocError < StandardError; end 5 | class FileNotFoundError < WeblocError; end 6 | class CorruptedFileError < WeblocError; end 7 | class InvalidFormatError < WeblocError; end 8 | class EmptyFileError < WeblocError; end 9 | 10 | attr_accessor :url 11 | 12 | def initialize(url) 13 | raise ArgumentError, "URL cannot be nil or empty" if url.nil? || url.empty? 14 | @url = url 15 | end 16 | 17 | def self.load(filename) 18 | raise FileNotFoundError, "File not found: #{filename}" unless File.exist?(filename) 19 | 20 | begin 21 | data = File.read(filename) 22 | rescue => e 23 | raise FileNotFoundError, "Unable to read file '#{filename}': #{e.message}" 24 | end 25 | 26 | raise EmptyFileError, "File is empty: #{filename}" if data.empty? 
27 | 28 | data = data.force_encoding('binary') rescue data 29 | url = nil 30 | 31 | if data !~ /\')[0] 58 | else 59 | raise InvalidFormatError, "Unsupported length encoding in binary webloc file: #{filename}" 60 | end 61 | 62 | raise CorruptedFileError, "Invalid URL length (#{length}) in file: #{filename}" if length <= 0 || length > data.length 63 | 64 | url = data[offset + length_offset, length] 65 | raise CorruptedFileError, "Extracted URL is empty from file: #{filename}" if url.nil? || url.empty? 66 | 67 | url 68 | rescue CorruptedFileError, InvalidFormatError => e 69 | raise e 70 | rescue => e 71 | raise CorruptedFileError, "Failed to parse binary webloc format in file '#{filename}': #{e.message}" 72 | end 73 | end 74 | 75 | def self.parse_xml_format(filename) 76 | begin 77 | plist_data = Plist::parse_xml(filename) 78 | raise InvalidFormatError, "Invalid XML plist format - could not parse file: #{filename}" unless plist_data.is_a?(Hash) 79 | 80 | url = plist_data['URL'] 81 | raise CorruptedFileError, "No 'URL' key found in plist file: #{filename}" unless url 82 | 83 | url 84 | rescue => e 85 | if e.message.include?('parse') || e.message.include?('XML') || e.message.include?('plist') 86 | raise InvalidFormatError, "Invalid XML plist format in file '#{filename}': #{e.message}" 87 | else 88 | raise CorruptedFileError, "Failed to parse XML webloc format in file '#{filename}': #{e.message}" 89 | end 90 | end 91 | end 92 | 93 | public 94 | 95 | def data 96 | # PLIST HEADER 97 | @data = "bplist\x30\x30".bytes 98 | 99 | # PLIST OBJECT TABLE 100 | @data += "\xD1\x01\x02".bytes # object 1 is a dictionary 101 | @data += "SURL".bytes # object 2 102 | 103 | length_suffix = @url.length > 255 ? "\x11" : "\x10" 104 | @data += ("\x5f" + length_suffix).bytes # object 3 is an ASCII string with a variable length length encoding (I know..) 105 | # .. the '0' in \x10 denotes the length can be encoded within 2**0 bytes (i.e. 1) 106 | # .. 
the '1' in \x11 denotes the length can be encoded within 2**1 bytes (i.e. 2) 107 | 108 | if @url.length > 255 109 | @data += [@url.length].pack('S>').bytes 110 | else 111 | @data += [@url.length].pack('C').bytes 112 | end 113 | @data += @url.bytes # and finally the URL itself 114 | 115 | # This is the offset table 116 | @data += "\x08\x0B\x0F".bytes # so objects at 0x08, 0x0b and 0x0f 117 | 118 | # PLIST TRAILER 119 | # Bytes 0-4 are unused 120 | @data += "\x00\x00\x00\x00\x00".bytes 121 | # Byte 5 is the sort version 122 | @data += "\x00".bytes 123 | # Byte 6 is how many bytes are needed for each offset table offset 124 | @data += "\x01".bytes 125 | @data += "\x01".bytes 126 | # Bytes 8-15 are how many objects are contained in the plist 127 | @data += "\x00\x00\x00\x00\x00\x00\x00\x03".bytes 128 | # Bytes 16-23 are for an offset from the offset table 129 | @data += "\x00\x00\x00\x00\x00\x00\x00\x00".bytes 130 | # Bytes 24-31 denote the position of the offset table from the start of the file 131 | @data += "\x00\x00\x00\x00\x00\x00".bytes + [@url.length + 18].pack('S>').bytes 132 | 133 | @data = @data.pack('C*') 134 | end 135 | 136 | def save(filename) 137 | raise ArgumentError, "Filename cannot be nil or empty" if filename.nil? || filename.empty? 138 | 139 | begin 140 | File.open(filename, 'wb') { |f| f.write data } 141 | rescue => e 142 | raise WeblocError, "Failed to save webloc file '#{filename}': #{e.message}" 143 | end 144 | end 145 | end --------------------------------------------------------------------------------