├── pakman ├── test │ ├── pages │ │ ├── empty.txt │ │ ├── text.txt │ │ ├── page2.txt │ │ ├── page3.txt │ │ └── page1.txt │ ├── liquid │ │ ├── pak │ │ │ ├── hello.txt │ │ │ ├── test.txt │ │ │ ├── s9logo.png │ │ │ ├── hello.doc │ │ │ ├── testbin.txt │ │ │ └── test.html │ │ └── test.html │ ├── erb │ │ └── pak │ │ │ ├── test.txt │ │ │ └── test.html.erb │ ├── helper.rb │ ├── data │ │ └── test.yml │ ├── test_page.rb │ ├── test_erb.rb │ ├── test_liquid_drops.rb │ ├── test_liquid_binaries.rb │ └── test_liquid.rb ├── History.md ├── lib │ ├── pakman │ │ ├── version.rb │ │ ├── utils.rb │ │ ├── erb │ │ │ ├── template.rb │ │ │ └── templater.rb │ │ ├── cli │ │ │ ├── commands │ │ │ │ ├── fetch.rb │ │ │ │ ├── list.rb │ │ │ │ └── gen.rb │ │ │ ├── ctx.rb │ │ │ ├── helpers.rb │ │ │ ├── opts.rb │ │ │ └── runner.rb │ │ ├── copier.rb │ │ ├── finder.rb │ │ ├── page.rb │ │ ├── liquid │ │ │ ├── template.rb │ │ │ └── templater.rb │ │ ├── manifest.rb │ │ └── fetcher.rb │ └── pakman.rb ├── .gitignore ├── bin │ └── pakman ├── TODOS.md ├── Rakefile ├── Manifest.txt └── README.md ├── linkto ├── NOTES.md ├── HISTORY.md ├── lib │ ├── linkto │ │ ├── version.rb │ │ ├── bing.rb │ │ ├── untappd.rb │ │ ├── wikipedia.rb │ │ ├── google.rb │ │ └── flickr.rb │ └── linkto.rb ├── .gitignore ├── Manifest.txt ├── test │ ├── test_wikipedia.rb │ ├── helper.rb │ └── test_google.rb ├── Rakefile └── README.md ├── textutils-more ├── README.md ├── .gitignore └── lib │ └── textutils │ ├── reader │ └── markdown_reader.rb │ └── table │ └── table_reader.rb ├── textutils ├── HISTORY.md ├── TODO.md ├── test │ ├── data │ │ ├── de-deutschland │ │ │ ├── orte.txt │ │ │ └── 3--by-bayern │ │ │ │ └── 4--oberfranken │ │ │ │ ├── orte_ii.txt │ │ │ │ └── orte.txt │ │ ├── feedburner.txt │ │ └── cl_all.txt │ ├── helper.rb │ ├── test_tree_reader_ii.rb │ ├── test_unicode_helper.rb │ ├── test_fixture_reader.rb │ ├── test_taglist.rb │ ├── test_tree_reader.rb │ ├── test_block_reader.rb │ ├── test_title_mapper2.rb │ ├── test_slugify.rb │ ├── 
test_asciify.rb │ ├── test_title_mapper.rb │ ├── test_title_finder.rb │ ├── test_title_helper.rb │ ├── test_address_helper.rb │ └── test_hypertext_helper.rb ├── lib │ ├── textutils │ │ ├── filter │ │ │ ├── erb_filter.rb │ │ │ ├── code_filter.rb │ │ │ ├── comment_filter.rb │ │ │ └── erb_django_filter.rb │ │ ├── core_ext │ │ │ ├── time.rb │ │ │ ├── file.rb │ │ │ └── array.rb │ │ ├── version.rb │ │ ├── helper │ │ │ ├── xml_helper.rb │ │ │ ├── tag_helper.rb │ │ │ ├── unicode_helper.rb │ │ │ ├── date_helper.rb │ │ │ ├── value_helper_iii_numbers.rb │ │ │ ├── value_helper_ii.rb │ │ │ ├── value_helper_i.rb │ │ │ ├── title_helper.rb │ │ │ ├── address_helper.rb │ │ │ └── hypertext_helper.rb │ │ ├── reader │ │ │ ├── code_reader.rb │ │ │ ├── block_reader.rb │ │ │ ├── line_reader.rb │ │ │ ├── fixture_reader.rb │ │ │ ├── tree_reader.rb │ │ │ └── hash_reader.rb │ │ ├── utils.rb │ │ ├── sanitizier.rb │ │ ├── parser │ │ │ ├── name_tokenizer.rb │ │ │ └── name_parser.rb │ │ ├── patterns.rb │ │ ├── classifier.rb │ │ ├── title_mapper.rb │ │ ├── page.rb │ │ ├── title_mapper2.rb │ │ └── title.rb │ └── textutils.rb ├── .gitignore ├── Rakefile ├── Manifest.txt └── README.md ├── README.md ├── attic ├── fixture_reader.rb ├── line_reader_v2.rb ├── values_reader_v2.rb ├── hash_reader_v2.rb └── values_reader.rb └── NOTES.md /pakman/test/pages/empty.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /linkto/NOTES.md: -------------------------------------------------------------------------------- 1 | # Notes n Tips 2 | 3 | -------------------------------------------------------------------------------- /textutils-more/README.md: -------------------------------------------------------------------------------- 1 | # textutils-more 2 | 3 | 4 | -------------------------------------------------------------------------------- /pakman/test/pages/text.txt: 
-------------------------------------------------------------------------------- 1 | just some text 2 | no headers 3 | 4 | -------------------------------------------------------------------------------- /pakman/test/liquid/pak/hello.txt: -------------------------------------------------------------------------------- 1 | 2 | just some text 3 | no front matter 4 | -------------------------------------------------------------------------------- /pakman/test/pages/page2.txt: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | 4 | try empty front matter 5 | 6 | -------------------------------------------------------------------------------- /pakman/test/pages/page3.txt: -------------------------------------------------------------------------------- 1 | --- 2 | # try empty front matter with comments 3 | --- -------------------------------------------------------------------------------- /pakman/History.md: -------------------------------------------------------------------------------- 1 | ## 0.0.1 / 2012-07-17 2 | 3 | * Everything is new. First release 4 | -------------------------------------------------------------------------------- /pakman/test/pages/page1.txt: -------------------------------------------------------------------------------- 1 | --- 2 | title: hello 3 | --- 4 | 5 | some text here 6 | -------------------------------------------------------------------------------- /textutils/HISTORY.md: -------------------------------------------------------------------------------- 1 | ### 0.1.0 / 2012-06-09 2 | 3 | * Everything is new. First release -------------------------------------------------------------------------------- /linkto/HISTORY.md: -------------------------------------------------------------------------------- 1 | ### 0.0.1 / 2014-03-15 2 | 3 | * Everything is new. First release. 
4 | 5 | -------------------------------------------------------------------------------- /linkto/lib/linkto/version.rb: -------------------------------------------------------------------------------- 1 | 2 | module Linkto 3 | VERSION = '0.1.1' 4 | end 5 | 6 | 7 | -------------------------------------------------------------------------------- /pakman/test/erb/pak/test.txt: -------------------------------------------------------------------------------- 1 | ###### 2 | # simple test manifest 3 | 4 | __file__.html test.html.erb -------------------------------------------------------------------------------- /pakman/test/liquid/pak/test.txt: -------------------------------------------------------------------------------- 1 | ###### 2 | # simple test manifest 3 | 4 | __file__.html test.html 5 | -------------------------------------------------------------------------------- /pakman/lib/pakman/version.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | VERSION = '1.1.0' 5 | end 6 | -------------------------------------------------------------------------------- /pakman/test/liquid/pak/s9logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rubycocos/text/master/pakman/test/liquid/pak/s9logo.png -------------------------------------------------------------------------------- /pakman/.gitignore: -------------------------------------------------------------------------------- 1 | # ignore generated folders 2 | pkg/ 3 | doc/ 4 | tmp/ 5 | 6 | # ignore jekyll generated output 7 | site/_site/ 8 | 9 | -------------------------------------------------------------------------------- /pakman/test/helper.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # minitest setup 4 | require 'minitest/autorun' 5 | 6 | 7 | ## our own code 8 | require 'pakman' 9 | 10 | 
-------------------------------------------------------------------------------- /textutils/TODO.md: -------------------------------------------------------------------------------- 1 | # TODOs 2 | 3 | - [ ] add line number to unicode dash warning e.g. *** warning: found ndash U+2013 (-) in file >at-austria/2013_14/cup.txt<; converting to plain ascii hyphen_minus (-) 4 | 5 | -------------------------------------------------------------------------------- /pakman/test/liquid/pak/hello.doc: -------------------------------------------------------------------------------- 1 | --- 2 | front matter here 3 | --- 4 | 5 | try "unkown extension" 6 | just some text here 7 | 8 | note: front matter will not matter, that is, will get ignored (e.g. not checked) 9 | -------------------------------------------------------------------------------- /pakman/test/liquid/pak/testbin.txt: -------------------------------------------------------------------------------- 1 | ###### 2 | # test manifest with binary files e.g. graphics 3 | # and "unknown" extensions (will get handled like binary e.g. 
copied 1:1) 4 | 5 | s9logo.png 6 | hello.txt 7 | hello.doc 8 | -------------------------------------------------------------------------------- /linkto/.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | coverage 6 | InstalledFiles 7 | lib/bundler/man 8 | pkg 9 | rdoc 10 | spec/reports 11 | test/tmp 12 | test/version_tmp 13 | tmp 14 | 15 | # YARD artifacts 16 | .yardoc 17 | _yardoc 18 | doc/ 19 | -------------------------------------------------------------------------------- /pakman/test/data/test.yml: -------------------------------------------------------------------------------- 1 | headers: 2 | title: test title 3 | author: test author 4 | 5 | 6 | slides: 7 | - header: test header 1 8 | content: test content 1 9 | - header: test header 2 10 | content: test content 2 11 | - content: test content 3 12 | 13 | -------------------------------------------------------------------------------- /textutils/test/data/de-deutschland/orte.txt: -------------------------------------------------------------------------------- 1 | 2 Bayern 2 | 24 .. Oberfranken 3 | 241 .... Bamberg (Stadt) ## Kreisfreie Stadt 4 | ...... Bamberg 5 | ........ Bamberg 6 | 7 | ##### 8 | # todo: for testing add berlin and some more 9 | 10 | 9 Berlin 11 | 91 .. Berlin 12 | 13 | -------------------------------------------------------------------------------- /pakman/bin/pakman: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | ################### 4 | # == DEV TIPS: 5 | # 6 | # For local testing run like: 7 | # 8 | # ruby -Ilib bin/pakman 9 | # 10 | # Set the executable bit in Linux. 
Example: 11 | # 12 | # % chmod a+x bin/pakman 13 | # 14 | 15 | require 'pakman' 16 | 17 | Pakman.main 18 | -------------------------------------------------------------------------------- /linkto/Manifest.txt: -------------------------------------------------------------------------------- 1 | HISTORY.md 2 | Manifest.txt 3 | README.md 4 | Rakefile 5 | lib/linkto.rb 6 | lib/linkto/bing.rb 7 | lib/linkto/flickr.rb 8 | lib/linkto/google.rb 9 | lib/linkto/untappd.rb 10 | lib/linkto/version.rb 11 | lib/linkto/wikipedia.rb 12 | test/helper.rb 13 | test/test_google.rb 14 | test/test_wikipedia.rb 15 | -------------------------------------------------------------------------------- /textutils/test/helper.rb: -------------------------------------------------------------------------------- 1 | 2 | ## $:.unshift(File.dirname(__FILE__)) 3 | 4 | ## minitest setup 5 | 6 | require 'minitest/autorun' 7 | 8 | 9 | ## make sure activesupport gets included/required 10 | # note: just activesupport or active_support will NOT work 11 | # require 'active_support/all' # -- now included in textutils itself 12 | 13 | 14 | ## our own code 15 | 16 | require 'textutils' 17 | -------------------------------------------------------------------------------- /pakman/lib/pakman/utils.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | 6 | # downcase and remove .txt (if anywhere in name) 7 | # e.g. 
welcome.quick.txt becomes welcome.quick 8 | # welcome.txt.quick becomes welcome.quick 9 | # s6blank.txt becomes s6blank 10 | 11 | def self.pakname_from_file( path ) 12 | File.basename( path ).downcase.gsub( '.txt', '' ) 13 | end 14 | 15 | end # module Pakman 16 | -------------------------------------------------------------------------------- /textutils/lib/textutils/filter/erb_filter.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module TextUtils 4 | module Filter 5 | 6 | # allow plugins/helpers; process source (including header) using erb 7 | def erb( content, options={} ) 8 | puts " Running embedded Ruby (erb) code/helpers..." 9 | 10 | content = ERB.new( content ).result( binding() ) 11 | content 12 | end 13 | 14 | end # module Filter 15 | end # module TextUtils -------------------------------------------------------------------------------- /textutils/test/data/feedburner.txt: -------------------------------------------------------------------------------- 1 | #################################### 2 | # feedburner text pattern (regex) 3 | # 4 | # pattern (regex) 5 | # --- 6 | # test1 7 | # --- 8 | # test2 9 | # --- 10 | # etc. 11 | 12 | 13 | ]*? 
14 | src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1 15 | .*?> 16 | 17 | --- 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /linkto/lib/linkto/bing.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Linkto 4 | module BingHelper 5 | 6 | 7 | def link_to_bing_search_images( q, opts={} ) 8 | link_to q, "http://www.bing.com/images/search?q=#{q}", opts 9 | end 10 | 11 | ############################ 12 | # shortcuts / aliases 13 | 14 | def bing_search_images( q, opts={} ) link_to_bing_search_images( q, opts) end 15 | 16 | 17 | end # module BingHelper 18 | end # module Linkto 19 | -------------------------------------------------------------------------------- /linkto/lib/linkto/untappd.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Linkto 4 | module UntappdHelper 5 | 6 | 7 | def link_to_untappd_search( q, opts={} ) 8 | link_to q, "https://untappd.com/search?q=#{q}", opts 9 | end 10 | 11 | 12 | ############################### 13 | # shortcuts / aliases 14 | 15 | def untappd_search( q, opts={} ) link_to_untappd_search( q, opts ) end 16 | 17 | 18 | end # module UntappdHelper 19 | end # module Linkto 20 | -------------------------------------------------------------------------------- /textutils/test/data/cl_all.txt: -------------------------------------------------------------------------------- 1 | ##################################### 2 | # test data for fixture reader 3 | 4 | 5 | # -- leagues 6 | 7 | europe-champions-league!/leagues 8 | 9 | # -- 2011_12 10 | 11 | europe-champions-league!/2011_12/cl 12 | europe-champions-league!/2011_12/el 13 | 14 | # -- 2012_13 15 | 16 | europe-champions-league!/2012_13/cl 17 | europe-champions-league!/2012_13/el 18 | 19 | # -- 2013_14 20 | 21 | europe-champions-league!/2013_14/cl 22 | 23 | 
-------------------------------------------------------------------------------- /pakman/TODOS.md: -------------------------------------------------------------------------------- 1 | # Todos 2 | 3 | - [ ] check file for front matter; use more "efficient" way 4 | 5 | e.g. do NOT load complete file; just a look-ahead; 6 | try to make it work for binary file too? why? why not? 7 | check how jekyll checks for front matter; does jekyll also 8 | check binary files? does the file extension matter (e.g. png, gif, html, css, etc)?? 9 | 10 | 11 | ## robots.txt 12 | 13 | - [ ] see osm blogs templates; uses robots.txt template - do NOT use as manifest; add to exclude list !!!! 14 | -------------------------------------------------------------------------------- /textutils/test/test_tree_reader_ii.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # to run use 5 | # ruby -I ./lib -I ./test test/test_tree_reader_ii.rb 6 | 7 | 8 | require 'helper' 9 | 10 | class TestTreeReaderIi < MiniTest::Test 11 | 12 | def test_at_n 13 | reader = TreeReader.from_file( "#{TextUtils.root}/test/data/at-austria/1--n-niederoesterreich/orte.txt" ) 14 | 15 | reader.check 16 | 17 | assert true ## assume everything ok if we get here 18 | end 19 | 20 | end # class TestTreeReaderIi 21 | -------------------------------------------------------------------------------- /linkto/test/test_wikipedia.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | require 'helper' 5 | 6 | 7 | class TestWikipedia < MiniTest::Unit::TestCase 8 | 9 | include LinktoHelper 10 | 11 | def test_search 12 | 13 | assert_equal "ottakringer", wikipedia_search( 'ottakringer' ) 14 | assert_equal "ottakringer", wikipedia_de_search( 'ottakringer' ) 15 | 16 | end 17 | 18 | end # class TestWikipedia 19 | -------------------------------------------------------------------------------- 
/pakman/test/liquid/test.html: -------------------------------------------------------------------------------- 1 | 2 |

Headers

3 | 4 | 11 | 12 | 13 |

Slides

14 | 15 | {% for slide in slides %} 16 |
{{ slide['content'] }}
17 |
{{ slide['header'] }}
18 | 19 |
{{ slide.content }}
20 |
{{ slide.header }}
21 | {% endfor %} 22 | -------------------------------------------------------------------------------- /pakman/test/erb/pak/test.html.erb: -------------------------------------------------------------------------------- 1 | 2 | 3 | pakman Test Template 4 | 5 | 6 | 7 |

Hello pakman

8 | 9 |

Headers

10 | 11 | 15 | 16 |

Slides

17 | 18 | <% slides.each do |slide| %> 19 |
<%= slide['content'] %>
20 |
<%= slide['header'] %>
21 | <% end %> 22 | 23 | 24 | -------------------------------------------------------------------------------- /pakman/test/liquid/pak/test.html: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | 4 | 5 | 6 | pakman Test Template 7 | 8 | 9 | 10 |

Hello pakman

11 | 12 |

Headers

13 | 14 | 18 | 19 |

Slides

20 | 21 | {% for slide in slides %} 22 |
{{ slide.content }}
23 |
{{ slide.header }}
24 | {% endfor %} 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /textutils/lib/textutils/core_ext/time.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | class Time 5 | 6 | def self.cet( str ) # central european time (cet) + central european summer time (cest) 7 | ActiveSupport::TimeZone['Vienna'].parse( str ) 8 | end 9 | 10 | def self.eet( str ) # eastern european time (eet) + 2 hours 11 | ActiveSupport::TimeZone['Bucharest'].parse( str ) 12 | end 13 | 14 | def self.cst( str ) # central standard time (cst) - 6 hours 15 | ActiveSupport::TimeZone['Mexico City'].parse( str ) 16 | end 17 | 18 | end # class Time 19 | 20 | -------------------------------------------------------------------------------- /textutils/test/test_unicode_helper.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # to run use 5 | # ruby -I ./lib -I ./test test/test_helper.rb 6 | # or better 7 | # rake test 8 | 9 | require 'helper' 10 | 11 | class TestUnicodeHelper < Minitest::Test 12 | 13 | def test_convert_unicode_dashes 14 | 15 | txt_in = "\u2010 \u2011 \u2212 \u2013 \u2014" # NB: unicode chars require double quoted strings 16 | txt_out = '- - - - -' 17 | 18 | assert_equal txt_out, TextUtils.convert_unicode_dashes_to_plain_ascii( txt_in ) 19 | end 20 | 21 | end # class TestUnicodeHelper -------------------------------------------------------------------------------- /textutils/lib/textutils/version.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module TextUtils 4 | 5 | MAJOR = 1 ## todo: namespace inside version or something - why? why not?? 
6 | MINOR = 4 7 | PATCH = 0 8 | VERSION = [MAJOR,MINOR,PATCH].join('.') 9 | 10 | def self.version 11 | VERSION 12 | end 13 | 14 | def self.banner 15 | "textutils/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]" 16 | end 17 | 18 | def self.root 19 | "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}" 20 | end 21 | 22 | end # module TextUtils 23 | 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # text tools, libraries & scripts 2 | 3 | Gems: 4 | 5 | - [**textutils**](textutils) - text filters, helpers, readers and more 6 | - [textutils-more](textutils-more) 7 | 8 | 9 | 10 | - [linkto](linkto) - link_to helpers for google search, bing search, flickr photo search, flickr photo tag, etc. 11 | 12 | 13 | 14 | 15 | - [pakman](pakman) - template pack manager (incl. embedded ruby, liquid, etc.) 16 | 17 | 18 | 19 | 20 | ## License 21 | 22 | The scripts are dedicated to the public domain. 23 | Use it as you please with no restrictions whatsoever. 
24 | -------------------------------------------------------------------------------- /linkto/lib/linkto/wikipedia.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Linkto 4 | module WikipediaHelper 5 | 6 | 7 | def link_to_wikipedia_search( q, opts={} ) 8 | link_to q, "http://en.wikipedia.org/?search=#{q}", opts 9 | end 10 | 11 | def link_to_wikipedia_de_search( q, opts={} ) 12 | link_to q, "http://de.wikipedia.org/?search=#{q}", opts 13 | end 14 | 15 | 16 | ############################### 17 | # shortcuts / aliases 18 | 19 | def wikipedia_search( q, opts={} ) link_to_wikipedia_search( q, opts ) end 20 | def wikipedia_de_search( q, opts={} ) link_to_wikipedia_de_search( q, opts ) end 21 | 22 | 23 | end # module WikipediaHelper 24 | end # module Linkto 25 | -------------------------------------------------------------------------------- /textutils/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt: -------------------------------------------------------------------------------- 1 | 2 Bayern 2 | 24 .. Oberfranken 3 | 241 .... Bamberg (Stadt) ## Kreisfreie Stadt 4 | ...... Bamberg 5 | ........ Bamberg 6 | 242 .... Bayreuth (Stadt) ## Kreisfreie Stadt 7 | ...... Bayreuth 8 | ........ Bayreuth 9 | 10 | 245 .... Bamberg (Land) ## Landkreis -- 36 Gemeinden; see de.wikipedia.org/wiki/Landkreis_Bamberg 11 | ## 4 Städte 12 | ...... Baunach ## (4013, 30,9 km²) 13 | ........ Baunach 14 | ...... Hallstadt ## (8364, 14,5 km²) 15 | ........ Hallstadt ## (7588) 16 | ........ 
Dörfleins ## (1380) 17 | 18 | -------------------------------------------------------------------------------- /linkto/test/helper.rb: -------------------------------------------------------------------------------- 1 | ## $:.unshift(File.dirname(__FILE__)) 2 | 3 | ## minitest setup 4 | 5 | # require 'minitest/unit' 6 | require 'minitest/autorun' 7 | 8 | # include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase 9 | 10 | ## make sure activesupport gets included/required 11 | # note: just activesupport or active_support will NOT work 12 | # require 'active_support/all' 13 | 14 | ## our own code 15 | 16 | require 'linkto' 17 | 18 | 19 | ### simple link_to method 20 | # - no need to include UrlHelper from Rails 21 | 22 | def link_to( title, link, opts={} ) 23 | ### 24 | # fix: 25 | # opts get ignored for now!! 26 | 27 | "#{title}" 28 | end 29 | 30 | -------------------------------------------------------------------------------- /linkto/test/test_google.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | require 'helper' 5 | 6 | 7 | class TestGoogle < MiniTest::Unit::TestCase 8 | 9 | include LinktoHelper 10 | 11 | %i( google_search link_to_google_search ).each do |method| 12 | define_method("test #{method}") do 13 | assert_equal "open mundi", send(method, 'open mundi') 14 | end 15 | end 16 | 17 | %i( google_de_search link_to_google_de_search ).each do |method| 18 | define_method("test #{method}") do 19 | assert_equal "open mundi", send(method, 'open mundi') 20 | end 21 | end 22 | 23 | end # class TestGoogle 24 | -------------------------------------------------------------------------------- /textutils/.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | /.config 4 | /coverage/ 5 | /InstalledFiles 6 | /pkg/ 7 | /spec/reports/ 8 | /test/tmp/ 9 | /test/version_tmp/ 10 | /tmp/ 11 | 12 | ## Specific to RubyMotion: 13 | 
.dat* 14 | .repl_history 15 | build/ 16 | 17 | ## Documentation cache and generated files: 18 | /.yardoc/ 19 | /_yardoc/ 20 | /doc/ 21 | /rdoc/ 22 | 23 | ## Environment normalisation: 24 | /.bundle/ 25 | /vendor/bundle 26 | /lib/bundler/man/ 27 | 28 | # for a library or gem, you might want to ignore these files since the code is 29 | # intended to run in multiple environments; otherwise, check them in: 30 | # Gemfile.lock 31 | # .ruby-version 32 | # .ruby-gemset 33 | 34 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: 35 | .rvmrc 36 | 37 | 38 | -------------------------------------------------------------------------------- /textutils-more/.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | /.config 4 | /coverage/ 5 | /InstalledFiles 6 | /pkg/ 7 | /spec/reports/ 8 | /test/tmp/ 9 | /test/version_tmp/ 10 | /tmp/ 11 | 12 | ## Specific to RubyMotion: 13 | .dat* 14 | .repl_history 15 | build/ 16 | 17 | ## Documentation cache and generated files: 18 | /.yardoc/ 19 | /_yardoc/ 20 | /doc/ 21 | /rdoc/ 22 | 23 | ## Environment normalisation: 24 | /.bundle/ 25 | /vendor/bundle 26 | /lib/bundler/man/ 27 | 28 | # for a library or gem, you might want to ignore these files since the code is 29 | # intended to run in multiple environments; otherwise, check them in: 30 | # Gemfile.lock 31 | # .ruby-version 32 | # .ruby-gemset 33 | 34 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: 35 | .rvmrc 36 | 37 | 38 | -------------------------------------------------------------------------------- /textutils/lib/textutils/core_ext/file.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | class File 4 | def self.read_utf8( path ) 5 | text = open( path, 'r:bom|utf-8' ) do |file| 6 | file.read 7 | end 8 | 9 | ## 10 | ## todo: make normalize newlines into a filter (for easy (re)use) 11 | 12 | ## normalize newlines 13 | ## 
always use LF \n (Unix): 14 | ## 15 | ## convert CR/LF \r\n (Windows) => \n 16 | ## convert CR \r (old? Mac) => \n -- still in use? 17 | text = text.gsub( /\r\n|\r/, "\n" ) 18 | 19 | # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus 20 | text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path ) 21 | 22 | text 23 | end 24 | end # class File 25 | 26 | -------------------------------------------------------------------------------- /textutils/lib/textutils/helper/xml_helper.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module TextUtils 4 | module XmlHelper 5 | 6 | 7 | def prettify_xml( xml ) 8 | require 'rexml/document' 9 | 10 | begin 11 | d = REXML::Document.new( xml ) 12 | 13 | # d.write( pretty_xml="", 2 ) 14 | # pretty_xml # return prettified xml 15 | 16 | formatter = REXML::Formatters::Pretty.new( 2 ) # indent=2 17 | formatter.compact = true # This is the magic line that does what you need! 18 | pretty_xml = formatter.write( d.root, "" ) # todo/checl: what's 2nd arg used for ?? 19 | pretty_xml 20 | rescue Exception => ex 21 | "warn: prettify_xml failed: #{ex}\n\n\n" + xml 22 | end 23 | end 24 | 25 | 26 | end # module XmlHelper 27 | end # module TextUtils 28 | -------------------------------------------------------------------------------- /linkto/Rakefile: -------------------------------------------------------------------------------- 1 | require 'hoe' 2 | require './lib/linkto/version.rb' 3 | 4 | Hoe.spec 'linkto' do 5 | 6 | self.version = Linkto::VERSION 7 | 8 | self.summary = 'linkto - link_to helpers for google search, bing search, flickr photo search, flickr photo tag, etc.' 
9 | self.description = summary 10 | 11 | self.urls = ['https://github.com/rubylibs/linkto'] 12 | 13 | self.author = 'Gerald Bauer' 14 | self.email = 'webslideshow@googlegroups.com' 15 | 16 | # switch extension to .markdown for gihub formatting 17 | self.readme_file = 'README.md' 18 | self.history_file = 'HISTORY.md' 19 | 20 | self.extra_deps = [ 21 | ['logutils' ] 22 | ] 23 | 24 | self.licenses = ['Public Domain'] 25 | 26 | self.spec_extras = { 27 | :required_ruby_version => '>= 1.9.2' 28 | } 29 | 30 | 31 | end 32 | -------------------------------------------------------------------------------- /pakman/lib/pakman/erb/template.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | class ErbTemplate 6 | 7 | def self.from_file( path ) 8 | ## todo/fix: update logutils - (auto-)add ("static") logger helper/mixin too!!!!! 9 | LogKernel::Logger[ self ].info " Loading template (from file) >#{path}<..." 10 | text = File.open( path, 'r:bom|utf-8' ).read ## note: assume utf8 11 | self.new( text, path: path ) ## note: pass along path as an option 12 | end 13 | 14 | def self.from_string( text ) ### use parse as alias - why?? why not?? 
15 | self.new( text ) 16 | end 17 | 18 | def initialize( text, opts={} ) 19 | @template = ERB.new( text ) 20 | end 21 | 22 | def render( binding ) 23 | @template.result( binding ) 24 | end 25 | 26 | end # class ErbTemplate 27 | end # module Pakman 28 | -------------------------------------------------------------------------------- /textutils/test/test_fixture_reader.rb: -------------------------------------------------------------------------------- 1 | require 'helper' 2 | 3 | 4 | class TestFixtureReader < Minitest::Test 5 | 6 | def test_read 7 | path = "#{TextUtils.root}/test/data/cl_all.txt" 8 | puts "[TestFixtureReader.test_read] path: #{path}" 9 | 10 | reader = FixtureReader.from_file( path ) 11 | 12 | ary = [ 13 | 'europe-champions-league!/leagues', 14 | 'europe-champions-league!/2011_12/cl', 15 | 'europe-champions-league!/2011_12/el', 16 | 'europe-champions-league!/2012_13/cl', 17 | 'europe-champions-league!/2012_13/el', 18 | 'europe-champions-league!/2013_14/cl' ] 19 | 20 | i=0 21 | reader.each do |fx| 22 | assert_equal ary[i], fx 23 | i+=1 24 | end 25 | end # method test_read 26 | 27 | end # class TestFixtureReader 28 | 29 | -------------------------------------------------------------------------------- /linkto/lib/linkto/google.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Linkto 4 | module GoogleHelper 5 | 6 | def link_to_google_search( q, opts={} ) 7 | link_to q, "https://www.google.com/search?q=#{q}", opts 8 | end 9 | 10 | def link_to_google_de_search( q, opts={} ) 11 | link_to q, "https://www.google.de/search?hl=de&q=#{q}", opts 12 | end 13 | 14 | 15 | def link_to_google_search_images( q, opts={} ) 16 | link_to q, "https://www.google.com/search?tbm=isch&q=#{q}", opts 17 | end 18 | 19 | 20 | ############################### 21 | # shortcuts / aliases 22 | 23 | alias_method :google_search, :link_to_google_search 24 | alias_method :google_de_search, :link_to_google_de_search 25 
| alias_method :google_search_images, :link_to_google_search_images 26 | 27 | end # module GoogleHelper 28 | end # module Linkto 29 | -------------------------------------------------------------------------------- /pakman/lib/pakman/cli/commands/fetch.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | class Fetch 6 | 7 | include LogUtils::Logging 8 | 9 | def initialize( opts ) 10 | @opts = opts 11 | end 12 | 13 | attr_reader :opts 14 | 15 | def run 16 | logger.debug "fetch_uri: >#{opts.fetch_uri}<" 17 | src = opts.fetch_uri 18 | 19 | uri = URI.parse( src ) 20 | logger.debug "scheme: >#{uri.scheme}<, host: >#{uri.host}<, port: >#{uri.port}<, path: >#{uri.path}<" 21 | 22 | pakname = Pakman.pakname_from_file( uri.path ) 23 | logger.debug "pakname: >#{pakname}<" 24 | 25 | pakpath = File.expand_path( pakname, opts.config_path ) 26 | logger.debug "pakpath: >#{pakpath}<" 27 | 28 | Fetcher.new.fetch_pak( src, pakpath ) 29 | end # method run 30 | 31 | end # class Fetch 32 | end # module Pakman 33 | -------------------------------------------------------------------------------- /textutils-more/lib/textutils/reader/markdown_reader.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | ####################################################### 5 | ############# work in progress ####################### 6 | # 7 | # NOTE: do NOT include for now in packaged gem 8 | 9 | ####### 10 | ## read data records "encoded" in markdown / plain text 11 | ### 12 | 13 | class MarkdownReader 14 | 15 | include LogUtils::Logging 16 | 17 | def self.from_file( path ) 18 | text = 'to be done' 19 | self.from_string( text ) 20 | end 21 | 22 | def self.from_string( text ) 23 | MarkdownReader.new( text ) 24 | end 25 | 26 | def initialize( path, more_attribs={} ) 27 | @more_attribs = more_attribs 28 | @text = text 29 | ## to be done 30 | end 31 | 32 | ## to be 
done 33 | 34 | end # class MarkdownReader 35 | 36 | -------------------------------------------------------------------------------- /textutils/test/test_taglist.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | require 'helper' 5 | 6 | 7 | class TestTaglist < Minitest::Test 8 | 9 | include TextUtils::ValueHelper # lets us use is_taglist?, etc. 10 | 11 | def test_taglist_starting_w_digit 12 | ## for now - taglist cannot start w/ number 13 | assert is_taglist?( '20 ha' ) == false 14 | assert is_taglist?( '5000 hl' ) == false 15 | assert is_taglist?( '5_000 hl' ) == false 16 | end 17 | 18 | def test_taglist_upcase 19 | ## taglist cannot use upcase letters 20 | assert is_taglist?( 'ABC' ) == false 21 | end 22 | 23 | def test_taglist 24 | assert is_taglist?( 'a' ) 25 | assert is_taglist?( 'a|b|c' ) 26 | assert is_taglist?( 'a b c' ) 27 | assert is_taglist?( 'a_b_c' ) 28 | end 29 | 30 | 31 | end # class TestTaglist 32 | 33 | -------------------------------------------------------------------------------- /pakman/lib/pakman/cli/commands/list.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | class List 6 | 7 | include LogUtils::Logging 8 | 9 | include ManifestHelper 10 | 11 | def initialize( opts ) 12 | @opts = opts 13 | end 14 | 15 | attr_reader :opts 16 | 17 | def run 18 | manifests = installed_template_manifests 19 | 20 | puts 'Installed template packs in search path' 21 | 22 | installed_template_manifest_patterns.each_with_index do |pattern,i| 23 | puts " [#{i+1}] #{pattern}" 24 | end 25 | puts ' include:' 26 | 27 | if manifests.empty? 
28 | puts " -- none --" 29 | else 30 | manifests.each do |manifest| 31 | puts "%16s (%s)" % [manifest[0].gsub('.txt',''), manifest[1]] 32 | end 33 | end 34 | end 35 | 36 | end # class List 37 | end # module Pakman 38 | -------------------------------------------------------------------------------- /pakman/Rakefile: -------------------------------------------------------------------------------- 1 | require 'hoe' 2 | require './lib/pakman/version.rb' 3 | 4 | Hoe.spec 'pakman' do 5 | 6 | self.version = Pakman::VERSION 7 | 8 | self.summary = 'pakman - Template Pack Manager (incl. Embedded Ruby, Liquid, etc.)' 9 | self.description = summary 10 | 11 | self.urls = ['https://github.com/rubylibs/pakman'] 12 | 13 | self.author = 'Gerald Bauer' 14 | self.email = 'wwwmake@googlegroups.com' 15 | 16 | self.extra_deps = [ 17 | ['fetcher', '>= 0.4.5'], 18 | ['logutils', '>= 0.6.1'], 19 | ['liquid', '>= 4.0.0'], 20 | ] 21 | 22 | # switch extension to .rdoc for gihub formatting 23 | # self.readme_file = 'README.md' 24 | # self.history_file = 'History.md' 25 | 26 | self.licenses = ['Public Domain'] 27 | 28 | self.spec_extras = { 29 | required_ruby_version: '>= 2.3' 30 | } 31 | 32 | end 33 | -------------------------------------------------------------------------------- /textutils/test/test_tree_reader.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # to run use 5 | # ruby -I ./lib -I ./test test/test_tree_reader.rb 6 | 7 | 8 | require 'helper' 9 | 10 | class TestTreeReader < MiniTest::Test 11 | 12 | def test_oberfranken 13 | reader = TreeReader.from_file( "#{TextUtils.root}/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt" ) 14 | 15 | reader.each_line do |_| 16 | ## do nothing for now 17 | end 18 | 19 | assert true ## assume everything ok if we get here 20 | end 21 | 22 | def test_de 23 | reader = TreeReader.from_file( "#{TextUtils.root}/test/data/de-deutschland/orte.txt" ) 24 | 25 | 
reader.each_line do |_| 26 | ## do nothing for now 27 | end 28 | 29 | assert true ## assume everything ok if we get here 30 | end 31 | 32 | end # class TestTreeReader 33 | 34 | -------------------------------------------------------------------------------- /pakman/lib/pakman/cli/ctx.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | class Ctx # Context 6 | 7 | def initialize( hash ) 8 | @hash = hash 9 | end 10 | 11 | def ctx 12 | ### todo: check if method_missing works with binding in erb??? 13 | binding 14 | end 15 | 16 | def method_missing( mn, *args, &blk ) 17 | ## only allow read-only access (no arguments) 18 | if args.length > 0 # || mn[-1].chr == "=" 19 | return super # super( mn, *args, &blk ) 20 | end 21 | 22 | key = mn.to_s 23 | 24 | if @hash.has_key?( key ) 25 | puts "calling ctx.#{key}" 26 | value = @hash[ key ] 27 | puts " returning #{value.class.name}:" 28 | pp value 29 | value 30 | else 31 | puts "*** warning: ctx.#{key} missing" 32 | super 33 | end 34 | end 35 | 36 | end # class Ctx 37 | end # module Pakman 38 | -------------------------------------------------------------------------------- /textutils/test/test_block_reader.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby -I ./lib -I ./test test/test_block_reader.rb 4 | # or better 5 | # rake test 6 | 7 | require 'helper' 8 | 9 | 10 | class TestBlockReader < MiniTest::Test 11 | 12 | def test_feedburner 13 | blocks = BlockReader.from_file( "#{TextUtils.root}/test/data/feedburner.txt" ).read 14 | 15 | ## note: regex - use %q - do NOT escape \. or \1 etc. 
16 | pattern = %q{]*?src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1.*?>} 17 | 18 | test1 = %q{} 19 | 20 | assert_equal 2, blocks.size 21 | assert_equal pattern, blocks[0].gsub( /[\n ]/, '' ) ## note: need to remove newlines and spaces 22 | assert_equal test1, blocks[1] 23 | end 24 | 25 | end # class TestBlockReader 26 | 27 | -------------------------------------------------------------------------------- /pakman/lib/pakman/cli/helpers.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | module ManifestHelper 6 | 7 | def installed_template_manifest_patterns 8 | # 1) search . # that is, working/current dir 9 | # 2) search 10 | # 3) search /templates 11 | 12 | builtin_patterns = [ 13 | "#{Pakman.root}/templates/*.txt" 14 | ] 15 | config_patterns = [ 16 | "#{File.expand_path(opts.config_path)}/*.txt", 17 | "#{File.expand_path(opts.config_path)}/*/*.txt" 18 | ] 19 | current_patterns = [ 20 | "*.txt", 21 | "*/*.txt" 22 | ] 23 | 24 | patterns = [] 25 | patterns += current_patterns 26 | patterns += config_patterns 27 | patterns += builtin_patterns 28 | end 29 | 30 | def installed_template_manifests 31 | excludes = [ 32 | "Manifest.txt", 33 | "*/Manifest.txt" 34 | ] 35 | 36 | Finder.new.find_manifests( installed_template_manifest_patterns, excludes ) 37 | end 38 | 39 | end # module ManifestHelper 40 | end # module Pakman 41 | -------------------------------------------------------------------------------- /linkto/lib/linkto/flickr.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Linkto 4 | module FlickrHelper 5 | 6 | 7 | ##################### 8 | # browse tags 9 | 10 | def link_to_flickr_tags( tags, opts={} ) # fix: add alias for link_to_flickr_tag 11 | # e.g. 
use 12 | # ottakringer 13 | # ottakringer+beer -- use plus for multiple tags 14 | link_to tags, "http://www.flickr.com/photos/tags/#{tags}", opts 15 | end 16 | 17 | ######################### 18 | # search terms (q) 19 | 20 | def link_to_flickr_search( q, opts={} ) 21 | # e.g. use 22 | # ottakringer 23 | # ottakringer+beer -- note: + is url encoded for space e.g. equals ottakringer beer 24 | link_to q, "http://www.flickr.com/search/?q=#{q}", opts 25 | end 26 | 27 | ############################### 28 | # shortcuts / aliases 29 | 30 | def flickr_tags( tags, opts={} ) link_to_flickr_tags( tags, opts ) end 31 | def flickr_search( q, opts={} ) link_to_flickr_search( q, opts ) end 32 | 33 | 34 | end # module FlickrHelper 35 | end # module Linkto 36 | -------------------------------------------------------------------------------- /pakman/test/test_page.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # to run use 5 | # ruby -I ./lib -I ./test test/test_page.rb 6 | 7 | 8 | require 'helper' 9 | 10 | 11 | class TestPage < MiniTest::Test 12 | 13 | def test_page1 14 | page = Pakman::Page.from_file( "#{Pakman.root}/test/pages/page1.txt" ) 15 | assert page.headers? 16 | end # method test_page1 17 | 18 | def test_page2 19 | page = Pakman::Page.from_file( "#{Pakman.root}/test/pages/page2.txt" ) 20 | assert page.headers? 21 | end # method test_page2 22 | 23 | def test_page3 24 | page = Pakman::Page.from_file( "#{Pakman.root}/test/pages/page3.txt" ) 25 | assert page.headers? 26 | end # method test_page3 27 | 28 | def test_empty 29 | page = Pakman::Page.from_file( "#{Pakman.root}/test/pages/empty.txt" ) 30 | assert page.headers? == false 31 | end # method test_empty 32 | 33 | def test_text 34 | page = Pakman::Page.from_file( "#{Pakman.root}/test/pages/text.txt" ) 35 | assert page.headers? 
== false 36 | end # method test_text 37 | 38 | end # class TestPage 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /textutils/Rakefile: -------------------------------------------------------------------------------- 1 | require 'hoe' 2 | require './lib/textutils/version.rb' 3 | 4 | Hoe.spec 'textutils' do 5 | 6 | self.version = TextUtils::VERSION 7 | 8 | self.summary = 'textutils - Text Filters, Helpers, Readers and More' 9 | self.description = summary 10 | 11 | self.urls = ['https://github.com/textkit/textutils'] 12 | 13 | self.author = 'Gerald Bauer' 14 | self.email = 'ruby-talk@ruby-lang.org' 15 | 16 | # switch extension to .markdown for gihub formatting 17 | self.readme_file = 'README.md' 18 | self.history_file = 'HISTORY.md' 19 | 20 | self.extra_deps = [ 21 | ['props', '>=1.1.2'], 22 | ['logutils', '>=0.6.1'], 23 | ### 3rd party gems 24 | ['rubyzip', '>=1.0.0'], ## note: 1.0 changed to require zip (pre 1.0 was zip/zip); todo/check: make optional -why? why not?? 25 | ['activesupport'] ## todo/check: really needed? 
document what methods get used 26 | ] 27 | 28 | self.licenses = ['Public Domain'] 29 | 30 | self.spec_extras = { 31 | required_ruby_version: '>= 1.9.2' 32 | } 33 | 34 | end 35 | -------------------------------------------------------------------------------- /linkto/lib/linkto.rb: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## require 'props' 4 | 5 | require 'logutils' 6 | 7 | ## require 'textutils' 8 | 9 | 10 | # our own code 11 | 12 | require 'linkto/version' # let it always go first 13 | 14 | require 'linkto/bing' 15 | require 'linkto/flickr' 16 | require 'linkto/google' 17 | require 'linkto/untappd' 18 | require 'linkto/wikipedia' 19 | 20 | 21 | module Linkto 22 | 23 | def self.banner 24 | "linkto/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]" 25 | end 26 | 27 | def self.root 28 | "#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}" 29 | end 30 | 31 | ### convenience - includes all helpers; use include LinktoHelper 32 | module Helper 33 | include BingHelper 34 | include FlickrHelper 35 | include GoogleHelper 36 | include UntappdHelper 37 | include WikipediaHelper 38 | end 39 | 40 | end # module Linkto 41 | 42 | 43 | ## for convenience add aliases for module 44 | LinkTo = Linkto 45 | LinkToHelper = Linkto::Helper 46 | LinktoHelper = Linkto::Helper 47 | 48 | 49 | puts Linkto.banner # say hello 50 | -------------------------------------------------------------------------------- /textutils-more/lib/textutils/table/table_reader.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | ####################################################### 5 | ############# work in progress ####################### 6 | # 7 | # NOTE: do NOT include for now in packaged gem 8 | 9 | 10 | #### 11 | ## move to csvutils ??? why? why not? 
12 | ## 13 | 14 | ####### 15 | ## read data records in csv (comma-separated values) format in plain text 16 | 17 | 18 | class TableReader ## rename to CsvTableReader ? or CsvReader? 19 | 20 | include LogUtils::Logging 21 | 22 | def self.from_file( path ) 23 | text = 'to be done' 24 | self.from_string( text ) 25 | end 26 | 27 | def self.from_string( text ) 28 | TableReader.new( text ) 29 | end 30 | 31 | def initialize( text, opts={} ) 32 | @opts = opts 33 | @text = text 34 | ## to be done 35 | end 36 | 37 | def quick_check 38 | # use a quick scan of all rows (return some stats e.g. no of records) 39 | # - throws an exception if any error 40 | 41 | ## to be done 42 | end 43 | 44 | ## to be done 45 | 46 | end # class TableReader 47 | 48 | -------------------------------------------------------------------------------- /pakman/test/test_erb.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # to run use 5 | # ruby -I ./lib -I ./test test/test_erb.rb 6 | 7 | 8 | require 'helper' 9 | 10 | 11 | class TestErb < MiniTest::Test 12 | 13 | class Ctx # Context 14 | 15 | def initialize( hash ) 16 | @hash = hash 17 | @headers = hash['headers'] 18 | @slides = hash['slides'] 19 | 20 | puts 'hash:' 21 | pp @hash 22 | puts 'headers:' 23 | pp @headers 24 | puts 'slides:' 25 | pp @slides 26 | end 27 | 28 | attr_reader :headers 29 | attr_reader :slides 30 | 31 | def ctx 32 | ### todo: check if method_missing works with binding in erb??? 
33 | binding 34 | end 35 | end 36 | 37 | def test_merge 38 | hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" ) 39 | ctx = Ctx.new( hash ) 40 | 41 | manifestsrc = "#{Pakman.root}/test/erb/pak/test.txt" 42 | outpath = "#{Pakman.root}/tmp/#{Time.now.to_i}" ## pakpath/output path 43 | 44 | Pakman::Templater.new.merge_pak( manifestsrc, outpath, ctx.ctx, 'test' ) 45 | 46 | assert true 47 | end # method test_merge 48 | 49 | end # class TestErb 50 | 51 | -------------------------------------------------------------------------------- /textutils/lib/textutils/core_ext/array.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | class Array 5 | 6 | ## todo: check if there's already a builtin method for this 7 | # 8 | # note: 9 | # in rails ary.in_groups(3) results in 10 | # top-to-bottom, left-to-right. 11 | # and not left-to-right first and than top-to-bottom. 12 | # 13 | # rename to in_groups_vertical(3) ??? 14 | 15 | def in_columns( cols ) # alias for convenience for chunks - needed? why? why not? 16 | chunks( cols ) 17 | end 18 | 19 | def chunks( number_of_chunks ) 20 | ## NB: use chunks - columns might be in use by ActiveRecord! 21 | ### 22 | # e.g. 23 | # [1,2,3,4,5,6,7,8,9,10].columns(3) 24 | # becomes: 25 | # [[1,4,7,10], 26 | # [2,5,8], 27 | # [3,6,9]] 28 | 29 | ## check/todo: make a copy of the array first?? 30 | # for now reference to original items get added to columns 31 | chunks = (1..number_of_chunks).collect { [] } 32 | each_with_index do |item,index| 33 | chunks[ index % number_of_chunks ] << item 34 | end 35 | chunks 36 | end 37 | 38 | end # class Array 39 | 40 | -------------------------------------------------------------------------------- /textutils/lib/textutils/reader/code_reader.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # fix: move into TextUtils namespace/module!! 
4 | 5 | class CodeReader 6 | 7 | include LogUtils::Logging 8 | 9 | def self.from_file( path ) 10 | ## nb: assume/enfore utf-8 encoding (with or without BOM - byte order mark) 11 | ## - see textutils/utils.rb 12 | code = File.read_utf8( path ) 13 | self.from_string( code ) 14 | end 15 | 16 | def self.from_string( code ) 17 | CodeReader.new( code: code ) 18 | end 19 | 20 | 21 | def initialize( arg ) 22 | if arg.is_a?( String ) ## old style (deprecated) - pass in filepath as string 23 | path = arg 24 | logger.info "CodeReader.new - deprecated API - use CodeReader.from_file() instead" 25 | @code = File.read_utf8( path ) 26 | else ## assume it's a hash 27 | opts = arg 28 | @code = opts[:code] 29 | end 30 | end 31 | 32 | 33 | def eval( klass ) 34 | klass.class_eval( @code ) 35 | 36 | # NB: same as 37 | # 38 | # module WorldDB 39 | # include WorldDB::Models 40 | # 41 | # end 42 | end 43 | 44 | end # class CodeReader 45 | -------------------------------------------------------------------------------- /pakman/lib/pakman/copier.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | class Copier 6 | 7 | include LogUtils::Logging 8 | 9 | 10 | def copy_pak( manifestsrc, pakpath ) 11 | 12 | start = Time.now 13 | 14 | pakname = Pakman.pakname_from_file( manifestsrc ) 15 | 16 | logger.info "Copying template pack '#{pakname}'" 17 | 18 | ## todo: after depreciate change back to just load_file 19 | manifest = Manifest.load_file_v2( manifestsrc ) 20 | 21 | manifest.each do |entry| 22 | dest = entry[0] 23 | source = entry[1] 24 | 25 | # get full (absolute) path and make sure path exists 26 | destfull = File.expand_path( dest, pakpath ) 27 | destpath = File.dirname( destfull ) 28 | FileUtils.makedirs( destpath ) unless File.directory?( destpath ) 29 | 30 | logger.debug "destfull=>#{destfull}<" 31 | logger.debug "destpath=>#{destpath}<" 32 | 33 | logger.info " Copying to #{dest} from #{source}..." 
34 | FileUtils.copy( source, destfull ) 35 | end 36 | 37 | logger.info "Done (in #{Time.now-start} s)." 38 | end # method copy_pak 39 | 40 | end # class Copier 41 | end # module Pakman 42 | -------------------------------------------------------------------------------- /textutils/lib/textutils/helper/tag_helper.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module TextUtils 4 | module TagHelper 5 | 6 | #### 7 | # - todo: use new additional sub module ??? 8 | # e.g. TextUtils::Reader::TagHelper 9 | # lets us use "classic" web helpers a la rails 10 | # find a good name for sub module - Reader? Fixtures? Values? Parser? 11 | 12 | 13 | def find_tags( value ) 14 | # logger.debug " found tags: >>#{value}<<" 15 | 16 | tag_keys = value.split('|') 17 | 18 | ## unify; replace _w/ space; remove leading n trailing whitespace 19 | tag_keys = tag_keys.map do |key| 20 | key = key.gsub( '_', ' ' ) 21 | key = key.strip 22 | key 23 | end 24 | 25 | tag_keys # return tag keys as ary 26 | end 27 | 28 | def find_tags_in_attribs!( attribs ) 29 | # NB: will remove :tags from attribs hash 30 | 31 | if attribs[:tags].present? 
32 | tag_keys = find_tags( attribs[:tags] ) 33 | attribs.delete(:tags) 34 | tag_keys # return tag keys as ary of strings 35 | else 36 | [] # nothing found; return empty ary 37 | end 38 | end 39 | 40 | end # module TagHelper 41 | end # module TextUtils 42 | -------------------------------------------------------------------------------- /pakman/Manifest.txt: -------------------------------------------------------------------------------- 1 | History.md 2 | Manifest.txt 3 | README.md 4 | Rakefile 5 | bin/pakman 6 | lib/pakman.rb 7 | lib/pakman/cli/commands/fetch.rb 8 | lib/pakman/cli/commands/gen.rb 9 | lib/pakman/cli/commands/list.rb 10 | lib/pakman/cli/ctx.rb 11 | lib/pakman/cli/helpers.rb 12 | lib/pakman/cli/opts.rb 13 | lib/pakman/cli/runner.rb 14 | lib/pakman/copier.rb 15 | lib/pakman/erb/template.rb 16 | lib/pakman/erb/templater.rb 17 | lib/pakman/fetcher.rb 18 | lib/pakman/finder.rb 19 | lib/pakman/liquid/template.rb 20 | lib/pakman/liquid/templater.rb 21 | lib/pakman/manifest.rb 22 | lib/pakman/page.rb 23 | lib/pakman/utils.rb 24 | lib/pakman/version.rb 25 | test/data/test.yml 26 | test/erb/pak/test.html.erb 27 | test/erb/pak/test.txt 28 | test/helper.rb 29 | test/liquid/pak/hello.doc 30 | test/liquid/pak/hello.txt 31 | test/liquid/pak/s9logo.png 32 | test/liquid/pak/test.html 33 | test/liquid/pak/test.txt 34 | test/liquid/pak/testbin.txt 35 | test/liquid/test.html 36 | test/pages/empty.txt 37 | test/pages/page1.txt 38 | test/pages/page2.txt 39 | test/pages/page3.txt 40 | test/pages/text.txt 41 | test/test_erb.rb 42 | test/test_liquid.rb 43 | test/test_liquid_binaries.rb 44 | test/test_liquid_drops.rb 45 | test/test_page.rb 46 | -------------------------------------------------------------------------------- /attic/fixture_reader.rb: -------------------------------------------------------------------------------- 1 | 2 | if @path.ends_with?( '.yml' ) || @path.ends_with?( '.yaml' ) 3 | ### fix/todo: remove later on!!! - do not use!! 
4 | puts "deprecated api - FixtureReader w/ yaml format - will get removed; please use new plain text manifest format" 5 | @ary = old_deprecated_yaml_reader( text ) 6 | else 7 | .. 8 | end 9 | 10 | 11 | def old_deprecated_yaml_reader( text ) 12 | hash = YAML.load( text ) 13 | 14 | ### build up array for fixtures from hash 15 | ary = [] 16 | 17 | hash.each do |key_wild, value_wild| 18 | key = key_wild.to_s.strip 19 | 20 | logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<" 21 | 22 | if value_wild.kind_of?( String ) # assume single fixture name 23 | ary << value_wild 24 | elsif value_wild.kind_of?( Array ) # assume array of fixture names as strings 25 | ary = ary + value_wild 26 | else 27 | logger.error "unknow fixture type in setup (yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<); skipping" 28 | end 29 | end 30 | ary # return fixture ary 31 | end 32 | -------------------------------------------------------------------------------- /textutils/test/test_title_mapper2.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # to run use 5 | # ruby -I ./lib -I ./test test/test_title_mapper2.rb 6 | 7 | 8 | require 'helper' 9 | 10 | 11 | class TestTitleMapper2 < Minitest::Test 12 | 13 | ClubStruct = Struct.new(:key, :title, :synonyms) 14 | 15 | def test_title_table 16 | 17 | titles_in = [ 18 | ClubStruct.new( 'barcelona', 'Barcelona', 'FC Barcelona' ), 19 | ClubStruct.new( 'espanyol', 'Espanyol', 'RCD Espanyol|Espanyol Barcelona' ), 20 | ClubStruct.new( 'sevilla', 'Sevilla', 'Sevilla FC' ) 21 | ] 22 | 23 | mapper = TextUtils::TitleMapper2.new( titles_in, 'club' ) 24 | titles_out = mapper.known_titles 25 | 26 | puts 'titles_out:' 27 | pp titles_out 28 | 29 | line = "Espanyol Barcelona 1-0 FC Barcelona" 30 | mapper.map_titles!( line ) 31 | puts "=> #{line}" 32 | 33 | club1 = mapper.find_key!( line ) 34 | 
club2 = mapper.find_key!( line ) 35 | puts "=> #{line}" 36 | 37 | assert_equal 'espanyol', club1 38 | assert_equal 'barcelona', club2 39 | 40 | assert true ## assume everything ok if we get here 41 | 42 | end # method test_title_table 43 | 44 | 45 | end # class TestTitleMapper2 46 | -------------------------------------------------------------------------------- /pakman/lib/pakman/cli/opts.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | class Opts 6 | 7 | def list=(value) 8 | @list = value 9 | end 10 | 11 | def list? 12 | return false if @list.nil? # default list flag is false 13 | @list == true 14 | end 15 | 16 | 17 | def generate=(value) 18 | @generate = value 19 | end 20 | 21 | def generate? 22 | return false if @generate.nil? # default generate flag is false 23 | @generate == true 24 | end 25 | 26 | 27 | def fetch_uri=(value) 28 | @fetch_uri = value 29 | end 30 | 31 | def fetch_uri 32 | @fetch_uri || '-fetch uri required-' 33 | end 34 | 35 | def fetch? 36 | @fetch_uri.nil? ? false : true 37 | end 38 | 39 | 40 | def manifest=(value) 41 | @manifest = value 42 | end 43 | 44 | ## fix:/todo: use a different default manifest 45 | def manifest 46 | @manifest || 's6.txt' 47 | end 48 | 49 | 50 | def config_path=(value) 51 | @config_path = value 52 | end 53 | 54 | def config_path 55 | @config_path || '~/.pak' 56 | end 57 | 58 | 59 | def output_path=(value) 60 | @output_path = value 61 | end 62 | 63 | def output_path 64 | @output_path || '.' 
65 | end 66 | 67 | end # class Opts 68 | end # module Pakman 69 | -------------------------------------------------------------------------------- /pakman/test/test_liquid_drops.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # to run use 5 | # ruby -I ./lib -I ./test test/test_liquid_drops.rb 6 | 7 | 8 | require 'helper' 9 | 10 | 11 | class TestLiquidDrops < MiniTest::Test 12 | 13 | class HeadersDrop < Liquid::Drop 14 | 15 | def initialize( h ) 16 | @h = h 17 | end 18 | 19 | def author() puts "call author"; @h['author']; end 20 | def title() puts "call title"; @h['title']; end 21 | end 22 | 23 | class SlideDrop < Liquid::Drop 24 | 25 | def initialize( h ) 26 | @h = h 27 | end 28 | 29 | def content() puts "call content"; @h['content']; end 30 | def header() puts "call header"; @h['header']; end 31 | end 32 | 33 | def setup 34 | Liquid::Template.error_mode = :strict 35 | end 36 | 37 | 38 | def test_template 39 | hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" ) 40 | headers = HeadersDrop.new( hash['headers'] ) 41 | slides = hash['slides'].map { |h| SlideDrop.new( h ) } 42 | ctx= { 'headers' => headers, 'slides' => slides } 43 | pp ctx 44 | 45 | path = "#{Pakman.root}/test/liquid/test.html" 46 | t = Pakman::LiquidTemplate.from_file( path ) 47 | pp t.render( ctx ) 48 | 49 | assert true 50 | end 51 | 52 | end # class TestLiquidDrops 53 | 54 | -------------------------------------------------------------------------------- /attic/line_reader_v2.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | # fix: move into TextUtils namespace/module!! 5 | 6 | 7 | class LineReaderV2 8 | include LogUtils::Logging 9 | 10 | def initialize( name, include_path ) 11 | @name = name 12 | @include_path = include_path 13 | 14 | # map name to name_real_path 15 | # name might include !/ for virtual path (gets cut off) 16 | # e.g. 
at-austria!/w-wien/beers becomse w-wien/beers 17 | 18 | pos = @name.index( '!/') 19 | if pos.nil? 20 | @name_real_path = @name # not found; real path is the same as name 21 | else 22 | # cut off everything until !/ e.g. 23 | # at-austria!/w-wien/beers becomes 24 | # w-wien/beers 25 | @name_real_path = @name[ (pos+2)..-1 ] 26 | end 27 | end 28 | 29 | attr_reader :name 30 | attr_reader :name_real_path 31 | attr_reader :include_path 32 | 33 | def each_line 34 | path = "#{include_path}/#{name_real_path}.txt" 35 | reader = LineReader.from_file( path ) 36 | 37 | logger.info "parsing data '#{name}' (#{path})..." 38 | 39 | reader.each_line do |line| 40 | yield( line ) 41 | end 42 | 43 | ConfDb::Model::Prop.create_from_fixture!( name, path ) 44 | end 45 | 46 | end # class LineReaderV2 47 | 48 | -------------------------------------------------------------------------------- /pakman/lib/pakman/cli/commands/gen.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | class Gen 6 | 7 | include LogUtils::Logging 8 | 9 | include ManifestHelper 10 | 11 | def initialize( opts ) 12 | @opts = opts 13 | end 14 | 15 | attr_reader :opts 16 | 17 | def run( args ) 18 | manifest_name = opts.manifest 19 | manifest_name = manifest_name.downcase.gsub('.txt', '' ) # remove .txt if present 20 | 21 | logger.debug "manifest=#{manifest_name}" 22 | 23 | # check for matching manifests 24 | manifests = installed_template_manifests.select { |m| m[0] == manifest_name+'.txt' } 25 | 26 | if manifests.empty? 27 | puts "*** error: unknown template pack '#{manifest_name}'; use pakman -l to list installed template packs" 28 | exit 2 29 | end 30 | 31 | manifestsrc = manifests[0][1] 32 | pakpath = opts.output_path 33 | 34 | if args.empty? 
35 | Copier.new.copy_pak( manifestsrc, pakpath ) 36 | else 37 | args.each do |arg| 38 | data = YAML.load_file( arg ) 39 | name = File.basename( arg, '.*' ) 40 | puts "#{name}:" 41 | pp data 42 | Templater.new.merge_pak( manifestsrc, pakpath, Ctx.new(data).ctx, name ) 43 | end 44 | end 45 | 46 | end 47 | 48 | end # class Gen 49 | end # module Pakman 50 | -------------------------------------------------------------------------------- /pakman/test/test_liquid_binaries.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # to run use 5 | # ruby -I ./lib -I ./test test/test_liquid_binaries.rb 6 | 7 | 8 | require 'helper' 9 | 10 | 11 | class TestLiquidBinaries < MiniTest::Test 12 | 13 | 14 | def setup 15 | Liquid::Template.error_mode = :strict 16 | end 17 | 18 | 19 | def test_rx 20 | rx = Pakman::LiquidTemplater::REGEX_EXT 21 | 22 | pp rx 23 | 24 | ## todo: check why assert rx.match( 'test.html' ) == true doesn't work 25 | ## (note: regex.match will return MatchData or nil) 26 | 27 | assert rx.match( 'test.html' ).nil? == false 28 | assert rx.match( 'TEST.HTML' ).nil? == false 29 | assert rx.match( 'test.js' ).nil? == false 30 | assert rx.match( 'test.json' ).nil? == false 31 | assert rx.match( 'test.gif' ).nil? 
== true 32 | end 33 | 34 | 35 | def test_merge 36 | hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" ) 37 | ctx= { 'headers' => hash['headers'], 'slides' => hash['slides'] } 38 | pp ctx 39 | 40 | manifestsrc = "#{Pakman.root}/test/liquid/pak/testbin.txt" 41 | outpath = "#{Pakman.root}/tmp/#{Time.now.to_i}" ## pakpath/output path 42 | 43 | Pakman::LiquidTemplater.new.merge_pak( manifestsrc, outpath, ctx, 'test' ) 44 | 45 | assert true 46 | end # method test_merge 47 | 48 | end # class TestLiquidBinaries 49 | -------------------------------------------------------------------------------- /textutils/lib/textutils/filter/code_filter.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module TextUtils 4 | module Filter 5 | 6 | def code_block_curly_style( content, options={} ) 7 | # replace {{{ w/
 8 |     # replace }}}  w/ 
9 | # use 4-6 { or } to escape back to literal value (e.g. {{{{ or {{{{{{ => {{{ ) 10 | # note: {{{ / }}} are anchored to beginning of line ( spaces and tabs before {{{/}}}allowed ) 11 | 12 | # track statistics 13 | code_begin = 0 14 | code_begin_esc = 0 15 | code_end = 0 16 | code_end_esc = 0 17 | 18 | content.gsub!( /^[ \t]*(\{{3,6})/ ) do |match| 19 | escaped = ($1.length > 3) 20 | if escaped 21 | code_begin_esc += 1 22 | "{{{" 23 | else 24 | code_begin += 1 25 | "
"
26 |       end
27 |     end
28 |     
29 |     content.gsub!( /^[ \t]*(\}{3,6})/ ) do |match|
30 |       escaped = ($1.length > 3)
31 |       if escaped
32 |         code_end_esc += 1
33 |         "}}}"
34 |       else
35 |         code_end += 1
36 |         "
" 37 | end 38 | end 39 | 40 | puts " Patching {{{/}}}-code blocks (#{code_begin}/#{code_end} blocks, " + 41 | "#{code_begin_esc}/#{code_end_esc} escaped blocks)..." 42 | 43 | content 44 | end 45 | 46 | end # module Filter 47 | end # module TextUtils -------------------------------------------------------------------------------- /linkto/README.md: -------------------------------------------------------------------------------- 1 | # linkto 2 | 3 | linkto gem - link_to helpers for google search, bing search, flickr photo search, flickr photo tag, etc. 4 | 5 | * home :: [github.com/rubylibs/linkto](https://github.com/rubylibs/linkto) 6 | * bugs :: [github.com/rubylibs/linkto/issues](https://github.com/rubylibs/linkto/issues) 7 | * gem :: [rubygems.org/gems/linkto](https://rubygems.org/gems/linkto) 8 | * rdoc :: [rubydoc.info/gems/linkto](http://rubydoc.info/gems/linkto) 9 | 10 | 11 | ## Usage 12 | 13 | link_to_google_search 'open mundi' 14 | 15 | will become 16 | 17 | https://www.google.com/search?q=open+mundi 18 | 19 | 20 | ### Google 21 | 22 | - `link_to_google_search` 23 | - `link_to_google_de_search` 24 | - `link_to_google_search_images` 25 | 26 | ### Bing 27 | 28 | - `link_to_bing_search_images` 29 | 30 | ### Flickr 31 | 32 | - `link_to_flickr_tags` 33 | - `link_to_flickr_search` 34 | 35 | ### Wikipedia 36 | 37 | - `link_to_wikipedia_search` 38 | - `link_to_wikipedia_de_search` 39 | 40 | ### Untappd 41 | 42 | - `link_to_untappd_search` 43 | 44 | 45 | 46 | ## Real World Usage 47 | 48 | - [beer.db.admin](https://github.com/geraldb/beer.db.admin) - open source world beer guide; beer.db browser 49 | 50 | 51 | ## Alternatives 52 | 53 | 54 | ## License 55 | 56 | The `linkto` scripts are dedicated to the public domain. 57 | Use it as you please with no restrictions whatsoever. 
58 | -------------------------------------------------------------------------------- /textutils/lib/textutils/utils.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module TextUtils 4 | # make helpers available as class methods e.g. TextUtils.convert_unicode_dashes_to_plain_ascii 5 | extend UnicodeHelper 6 | extend TitleHelper 7 | extend AddressHelper 8 | 9 | extend StringFilter # adds asciify and slugify 10 | end 11 | 12 | 13 | 14 | def title_esc_regex( title_unescaped ) 15 | puts "*** warn: depreceated fn call: use TextUtils.title_esc_regex() or include TextUtils::TitleHelpers" 16 | TextUtils.title_esc_regex( title_unescaped ) 17 | end 18 | 19 | 20 | def find_data_path_from_gemfile_gitref( name ) 21 | puts "[textutils] find_data_path( name='#{name}' )..." 22 | puts "load path:" 23 | pp $LOAD_PATH 24 | 25 | # escape chars for regex e.g. . becomes \. 26 | name_esc = name.gsub( '.', '\.' ) 27 | 28 | 29 | # note: 30 | # - hexdigest must be 12 chars e.g. b7d1c9619a54 or similar 31 | 32 | # e.g. 
match /\/(beer\.db-[a-z0-9]+)|(beer\.db)\// 33 | 34 | name_regex = /\/((#{name_esc}-[a-z0-9]{12})|(#{name_esc}))\/lib$/ 35 | candidates = [] 36 | $LOAD_PATH.each do |path| 37 | if path =~ name_regex 38 | # cutoff trailing /lib 39 | candidates << path[0..-5] 40 | end 41 | end 42 | 43 | puts 'found candidates:' 44 | pp candidates 45 | 46 | ## use first candidate 47 | candidates[0] 48 | end 49 | 50 | -------------------------------------------------------------------------------- /textutils/test/test_slugify.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | require 'helper' 4 | 5 | class TestSlugify < Minitest::Test 6 | 7 | def test_slugify 8 | 9 | txt_io = [ 10 | [ 'São Paulo', 'sao-paulo' ], 11 | [ 'São Gonçalo', 'sao-goncalo' ], 12 | [ 'Výčepní', 'vycepni' ], 13 | [ 'Żubr', 'zubr' ], 14 | [ 'Żywiec', 'zywiec' ], 15 | [ 'Lomża Export', 'lomza-export' ], 16 | [ 'Nogne Ø Imperial Stout', 'nogne-o-imperial-stout' ], 17 | [ 'Xyauyù', 'xyauyu' ], 18 | [ 'Águila', 'aguila' ], 19 | [ 'Arena Amazônia', 'arena-amazonia' ], 20 | [ 'Tōkyō', 'tokyo' ], 21 | [ 'Ōsaka', 'osaka' ], 22 | [ 'El Djazaïr', 'el-djazair' ], 23 | [ 'Al-Kharṭūm', 'al-khartum' ], 24 | [ 'Ṭarābulus', 'tarabulus' ], 25 | [ 'Al-Iskandarīyah', 'al-iskandariyah' ], 26 | [ 'Pishōr', 'pishor' ], 27 | [ 'Pishāwar', 'pishawar' ], 28 | [ 'Islām ābād', 'islam-abad' ], 29 | [ 'Thành Phố Hồ Chí Minh', 'thanh-pho-ho-chi-minh' ], 30 | [ 'Åland Islands', 'aland-islands' ], 31 | [ "Pe\u{030C}awar", 'pexawar'] ## note: use unicode literal; Pex̌awar -- see en.wikipedia.org/wiki/Peshawar 32 | ] 33 | 34 | txt_io.each do |txt| 35 | assert_equal txt[1], TextUtils.slugify( txt[0] ) 36 | end 37 | end # method test_slugify 38 | 39 | 40 | end # class TestSlugify 41 | -------------------------------------------------------------------------------- /attic/values_reader_v2.rb: -------------------------------------------------------------------------------- 1 | # encoding: 
utf-8 2 | 3 | # fix: move into TextUtils namespace/module!! 4 | 5 | ## todo/fix: find a better name than HashReaderV2 (HashReaderPlus?) ?? 6 | 7 | class ValuesReaderV2 8 | include LogUtils::Logging 9 | 10 | def initialize( name, include_path, more_attribs={} ) 11 | @name = name 12 | @include_path = include_path 13 | @more_attribs = more_attribs 14 | 15 | # map name to name_real_path 16 | # name might include !/ for virtual path (gets cut off) 17 | # e.g. at-austria!/w-wien/beers becomes w-wien/beers 18 | 19 | pos = @name.index( '!/') 20 | if pos.nil? 21 | @name_real_path = @name # not found; real path is the same as name 22 | else 23 | # cut off everything until !/ e.g. 24 | # at-austria!/w-wien/beers becomes 25 | # w-wien/beers 26 | @name_real_path = @name[ (pos+2)..-1 ] 27 | end 28 | end 29 | 30 | attr_reader :name 31 | attr_reader :name_real_path 32 | attr_reader :include_path 33 | attr_reader :more_attribs 34 | 35 | def each_line 36 | path = "#{include_path}/#{name_real_path}.txt" 37 | reader = ValuesReader.new( path, more_attribs ) 38 | 39 | logger.info "parsing data '#{name}' (#{path})..." 
40 | 41 | reader.each_line do |attribs, values| 42 | yield( attribs, values ) 43 | end 44 | 45 | ConfDb::Model::Prop.create_from_fixture!( name, path ) 46 | end 47 | 48 | end # class ValuesReaderV2 49 | 50 | -------------------------------------------------------------------------------- /pakman/lib/pakman.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # Note: for local testing run like: 5 | # 6 | # 1.9.x: ruby -Ilib lib/pakman.rb 7 | 8 | # core and stlibs 9 | 10 | require 'yaml' 11 | require 'pp' 12 | require 'erb' 13 | require 'logger' 14 | require 'optparse' 15 | require 'fileutils' 16 | 17 | # rubygems 18 | 19 | require 'logutils' 20 | require 'fetcher' # fetch (download) files 21 | 22 | 23 | # 3rd party rubygems 24 | require 'liquid' 25 | 26 | # our own code 27 | 28 | require 'pakman/copier' 29 | require 'pakman/fetcher' 30 | require 'pakman/finder' 31 | require 'pakman/manifest' 32 | 33 | require 'pakman/erb/template' 34 | require 'pakman/erb/templater' 35 | 36 | require 'pakman/liquid/template' 37 | require 'pakman/liquid/templater' 38 | 39 | require 'pakman/page' 40 | require 'pakman/utils' 41 | require 'pakman/version' 42 | 43 | require 'pakman/cli/ctx' 44 | require 'pakman/cli/helpers' 45 | require 'pakman/cli/opts' 46 | require 'pakman/cli/runner' 47 | require 'pakman/cli/commands/fetch' 48 | require 'pakman/cli/commands/gen' 49 | require 'pakman/cli/commands/list' 50 | 51 | 52 | module Pakman 53 | 54 | def self.banner 55 | "pakman #{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]" 56 | end 57 | 58 | def self.root 59 | "#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}" 60 | end 61 | 62 | def self.main 63 | Runner.new.run(ARGV) 64 | end 65 | 66 | end # module Pakman 67 | 68 | 69 | Pakman.main if __FILE__ == $0 70 | -------------------------------------------------------------------------------- /pakman/test/test_liquid.rb: 
-------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # to run use 5 | # ruby -I ./lib -I ./test test/test_liquid.rb 6 | 7 | 8 | require 'helper' 9 | 10 | 11 | class TestLiquid < MiniTest::Test 12 | 13 | 14 | def setup 15 | Liquid::Template.error_mode = :strict 16 | end 17 | 18 | 19 | def test_template 20 | hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" ) 21 | ctx= { 'headers' => hash['headers'], 'slides' => hash['slides'] } 22 | pp ctx 23 | 24 | path = "#{Pakman.root}/test/liquid/test.html" 25 | t = Pakman::LiquidTemplate.from_file( path ) 26 | pp t.render( ctx ) 27 | 28 | assert true 29 | end 30 | 31 | def test_page_template 32 | hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" ) 33 | ctx= { 'headers' => hash['headers'], 'slides' => hash['slides'] } 34 | pp ctx 35 | 36 | path = "#{Pakman.root}/test/liquid/pak/test.html" 37 | t = Pakman::LiquidPageTemplate.from_file( path ) 38 | pp t.render( ctx ) 39 | 40 | assert true 41 | end 42 | 43 | def test_merge 44 | hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" ) 45 | ctx= { 'headers' => hash['headers'], 'slides' => hash['slides'] } 46 | pp ctx 47 | 48 | manifestsrc = "#{Pakman.root}/test/liquid/pak/test.txt" 49 | outpath = "#{Pakman.root}/tmp/#{Time.now.to_i}" ## pakpath/output path 50 | 51 | Pakman::LiquidTemplater.new.merge_pak( manifestsrc, outpath, ctx, 'test' ) 52 | 53 | assert true 54 | end # method test_merge 55 | 56 | end # class TestLiquid 57 | 58 | -------------------------------------------------------------------------------- /pakman/lib/pakman/finder.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | class Finder 6 | 7 | include LogUtils::Logging 8 | 9 | 10 | def find_manifests( patterns, excludes=[] ) 11 | manifests = [] 12 | 13 | patterns.each do |pattern| 14 | pattern.gsub!( '\\', '/') # normalize path; make sure all path 
use / only 15 | logger.debug "Checking >#{pattern}<" 16 | Dir.glob( pattern ) do |file| 17 | logger.debug " Found manifest candidate >#{file}<" 18 | if File.directory?( file ) # NB: do not include directories 19 | logger.debug " Skipping match; it's a directory" 20 | else 21 | unless exclude?( file, excludes ) # check for excludes; skip if excluded 22 | logger.debug " Adding match >#{file}<" 23 | 24 | ## todo/fix: 25 | # array first entry - downcase and gsub('.txt','') ?? 26 | # use Pakman.pakname_from_file() 27 | 28 | manifests << [ File.basename( file ), file ] 29 | end 30 | end 31 | end 32 | end 33 | 34 | manifests 35 | end 36 | 37 | private 38 | def exclude?( file, excludes ) 39 | excludes.each do |pattern| 40 | ## todo: FNM_DOTMATCH helps or not?? (make up some tests??) 41 | if File.fnmatch?( pattern, file, File::FNM_CASEFOLD | File::FNM_DOTMATCH ) 42 | logger.debug " Skipping match; it's excluded by pattern >#{pattern}<" 43 | return true 44 | end 45 | end 46 | false 47 | end 48 | 49 | end # class Finder 50 | end # module Pakman 51 | -------------------------------------------------------------------------------- /textutils/lib/textutils/sanitizier.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module TextUtils 4 | 5 | class Sanitizier 6 | 7 | include LogUtils::Logging 8 | 9 | @@ignore_tags = %w{ head script style } 10 | @@inline_tags = %w{ span b i u } 11 | @@block_tags = %w{ p div ul ol } 12 | 13 | 14 | def initialize( ht ) 15 | @ht = ht # hypertext (html source) 16 | end 17 | 18 | def to_plain_text 19 | 20 | ht = @ht 21 | ht = handle_ignore_tags( ht ) 22 | 23 | ## handle_pre_tags ?? 
- special rule for preformatted (keep whitespace) 24 | 25 | ht = handle_inline_tags( ht ) 26 | ht = handle_block_tags( ht ) 27 | ht = handle_other_tags( ht ) # rules for remain/left over tags 28 | 29 | ht = handle_entities( ht ) 30 | 31 | ht 32 | end 33 | 34 | def handle_entities( ht ) 35 | ## unescape entities 36 | # - check if it also works for generic entities like  etc. 37 | # or only for > < etc. 38 | ht = CGI.unescapeHTML( ht ) 39 | end 40 | 41 | def tag_regex( tag ) 42 | # note use non-greedy .*? for content 43 | 44 | /<#{tag}[^>]*>(.*?)<\/#{tag}>/mi 45 | end 46 | 47 | def handle_ignore_tags( ht ) 48 | @@ignore_tags.each do |tag| 49 | ht.gsub!( tag_regex(tag), '' ) 50 | end 51 | ht 52 | end 53 | 54 | def handle_inline_tags( ht ) 55 | @@inline_tags.each do |tag| 56 | # add a space after 57 | ht.gsub!( tag_regex(tag), '\1 ' ) 58 | end 59 | ht 60 | end 61 | 62 | def handle_block_tags( ht ) 63 | @@block_tags.each do |tag| 64 | ht.gsub!( tag_regex(tag), "\n\1\n" ) 65 | end 66 | ht 67 | end 68 | 69 | 70 | end # class Sanitizier 71 | 72 | end # module TextUtils 73 | -------------------------------------------------------------------------------- /attic/hash_reader_v2.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # fix: move into TextUtils namespace/module!! 4 | 5 | ## todo/fix: find a better name than HashReaderV2 (HashReaderPlus?) ?? 6 | 7 | class HashReaderV2 8 | include LogUtils::Logging 9 | 10 | def initialize( name, include_path ) 11 | @name = name 12 | @include_path = include_path 13 | 14 | # map name to name_real_path 15 | # name might include !/ for virtual path (gets cut off) 16 | # e.g. at-austria!/w-wien/beers becomse w-wien/beers 17 | 18 | pos = @name.index( '!/') 19 | if pos.nil? 20 | @name_real_path = @name # not found; real path is the same as name 21 | else 22 | # cut off everything until !/ e.g. 
23 | # at-austria!/w-wien/beers becomes 24 | # w-wien/beers 25 | @name_real_path = @name[ (pos+2)..-1 ] 26 | end 27 | end 28 | 29 | attr_reader :name 30 | attr_reader :name_real_path 31 | attr_reader :include_path 32 | 33 | def each 34 | path = "#{include_path}/#{name_real_path}.yml" 35 | reader = HashReader.from_file( path ) 36 | 37 | logger.info "parsing data '#{name}' (#{path})..." 38 | 39 | reader.each do |key, value| 40 | yield( key, value ) 41 | end 42 | 43 | ConfDb::Model::Prop.create_from_fixture!( name, path ) 44 | end 45 | 46 | 47 | def each_typed 48 | path = "#{include_path}/#{name_real_path}.yml" 49 | reader = HashReader.from_file( path ) 50 | 51 | logger.info "parsing data '#{name}' (#{path})..." 52 | 53 | reader.each_typed do |key, value| 54 | yield( key, value ) 55 | end 56 | 57 | ConfDb::Model::Prop.create_from_fixture!( name, path ) 58 | end 59 | 60 | 61 | end # class HashReaderV2 62 | -------------------------------------------------------------------------------- /pakman/lib/pakman/page.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | 6 | ## Jekyll-style page 7 | ## with optional front-matter (yaml block) 8 | 9 | class Page 10 | 11 | def self.from_file( path ) 12 | puts " Loading page (from file) >#{path}<..." 13 | text = File.open( path, 'r:bom|utf-8' ).read ## note: assume utf8 14 | self.new( text, path: path ) ## note: pass along path as an option 15 | end 16 | 17 | def self.from_string( text ) ### use parse as alias - why?? why not?? 18 | self.new( text ) 19 | end 20 | 21 | attr_reader :contents 22 | attr_reader :headers 23 | 24 | ## has headers/metadata (front matter block) - yes/no - use hash for check for now 25 | def headers?() @headers.kind_of?( Hash ); end 26 | 27 | ## check if \s includes newline too? 
28 | ## fix/check ^ - just means start of newline (use /A or something --- MUST always be first 29 | ## 30 | ## note: include --- in headers 31 | ## e.g. --- results in nil 32 | ## empty string (without leading ---) results in false! (we want nil if no headers for empty block) 33 | HEADERS_PATTERN = / 34 | ^(?---\s*\n 35 | .*?) 36 | ^(---\s*$\n?) 37 | /xm 38 | 39 | def initialize( text, opts={} ) 40 | ## todo/fix: check regex in jekyll (add link to source etc.) 41 | if m=HEADERS_PATTERN.match( text ) 42 | @contents = m.post_match 43 | pp m 44 | pp m[:headers] 45 | @headers = YAML.load( m[:headers] ) 46 | pp @headers 47 | @headers = {} if @headers.nil? ## check if headers is nil use/assign empty hash 48 | else 49 | @contents = text 50 | @headers = nil 51 | end 52 | end 53 | 54 | end # class Page 55 | end # module Pakman 56 | -------------------------------------------------------------------------------- /pakman/lib/pakman/erb/templater.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Pakman 4 | 5 | ### todo: 6 | ## rename to ErbTemplater (or RubyTemplater) - why? why not? 
7 | 8 | 9 | class Templater 10 | 11 | include LogUtils::Logging 12 | 13 | 14 | def merge_pak( manifestsrc, pakpath, binding, name ) 15 | 16 | start = Time.now 17 | 18 | pakname = Pakman.pakname_from_file( manifestsrc ) 19 | 20 | logger.info "Merging template pack '#{pakname}'" 21 | 22 | # todo: rename to load_file once deprecated API got removed 23 | manifest = Manifest.load_file_v2( manifestsrc ) 24 | 25 | manifest.each do |entry| 26 | dest = entry[0] 27 | source = entry[1] 28 | 29 | if dest =~ /__file__/ # replace w/ name 30 | dest = dest.gsub( '__file__', name ) 31 | end 32 | 33 | # get full (absolute) path and make sure path exists 34 | destfull = File.expand_path( dest, pakpath ) 35 | destpath = File.dirname( destfull ) 36 | FileUtils.makedirs( destpath ) unless File.directory?( destpath ) 37 | 38 | logger.debug "destfull=>#{destfull}<" 39 | logger.debug "destpath=>#{destpath}<" 40 | 41 | if source =~ /\.erb\.|.erb$/ 42 | logger.info " Merging to #{dest}..." 43 | 44 | out = File.new( destfull, 'w+:utf-8' ) ## note: use utf8 (by default) 45 | out << ErbTemplate.from_file( source ).render( binding ) 46 | out.flush 47 | out.close 48 | else 49 | logger.info " Copying to #{dest} from #{source}..." 50 | 51 | FileUtils.copy( source, destfull ) 52 | end 53 | end # each entry in manifest 54 | 55 | logger.info "Done (in #{Time.now-start} s)." 
56 | end # method merge_pak 57 | 58 | end # class Templater 59 | end # module Pakman 60 | -------------------------------------------------------------------------------- /textutils/lib/textutils/filter/comment_filter.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module TextUtils 4 | module Filter 5 | 6 | def comments_percent_style( content, options={} ) 7 | 8 | # remove comments 9 | # % comments 10 | # %begin multiline comment 11 | # %end multiline comment 12 | 13 | # track statistics 14 | comments_multi = 0 15 | comments_single = 0 16 | comments_end = 0 17 | 18 | # remove multi-line comments 19 | content.gsub!(/^%(begin|comment|comments).*?%end/m) do |match| 20 | comments_multi += 1 21 | "" 22 | end 23 | 24 | # remove everyting starting w/ %end (note, can only be once in file) 25 | content.sub!(/^%end.*/m) do |match| 26 | comments_end += 1 27 | "" 28 | end 29 | 30 | # hack/note: 31 | # note multi-line erb expressions/stmts might cause trouble 32 | # 33 | # %> gets escaped as special case (not treated as comment) 34 | # <% 35 | # whatever 36 | # %> /, '' ) 161 | end 162 | 163 | def remove_leading_spaces( text ) 164 | # remove leading spaces if less than four !!! 165 | text.gsub( /^[ \t]+(?![ \t])/, '' ) # use negative regex lookahead e.g. (?!) 166 | end 167 | 168 | def remove_blanks( text ) 169 | # remove lines only with .. 170 | text.gsub( /^[ \t]*\.{2}[ \t]*\n/, '' ) 171 | end 172 | 173 | def cleanup_newlines( text ) 174 | # remove all blank lines that go over three 175 | text.gsub( /\n{4,}/, "\n\n\n" ) 176 | end 177 | 178 | 179 | def concat_lines( text ) 180 | # lines ending with ++ will get newlines get removed 181 | # e.g. 182 | # >| hello1 ++ 183 | # >1 hello2 184 | # becomes 185 | # >| hello1 hello2 186 | 187 | # 188 | # note: do NOT use \s - will include \n (newline) ?? 
189 | 190 | text.gsub( /[ \t]+\+{2}[ \t]*\n[ \t]*/, ' ' ) # note: replace with single space 191 | end 192 | 193 | 194 | end # class PageTemplate 195 | 196 | end # module TextUtils 197 | 198 | -------------------------------------------------------------------------------- /textutils/lib/textutils/reader/hash_reader.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | # fix: move into TextUtils namespace/module!! 4 | 5 | 6 | class HashReader 7 | 8 | include LogUtils::Logging 9 | 10 | def self.from_zip( zip_file, entry_path ) 11 | entry = zip_file.find_entry( entry_path ) 12 | 13 | ## todo/fix: add force encoding to utf-8 ?? 14 | ## check!!! 15 | ## clean/prepprocess lines 16 | ## e.g. CR/LF (/r/n) to LF (e.g. /n) 17 | text = entry.get_input_stream().read() 18 | 19 | ## NOTE: needs logger ref; only available in instance methods; use global logger for now 20 | logger = LogUtils::Logger.root 21 | logger.debug "text.encoding.name (before): #{text.encoding.name}" 22 | ##### 23 | # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here 24 | ## NB: 25 | # for now "hardcoded" to utf8 - what else can we do? 
26 | # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation 27 | text = text.force_encoding( Encoding::UTF_8 ) 28 | logger.debug "text.encoding.name (after): #{text.encoding.name}" 29 | 30 | ## todo: 31 | # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus 32 | ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path ) 33 | 34 | self.from_string( text ) 35 | end 36 | 37 | def self.from_file( path ) 38 | ## nb: assume/enforce utf-8 encoding (with or without BOM - byte order mark) 39 | ## - see textutils/utils.rb 40 | text = File.read_utf8( path ) 41 | self.from_string( text ) 42 | end 43 | 44 | def self.from_string( text ) 45 | HashReader.new( text: text ) 46 | end 47 | 48 | def initialize( arg ) 49 | 50 | if arg.is_a?( String ) ## old style (deprecated) - pass in filepath as string 51 | path = arg 52 | logger.info "HashReader.new - deprecated API - use HashReader.from_file() instead" 53 | text = File.read_utf8( path ) 54 | else ## assume it's a hash 55 | opts = arg 56 | text = opts[:text] 57 | end 58 | 59 | ### hack for syck yaml parser (e.g.ruby 1.9.2) (cannot handle !!null) 60 | ## change it to !null to get plain nil 61 | ## w/ both syck and psych/libyaml 62 | 63 | text = text.gsub( '!!null', '!null' ) 64 | 65 | ### hacks for yaml 66 | 67 | ### see yaml gotchas 68 | ## - http://www.perlmonks.org/?node_id=738671 69 | ## - 70 | 71 | ## replace all tabs w/ two spaces and issue a warning 72 | ## nb: yaml does NOT support tabs see why here -> yaml.org/faq.html 73 | 74 | text = text.gsub( "\t" ) do |_| 75 | logger.warn "hash reader - found tab (\t) replacing w/ two spaces; yaml forbids tabs; see yaml.org/faq.html (path=#{path})" 76 | ' ' # replace w/ two spaces 77 | end 78 | 79 | ## quote implicit boolean types on,no,n,y 80 | 81 | ## nb: escape only if key e.g. no: or "free standing" value on its own line e.g. 
82 | ## no: no 83 | 84 | text = text.gsub( /^([ ]*)(ON|On|on|OFF|Off|off|YES|Yes|yes|NO|No|no|Y|y|N|n)[ ]*:/ ) do |value| 85 | logger.warn "hash reader - found implicit bool (#{$1}#{$2}) for key; adding quotes to turn into string; see yaml.org/refcard.html (path=#{path})" 86 | # nb: preserve leading spaces for structure - might be significant 87 | "#{$1}'#{$2}':" # add quotes to turn it into a string (not bool e.g. true|false) 88 | end 89 | 90 | ## nb: value must be freestanding (only allow optional eol comment) 91 | ## do not escape if part of string sequence e.g. 92 | ## key: nb,nn,no,se => nb,nn,'no',se -- avoid!! 93 | # 94 | # check: need we add true|false too??? 95 | 96 | text = text.gsub( /:[ ]+(ON|On|on|OFF|Off|off|YES|Yes|yes|NO|No|no|Y|y|N|n)[ ]*($| #.*$)/ ) do |value| 97 | logger.warn "hash reader - found implicit bool (#{$1}) for value; adding quotes to turn into string; see yaml.org/refcard.html (path=#{path})" 98 | ": '#{$1}'" # add quotes to turn it into a string (not bool e.g. true|false) 99 | end 100 | 101 | 102 | @hash = YAML.load( text ) 103 | end 104 | 105 | ### 106 | # nb: returns all values as strings 107 | # 108 | 109 | def each 110 | @hash.each do |key_wild, value_wild| 111 | # normalize 112 | # - key n value as string (not symbols, bool? int? array?) 113 | # - remove leading and trailing whitespace 114 | key = key_wild.to_s.strip 115 | value = value_wild.to_s.strip 116 | 117 | logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value}<<" 118 | 119 | yield( key, value ) 120 | end 121 | end # method each 122 | 123 | ### 124 | # todo: what name to use: each_object or each_typed ??? 125 | # or use new TypedHashReader class or similar?? 126 | 127 | def each_typed 128 | @hash.each do |key_wild, value_wild| 129 | # normalize 130 | # - key n value as string (not symbols, bool? int? array?) 
131 | # - remove leading and trailing whitespace 132 | key = key_wild.to_s.strip 133 | 134 | if value_wild.is_a?( String ) 135 | value = value_wild.strip 136 | else 137 | value = value_wild 138 | end 139 | 140 | logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value}<<" 141 | 142 | yield( key, value ) 143 | end 144 | end # method each 145 | 146 | end # class HashReader 147 | -------------------------------------------------------------------------------- /textutils/lib/textutils/title_mapper2.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | ## see textutils/title.rb 5 | ## for existing code 6 | ## move over here 7 | 8 | 9 | module TextUtils 10 | 11 | class TitleMapper2 ## todo/check: rename to NameMapper ? why? why not?? 12 | 13 | include LogUtils::Logging 14 | 15 | attr_reader :known_titles ## rename to mapping or mappings or just titles - why? why not? 16 | 17 | ## 18 | ## key: e.g. augsburg 19 | ## title: e.g. FC Augsburg 20 | ## length (of title - not pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not? 21 | MappingStruct = Struct.new( :key, :title, :length, :pattern) ## todo/check: use (rename to) TitleStruct - why? why not?? 22 | 23 | 24 | def initialize( records, tag ) 25 | @known_titles = build_title_table_for( records ) ## build mapping lookup table 26 | 27 | ## todo: rename tag to attrib or attrib_name - why ?? why not ??? 28 | @tag = tag # e.g. tag name use for @@brewery@@ @@team@@ etc. 29 | end 30 | 31 | 32 | def map_titles!( line ) ## rename to just map! - why?? why not??? 33 | begin 34 | found = map_title_for!( @tag, line, @known_titles ) 35 | end while found 36 | end 37 | 38 | def find_key!( line ) 39 | find_key_for!( @tag, line ) 40 | end 41 | 42 | def find_keys!( line ) # NB: keys (plural!) 
- will return array 43 | counter = 1 44 | keys = [] 45 | 46 | key = find_key_for!( "#{@tag}#{counter}", line ) 47 | while key.present? 48 | keys << key 49 | counter += 1 50 | key = find_key_for!( "#{@tag}#{counter}", line ) 51 | end 52 | keys 53 | end 54 | 55 | 56 | private 57 | def build_title_table_for( records ) 58 | 59 | ## build known tracks table w/ synonyms e.g. 60 | # 61 | # [[ 'wolfsbrug', 'VfL Wolfsburg'], 62 | # [ 'augsburg', 'FC Augsburg'], 63 | # [ 'augsburg', 'Augi2'], 64 | # [ 'augsburg', 'Augi3' ], 65 | # [ 'stuttgart', 'VfB Stuttgart']] 66 | 67 | known_titles = [] 68 | 69 | records.each_with_index do |rec,index| 70 | 71 | title_candidates = [] 72 | title_candidates << rec.title 73 | 74 | title_candidates += rec.synonyms.split('|') if rec.synonyms.present? 75 | 76 | 77 | ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit) 78 | # make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan 79 | 80 | titles = [] 81 | title_candidates.each do |t| 82 | titles << t 83 | if t =~ /\(.+\)/ 84 | extra_title = t.gsub( /\(.+\)/, '' ) # remove/delete subtitles 85 | # note: strip leading n trailing withspaces too! 86 | # -- todo: add squish or something if () is inline e.g. leaves two spaces? 87 | extra_title.strip! 88 | titles << extra_title 89 | end 90 | end 91 | 92 | titles.each do |t| 93 | m = MappingStruct.new 94 | m.key = rec.key 95 | m.title = t 96 | m.length = t.length 97 | ## note: escape for regex plus allow subs for special chars/accents 98 | m.pattern = TextUtils.title_esc_regex( t ) 99 | 100 | known_titles << m 101 | end 102 | 103 | logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<" 104 | 105 | ## NB: only include code field - if defined 106 | if rec.respond_to?(:code) && rec.code.present? 
107 | m = MappingStruct.new 108 | m.key = rec.key 109 | m.title = rec.code 110 | m.length = rec.code.length 111 | m.pattern = rec.code ## note: use code for now as is (no variants allowed for now) 112 | 113 | known_titles << m 114 | end 115 | end 116 | 117 | ## note: sort here by length (largest goes first - best match) 118 | # exclude code and key (key should always go last) 119 | known_titles = known_titles.sort { |left,right| right.length <=> left.length } 120 | known_titles 121 | end 122 | 123 | 124 | def map_title_for!( tag, line, mappings ) 125 | 126 | downcase_tag = tag.downcase 127 | 128 | mappings.each do |mapping| 129 | 130 | key = mapping.key 131 | value = mapping.pattern 132 | ## nb: \b does NOT include space or newline for word boundary (only alphanums e.g. a-z0-9) 133 | ## (thus add it, allows match for Benfica Lis. for example - note . at the end) 134 | 135 | ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$ 136 | regex = /\b#{value}(\b| |\t|$)/ # wrap with word boundary (e.g. match only whole words e.g. not wac in wacker) 137 | if line =~ regex 138 | logger.debug " match for #{downcase_tag} >#{key}< >#{value}<" 139 | # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc. 140 | line.sub!( regex, "@@oo#{key}oo@@ " ) # NB: add one space char at end 141 | return true # break out after first match (do NOT continue) 142 | end 143 | end 144 | return false 145 | end 146 | 147 | 148 | def find_key_for!( tag, line ) 149 | regex = /@@oo([^@]+?)oo@@/ # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@]) 150 | 151 | upcase_tag = tag.upcase 152 | downcase_tag = tag.downcase 153 | 154 | if line =~ regex 155 | value = "#{$1}" 156 | logger.debug " #{downcase_tag}: >#{value}<" 157 | 158 | line.sub!( regex, "[#{upcase_tag}]" ) 159 | 160 | return $1 161 | else 162 | return nil 163 | end 164 | end # method find_key_for! 
165 | 166 | 167 | end # class TitleMapper2 168 | end # module TextUtils 169 | -------------------------------------------------------------------------------- /textutils/lib/textutils/title.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | ## todo: rename to TitleFinder or TitleMapper ?? 5 | # other options TitleMatcher? 6 | # TitleMapping? TitleMappings? 7 | # or rename to KeyMapping?, KeyMapper?, KeyTable? etc. 8 | 9 | 10 | ###### 11 | ## todo/check: 12 | ### remove - use TitleMapper instead 13 | ## deprecated/obsolete - do NOT use will get removed 14 | 15 | 16 | module TextUtils 17 | module TitleTable 18 | 19 | #### 20 | ## fix: turn it into a class w/ methods 21 | # 22 | #e.g t =TitleMapper.new( records, name ) # e.g. name='team' 23 | # t.map!( line ) 24 | # t.find_key!( line ) 25 | # etc. 26 | # 27 | # see textutils/title_mapper.rb 28 | # 29 | # deprecate code here!!! - move to new TitleMapper class 30 | 31 | 32 | def build_title_table_for( records ) 33 | LogUtils::Logger.root.info " build_title_table_for - deprecated API - use TitleMapper.new instead" 34 | 35 | ## build known tracks table w/ synonyms e.g. 36 | # 37 | # [[ 'wolfsbrug', [ 'VfL Wolfsburg' ]], 38 | # [ 'augsburg', [ 'FC Augsburg', 'Augi2', 'Augi3' ]], 39 | # [ 'stuttgart', [ 'VfB Stuttgart' ]] ] 40 | 41 | known_titles = [] 42 | 43 | records.each_with_index do |rec,index| 44 | 45 | title_candidates = [] 46 | title_candidates << rec.title 47 | 48 | title_candidates += rec.synonyms.split('|') if rec.synonyms.present? 49 | 50 | 51 | ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit) 52 | # make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan 53 | 54 | titles = [] 55 | title_candidates.each do |t| 56 | titles << t 57 | if t =~ /\(.+\)/ 58 | extra_title = t.gsub( /\(.+\)/, '' ) # remove/delete subtitles 59 | extra_title.strip! # strip leading n trailing withspaces too! 
60 | titles << extra_title 61 | end 62 | end 63 | 64 | 65 | ## NB: sort here by length (largest goes first - best match) 66 | # exclude code and key (key should always go last) 67 | titles = titles.sort { |left,right| right.length <=> left.length } 68 | 69 | ## escape for regex plus allow subs for special chars/accents 70 | titles = titles.map { |title| TextUtils.title_esc_regex( title ) } 71 | 72 | ## NB: only include code field - if defined 73 | titles << rec.code if rec.respond_to?(:code) && rec.code.present? 74 | 75 | known_titles << [ rec.key, titles ] 76 | 77 | ### fix: use plain logger 78 | LogUtils::Logger.root.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<" 79 | end 80 | 81 | known_titles 82 | end 83 | 84 | 85 | 86 | def find_key_for!( name, line ) 87 | LogUtils::Logger.root.info " find_key_for! #{name} - deprecated API - use TitleMapper.find_key! instead" 88 | 89 | regex = /@@oo([^@]+?)oo@@/ # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@]) 90 | 91 | upcase_name = name.upcase 92 | downcase_name = name.downcase 93 | 94 | if line =~ regex 95 | value = "#{$1}" 96 | ### fix: use plain logger 97 | LogUtils::Logger.root.debug " #{downcase_name}: >#{value}<" 98 | 99 | line.sub!( regex, "[#{upcase_name}]" ) 100 | 101 | return $1 102 | else 103 | return nil 104 | end 105 | end 106 | 107 | 108 | def find_keys_for!( name, line ) # NB: keys (plural!) - will return array 109 | LogUtils::Logger.root.info " find_keys_for! #{name} - deprecated API - use TitleMapper.find_keys! instead" 110 | 111 | counter = 1 112 | keys = [] 113 | 114 | downcase_name = name.downcase 115 | 116 | key = find_key_for!( "#{downcase_name}#{counter}", line ) 117 | while key.present? 118 | keys << key 119 | counter += 1 120 | key = find_key_for!( "#{downcase_name}#{counter}", line ) 121 | end 122 | 123 | keys 124 | end 125 | 126 | 127 | def map_titles_for!( name, line, title_table ) 128 | LogUtils::Logger.root.info " map_titles_for! 
# Deprecated - use TitleMapper.map_titles! instead.
#
# Runs map_title_worker_for! once for every record of +title_table+.
#
# @param name [String] label used for logging only
# @param line [String] text to scan; MUTATED in place
# @param title_table [Array<Array>] records of the form [key, values], where
#   values is a list of regex-ready title strings
# @return [Array<Array>] +title_table+ (the result of #each)
def map_titles_for!( name, line, title_table )
  LogUtils::Logger.root.info " map_titles_for! #{name} - deprecated API - use TitleMapper.map_titles! instead"

  title_table.each do |key, values|
    map_title_worker_for!( name, line, key, values )
  end
end


# Replaces the first occurrence of the first matching title in +values+ with
# the marker "@@oo<key>oo@@ " (note the trailing space) and stops.
#
# @param name [String] label used for logging only
# @param line [String] text to scan; MUTATED in place on a match
# @param key [String] key to put inside the marker
# @param values [Array<String>] title patterns; interpolated into a regex as-is
# @return [Boolean] true if a title matched and was replaced, false otherwise
def map_title_worker_for!( name, line, key, values )
  values.each do |value|
    ## nb: \b does NOT include space or newline for word boundary (only alphanums e.g. a-z0-9)
    ##  (thus add it, allows match for Benfica Lis. for example - note . at the end)
    ## wrap with word boundary (e.g. match only whole words e.g. not wac in wacker)
    regex = /\b#{value}(\b| |\t|$)/
    next unless line =~ regex

    ### fix: use plain logger
    LogUtils::Logger.root.debug " match for #{name.downcase} >#{key}< >#{value}<"

    # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
    # block form: the key is inserted literally, even if it contains a backslash
    line.sub!( regex ) { "@@oo#{key}oo@@ " }    # NB: add one space char at end
    return true    # break out after first match (do NOT continue)
  end

  false
end
module TextUtils
  module TitleHelper

    # Regex metacharacters that title_esc_regex prefixes with a backslash.
    # NB: space and - are deliberately NOT part of this set.
    TITLE_REGEX_METACHARS = /[\\.()?*+$^\[\]{}|]/

    # Chars that get replaced by a regex group so a title matches with or
    # without accents/umlauts; also makes - optional e.g. Blau-Weiss == Blau Weiss
    #
    # todo: add some more
    #  see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
    TITLE_ALTERNATIVES = {
      '-' => '(-| )',    ## e.g. Blau-Weiß Linz
      'æ' => '(æ|ae)',
      'ä' => '(ä|ae)',
      'Ö' => '(Ö|Oe)',   ## e.g. Österreich
      'ö' => '(ö|oe)',   ## e.g. Mönchengladbach
      'ß' => '(ß|ss)',   ## e.g. Blau-Weiß Linz
      'ü' => '(ü|ue)',

      'á' => '(á|a)',    ## e.g. Bogotá, Sársfield
      'ã' => '(ã|a)',    ## e.g. São Paulo
      'ç' => '(ç|c)',    ## e.g. Fenerbahçe
      'é' => '(é|e)',    ## e.g. Vélez
      'ê' => '(ê|e)',    ## e.g. Grêmio
      'ï' => '(ï|i)',    ## e.g. El Djazaïr
      'ñ' => '(ñ|n)',    ## e.g. Porteño
      'ň' => '(ň|n)',    ## e.g. Plzeň
      'ó' => '(ó|o)',    ## e.g. Colón
      'ō' => '(ō|o)',    ## e.g. Tōkyō
      'ș' => '(ș|s)',    ## e.g. Bucarești
      'ú' => '(ú|u)'     ## e.g. Fútbol
    }.freeze

    # Matches any single key of TITLE_ALTERNATIVES.
    TITLE_ALTERNATIVES_REGEX = Regexp.union( TITLE_ALTERNATIVES.keys )


    # Removes optional part markers (<, >, ‹, ›)
    #   e.g.  Bock ‹Damm›               becomes => Bock Damm
    #         ‹Estrella› ‹Damm› Inedit  becomes => Estrella Damm Inedit
    def strip_part_markers( title )   # use different name e.g. strip_name_markers/strip_name_enclosure etc.??
      title.gsub( /[<>‹›]/, '' )
    end

    # Removes optional (english) translations in square brackets ([])
    #   e.g.  Wien [Vienna]  =>  Wien
    # note: whitespace around the removed part is kept
    def strip_translations( title )
      title.gsub( /\[[^\]]+\]/, '' )
    end

    # Removes optional longer title parts in ()
    #   e.g.  Las Palmas (de Gran Canaria)  =>  Las Palmas
    #         Palma (de Mallorca)           =>  Palma
    # note: whitespace around the removed part is kept
    def strip_subtitles( title )
      title.gsub( /\([^\)]+\)/, '' )
    end

    # Removes optional tags in {}
    #   e.g.  Ottakringer {Bio}          =>  Ottakringer
    #         Ottakringer {Alkoholfrei}  =>  Ottakringer
    #
    # todo: use for autotags? e.g. {Bio} => bio
    def strip_tags( title )   # todo: use an alias or rename for better name ??
      title.gsub( /\{[^\}]+\}/, '' )
    end

    # Removes spaces, tabs and the punctuation chars _ - . ! ( ) [ ] ' " ’ /
    def strip_whitespaces( title )
      title.gsub( /[ \t_\-\.!()\[\]'"’\/]/, '' )
    end

    # Removes the special chars % & ° + : $
    #   e.g.  +Malta
    #         Minerva 8:60
    #         $Alianz$ Arena
    def strip_special_chars( title )
      title.gsub( /[%&°+:$]/, '' )
    end

    # Turns a title into a key: downcases, runs all strip_* helpers (in the
    # order below) and finally passes the result through TextUtils.asciify.
    #
    # @param title [String]
    # @return [String] the downcased result of TextUtils.asciify
    def title_to_key( title )
      ## NB: used in/moved from readers/values_reader.rb

      ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
      key = title.downcase

      key = strip_part_markers( key )    # e.g. ‹Estrella› ‹Damm› Inedit becomes => Estrella Damm Inedit
      key = strip_translations( key )
      key = strip_subtitles( key )
      key = strip_tags( key )
      key = strip_whitespaces( key )
      key = strip_special_chars( key )

      TextUtils.asciify( key ).downcase   ## see filter/string_filter
    end # method title_to_key


    # Turns a plain title into a regex source string that matches the title
    # literally, with or without accents (see TITLE_ALTERNATIVES).
    #
    #   e.g.  Benfica Lis.                    =>  Benfica Lis\.
    #         Club Atlético Colón (Santa Fe)  =>  Club Atl(é|e)tico Col(ó|o)n \(Santa Fe\)
    #         Blau-Weiß Linz                  =>  Blau(-| )Wei(ß|ss) Linz
    #
    # Escapes  . ( ) ? * + $ ^  and also  [ ] { } | \  - otherwise a title such
    # as "Wien [Vienna]" would turn into a character class instead of literal text.
    #
    # @param title_unescaped [String] plain title text
    # @return [String] regex source (NOT a Regexp object)
    def title_esc_regex( title_unescaped )
      ## NB: cannot use Regexp.escape! will escape space '' to '\ '

      # single pass; block form inserts the backslash literally
      # (no backslash interpretation as in string replacements)
      title = title_unescaped.gsub( TITLE_REGEX_METACHARS ) { |char| "\\#{char}" }

      ### fix/todo: check for dot+space e.g. . and make dot optional
      ##   e.g. make dot (.) optional plus allow alternative optional space e.g.
      ##     -- for U.S.A. => allow USA or U S A
      ##   collect some more (real-world) examples first!!!!!

      # must run AFTER escaping - the groups added here ( | ) must stay unescaped
      title.gsub( TITLE_ALTERNATIVES_REGEX, TITLE_ALTERNATIVES )
    end

  end # module TitleHelper
end # module TextUtils
module TextUtils
  module AddressHelper

    # Leading postal codes (incl. the whitespace that follows).
    POSTAL_CODE_NNNN   = /^[0-9]{4}\s+/               # four digits    e.g. 2018 Antwerpen
    POSTAL_CODE_NNNNN  = /^[0-9]{5}\s+/               # five digits    e.g. 95028 Hof
    POSTAL_CODE_NNN_NN = /^[0-9]{3}\s[0-9]{2}\s+/     # 3+2 digits     e.g. 284 15 Kutná Hora

    # Trailing US state (two uppercase letters) with or without zip code
    #   e.g. Brooklyn | NY 11249  or  Brooklyn, NY 11249
    #        Brooklyn | NY        or  Brooklyn, NY
    US_STATE_SUFFIXES = [
      /\s*[|,]\s+[A-Z]{2}\s+[0-9]{5}\s*$/,
      /\s*[|,]\s+[A-Z]{2}\s*$/
    ].freeze

    # Countries for which normalize_addr moves the postal code line to the front.
    SWAP_LINE_PATTERNS = {
      'at' => POSTAL_CODE_NNNN,
      'de' => POSTAL_CODE_NNNNN
    }.freeze

    # Per country: the patterns that identify the city line; whatever the
    # pattern matches gets cut off and the rest is assumed to be the city.
    CITY_LINE_PATTERNS = {
      'at' => [POSTAL_CODE_NNNN],
      'be' => [POSTAL_CODE_NNNN],
      'de' => [POSTAL_CODE_NNNNN],
      'cz' => [POSTAL_CODE_NNN_NN],
      'sk' => [POSTAL_CODE_NNN_NN],
      'us' => US_STATE_SUFFIXES
    }.freeze


    # Puts the line starting with the postal code first in a two-line address.
    #
    # for now only checks german (de) 5-digit zip code and
    #                     austrian (at) 4-digit zip code
    #
    #  e.g.  Alte Plauener Straße 24 // 95028 Hof   becomes
    #        95028 Hof // Alte Plauener Straße 24
    #
    # @param old_address [String, nil] address lines separated by //
    # @param country_key [String, nil] e.g. 'at' or 'de'; calling without it is
    #   deprecated (prints a notice and returns the address unchanged)
    # @return [String, nil] the reordered address, or +old_address+ itself when
    #   nothing needs to (or can) be switched
    def normalize_addr( old_address, country_key=nil )
      if country_key.nil?
        puts "TextUtils.normalize_addr drepreciated call - country_key now required; please add !!"
        return old_address
      end

      # do NOT process nil address lines (same as the find_city_* methods)
      return old_address if old_address.nil?

      postal_code = SWAP_LINE_PATTERNS[ country_key ]
      return old_address if postal_code.nil?     # unsupported country - just pass through

      lines = old_address.split( '//' )
      return old_address unless lines.size == 2  # only two lines get checked for switching

      line1, line2 = lines.map { |line| line.strip }
      return old_address unless line2 =~ postal_code

      "#{line2} // #{line1}"
    end


    # General rule; not country-specific; no postal code/zip code or state.
    #  - must be like two lines (one line empty) e.g.
    #      // London   or
    #      London //
    #    will assume entry is city
    #
    # @param address [String, nil] address lines separated by //
    # @return [String, nil] the stripped line, or nil if the address is nil/empty,
    #   has more (or less) than one non-blank line, or the line does not look
    #   like a plain city name
    def find_city_in_addr_without_postal_code( address )
      # fix: use blank?
      return nil if address.nil? || address.empty?   # do NOT process nil or empty address lines; sorry

      # note: London // will get split into array with size 1 e.g. ['London ']
      #  thus: 1) strip lines  2) remove blank lines
      lines = address.split( '//' ).map { |line| line.strip }.reject { |line| line.empty? }
      return nil unless lines.size == 1

      candidate = lines[0]

      # note: city may NOT include
      #   numbers (e.g. assumes zip/postal code etc.) or pipe (|) or comma (,)
      return nil if candidate =~ /[0-9|,]/

      # two or more uppercase letters in a row e.g. TX NY etc.
      #  check if city exists with two uppercase letters??
      return nil if candidate =~ /[A-Z]{2,}/

      candidate   # bingo!!! assume candidate line is a city name
    end


    # Finds the city using the country-specific patterns in CITY_LINE_PATTERNS,
    # e.g. a leading postal code (at, be, de, cz, sk) or a trailing state/zip (us).
    # The first line matching a pattern wins.
    #
    # @param address [String, nil] address lines separated by //
    # @param country_key [String] e.g. 'at', 'be', 'de', 'cz', 'sk', 'us'
    # @return [String, nil] the city line with the matched part cut off, or nil
    #   if nothing matches or the country is not supported
    def find_city_in_addr_with_postal_code( address, country_key )
      # fix: use blank?
      return nil if address.nil? || address.empty?   # do NOT process nil or empty address lines; sorry

      patterns = CITY_LINE_PATTERNS[ country_key ]
      return nil if patterns.nil?    # unsupported country/address schema for now; sorry

      address.split( '//' ).each do |line|
        linec = line.strip
        patterns.each do |regex|
          # cut off postal code (or state/zip); assume rest is city
          return linec.sub( regex, '' ) if linec =~ regex
        end
      end

      nil   # sorry nothing found
    end


    # Finds the city in an address; tries the generic rule first
    # (find_city_in_addr_without_postal_code), then the country-specific one
    # (find_city_in_addr_with_postal_code).
    #
    # @param address [String, nil] address lines separated by //
    # @param country_key [String]
    # @return [String, nil] the city, or nil if no known pattern matches
    def find_city_in_addr( address, country_key )
      # fix: use blank?
      return nil if address.nil? || address.empty?   # do NOT process nil or empty address lines; sorry

      city = find_city_in_addr_without_postal_code( address )
      return city unless city.nil?

      find_city_in_addr_with_postal_code( address, country_key )
    end

  end # module AddressHelper
end # module TextUtils
module TextUtils
  module HypertextHelper

    # Strips markup tags and returns the remaining text; brute force for now.
    # Only strips things that start with a proper tag name - thus, a plain
    # < or > in running text (e.g. 1 < 2) is left alone.
    #
    # note: will NOT strip comments for now
    #  todo: add strip comments or use new strip_comments( ht )
    #
    # @param ht [String] hypertext
    # @return [String] new string with tags removed
    def strip_tags( ht )
      ## note: follow official xml spec
      ##  - allows for first char:    (Letter | '_' | ':')
      ##  - allows for followup chars: (Letter | Digit | '_' | ':' | '.' | '-')
      ##  (digits make sure we cover h1/h2/h3/h4/h5/h6 tags w/ number!!)
      tag_name_pattern = "[a-z_:][a-z0-9_:.\\-]*"

      empty_tag_pattern   = "<#{tag_name_pattern}\\s*/>"
      # \s+ before [^>]* - attributes must be separated from the tag name
      opening_tag_pattern = "<#{tag_name_pattern}(\\s+[^>]*)?>"
      closing_tag_pattern = "</#{tag_name_pattern}\\s*>"

      ht = ht.gsub( /#{empty_tag_pattern}/i,   '' )   # xml-style empty tags (tag name followed by />)
      ht = ht.gsub( /#{opening_tag_pattern}/i, '' )   # opening tags (w/ or w/o attributes)
      ht = ht.gsub( /#{closing_tag_pattern}/i, '' )   # closing tags
      ht
    end


    # Strips all tags except the ones listed in +tags+. Whitelisted tags are
    # kept without attributes and downcased; xml-style empty tags get turned
    # into simple (html-style) tags. Image tags get replaced by a black
    # diamond (♦) for debugging.
    #
    # note: assumes properly escaped <> in ht/hypertext
    #
    # @param ht [String] hypertext
    # @param tags [Array<Symbol, String>] tag names to keep; interpolated into
    #   regexes as-is
    # @param opts [Hash] :skip_restore - if truthy, return after step two with
    #   whitelisted tags still in ‹tag› form (for debugging)
    # @return [String]
    def whitelist( ht, tags, opts={} )

      ###############################################
      # step one - save whitelisted tags; use ‹tag›
      #   note: we strip all attributes
      #   note: match all tags case insensitive e.g. allow a,A or br,BR,bR etc.
      #          and downcase all tags
      tags.each do |tag|
        ht = ht.gsub( /<(#{tag})\s*\/>/i )       { "‹#{$1.downcase}›" }    # xml-style empty tag
        # make sure we won't swallow longer tag names starting with the
        #  same letters (e.g. pre for p), thus use \s+ before [^>]
        ht = ht.gsub( /<(#{tag})(\s+[^>]*)?>/i ) { "‹#{$1.downcase}›" }    # opening tag
        ht = ht.gsub( /<\/(#{tag})\s*>/i )       { "‹/#{$1.downcase}›" }   # closing tag
      end

      ############################
      # step two - clean tags

      # strip images - special treatment for debugging
      ht = ht.gsub( /<img[^>]*>/i, '♦' )   # for debugging use black diamond e.g. ♦
      ht = ht.gsub( /<\/img>/i, '' )       # should not exist

      # strip all remaining tags
      #  -- note: will NOT strip comments for now
      ht = strip_tags( ht )

      ############################################
      # step three - restore whitelisted tags

      return ht if opts[:skip_restore]   # skip step 3 for debugging

      tags.each do |tag|
        ht = ht.gsub( /‹(#{tag})›/ )   { "<#{$1}>" }    # opening tag
        ht = ht.gsub( /‹\/(#{tag})›/ ) { "</#{$1}>" }   # closing tag
      end

      ht
    end # method whitelist


    ## change to simple_hypertext or
    #     hypertext_simple or
    #     sanitize ???

    # Keeps only a fixed set of block/text tags (see list below) and strips
    # the protocol (http:// or https://) from literal urls.
    #
    # todo: add options for
    #   keep links, images, lists (?too), code, codeblocks
    #
    # @param ht [String] hypertext
    # @param opts [Hash] passed on to whitelist
    # @return [String]
    def sanitize( ht, opts={} )   # ht -> hypertext
      ht = whitelist( ht, [:br, :p, :ul, :ol, :li, :pre, :code, :blockquote, :q, :cite], opts )

      # clean (prettify) literal urls (strip protocol)
      ht = ht.gsub( /(http|https):\/\//, '' )
      ht
    end


    # Turns hypertext into plain (or markdown/wiki-style) text - to be done;
    # for now returns the result of sanitize.
    #
    # todo: convert bold/em markup and clean up whitespace
    #
    # @param ht [String] hypertext
    # @param opts [Hash] passed on to sanitize
    # @return [String]
    def textify( ht, opts={} )   # ht -> hypertext
      sanitize( ht, opts )   # step 1 - sanitize html
    end


    ##############################
    # rails-style asset, url tag helpers and friends
    #
    # todo: move into different helper module/modules?? why? why not?

    # Builds an empty tag (no content, no closing tag).
    # note: attribute values are inserted as-is (NOT escaped) in single quotes
    #
    # @param tag [Symbol, String] tag name
    # @param opts [Hash] attributes
    # @return [String]
    def tag( tag, opts={} )
      attribs = opts.map { |key, value| "#{key}='#{value}'" }

      if attribs.empty?
        "<#{tag}>"
      else
        "<#{tag} #{attribs.join(' ')}>"
      end
    end

    # Builds a content tag (w/ opening and closing tag).
    # note: attribute values and content are inserted as-is (NOT escaped)
    #
    # @param tag [Symbol, String] tag name
    # @param content [String] inserted between opening and closing tag
    # @param opts [Hash] attributes
    # @return [String]
    def content_tag( tag, content, opts={} )
      attribs = opts.map { |key, value| "#{key}='#{value}'" }

      if attribs.empty?
        "<#{tag}>#{content}</#{tag}>"
      else
        "<#{tag} #{attribs.join(' ')}>#{content}</#{tag}>"
      end
    end


    # Builds a stylesheet link tag; auto-adds .css to +href+ if not present.
    # Entries in +opts+ overwrite the defaults (rel, type, href).
    def stylesheet_link_tag( href, opts={} )
      href = "#{href}.css" unless href.end_with?( '.css' )   # auto-add .css if not present
      attribs = { rel:  'stylesheet',
                  type: 'text/css',
                  href: href }
      attribs = attribs.merge( opts )   ### fix/todo: use reverse merge e.g. overwrite only if not present
      tag( :link, attribs )
    end

    # Builds an image tag; entries in +opts+ overwrite src.
    def image_tag( src, opts={} )
      attribs = { src: src }
      attribs = attribs.merge( opts )   ### fix/todo: use reverse merge e.g. overwrite only if not present
      tag( :img, attribs )
    end

    # Builds a link (a tag) around +content+; entries in +opts+ overwrite href.
    def link_to( content, href, opts={} )
      attribs = { href: href }
      attribs = attribs.merge( opts )   ### fix/todo: use reverse merge e.g. overwrite only if not present
      content_tag( :a, content, attribs )
    end

  end # module HypertextHelper
end # module TextUtils