├── pakman
    ├── test
    │   ├── pages
    │   │   ├── empty.txt
    │   │   ├── text.txt
    │   │   ├── page2.txt
    │   │   ├── page3.txt
    │   │   └── page1.txt
    │   ├── liquid
    │   │   ├── pak
    │   │   │   ├── hello.txt
    │   │   │   ├── test.txt
    │   │   │   ├── s9logo.png
    │   │   │   ├── hello.doc
    │   │   │   ├── testbin.txt
    │   │   │   └── test.html
    │   │   └── test.html
    │   ├── erb
    │   │   └── pak
    │   │   │   ├── test.txt
    │   │   │   └── test.html.erb
    │   ├── helper.rb
    │   ├── data
    │   │   └── test.yml
    │   ├── test_page.rb
    │   ├── test_erb.rb
    │   ├── test_liquid_drops.rb
    │   ├── test_liquid_binaries.rb
    │   └── test_liquid.rb
    ├── History.md
    ├── lib
    │   ├── pakman
    │   │   ├── version.rb
    │   │   ├── utils.rb
    │   │   ├── erb
    │   │   │   ├── template.rb
    │   │   │   └── templater.rb
    │   │   ├── cli
    │   │   │   ├── commands
    │   │   │   │   ├── fetch.rb
    │   │   │   │   ├── list.rb
    │   │   │   │   └── gen.rb
    │   │   │   ├── ctx.rb
    │   │   │   ├── helpers.rb
    │   │   │   ├── opts.rb
    │   │   │   └── runner.rb
    │   │   ├── copier.rb
    │   │   ├── finder.rb
    │   │   ├── page.rb
    │   │   ├── liquid
    │   │   │   ├── template.rb
    │   │   │   └── templater.rb
    │   │   ├── manifest.rb
    │   │   └── fetcher.rb
    │   └── pakman.rb
    ├── .gitignore
    ├── bin
    │   └── pakman
    ├── TODOS.md
    ├── Rakefile
    ├── Manifest.txt
    └── README.md
├── linkto
    ├── NOTES.md
    ├── HISTORY.md
    ├── lib
    │   ├── linkto
    │   │   ├── version.rb
    │   │   ├── bing.rb
    │   │   ├── untappd.rb
    │   │   ├── wikipedia.rb
    │   │   ├── google.rb
    │   │   └── flickr.rb
    │   └── linkto.rb
    ├── .gitignore
    ├── Manifest.txt
    ├── test
    │   ├── test_wikipedia.rb
    │   ├── helper.rb
    │   └── test_google.rb
    ├── Rakefile
    └── README.md
├── textutils-more
    ├── README.md
    ├── .gitignore
    └── lib
    │   └── textutils
    │       ├── reader
    │           └── markdown_reader.rb
    │       └── table
    │           └── table_reader.rb
├── textutils
    ├── HISTORY.md
    ├── TODO.md
    ├── test
    │   ├── data
    │   │   ├── de-deutschland
    │   │   │   ├── orte.txt
    │   │   │   └── 3--by-bayern
    │   │   │   │   └── 4--oberfranken
    │   │   │   │       ├── orte_ii.txt
    │   │   │   │       └── orte.txt
    │   │   ├── feedburner.txt
    │   │   └── cl_all.txt
    │   ├── helper.rb
    │   ├── test_tree_reader_ii.rb
    │   ├── test_unicode_helper.rb
    │   ├── test_fixture_reader.rb
    │   ├── test_taglist.rb
    │   ├── test_tree_reader.rb
    │   ├── test_block_reader.rb
    │   ├── test_title_mapper2.rb
    │   ├── test_slugify.rb
    │   ├── test_asciify.rb
    │   ├── test_title_mapper.rb
    │   ├── test_title_finder.rb
    │   ├── test_title_helper.rb
    │   ├── test_address_helper.rb
    │   └── test_hypertext_helper.rb
    ├── lib
    │   ├── textutils
    │   │   ├── filter
    │   │   │   ├── erb_filter.rb
    │   │   │   ├── code_filter.rb
    │   │   │   ├── comment_filter.rb
    │   │   │   └── erb_django_filter.rb
    │   │   ├── core_ext
    │   │   │   ├── time.rb
    │   │   │   ├── file.rb
    │   │   │   └── array.rb
    │   │   ├── version.rb
    │   │   ├── helper
    │   │   │   ├── xml_helper.rb
    │   │   │   ├── tag_helper.rb
    │   │   │   ├── unicode_helper.rb
    │   │   │   ├── date_helper.rb
    │   │   │   ├── value_helper_iii_numbers.rb
    │   │   │   ├── value_helper_ii.rb
    │   │   │   ├── value_helper_i.rb
    │   │   │   ├── title_helper.rb
    │   │   │   ├── address_helper.rb
    │   │   │   └── hypertext_helper.rb
    │   │   ├── reader
    │   │   │   ├── code_reader.rb
    │   │   │   ├── block_reader.rb
    │   │   │   ├── line_reader.rb
    │   │   │   ├── fixture_reader.rb
    │   │   │   ├── tree_reader.rb
    │   │   │   └── hash_reader.rb
    │   │   ├── utils.rb
    │   │   ├── sanitizier.rb
    │   │   ├── parser
    │   │   │   ├── name_tokenizer.rb
    │   │   │   └── name_parser.rb
    │   │   ├── patterns.rb
    │   │   ├── classifier.rb
    │   │   ├── title_mapper.rb
    │   │   ├── page.rb
    │   │   ├── title_mapper2.rb
    │   │   └── title.rb
    │   └── textutils.rb
    ├── .gitignore
    ├── Rakefile
    ├── Manifest.txt
    └── README.md
├── README.md
├── attic
    ├── fixture_reader.rb
    ├── line_reader_v2.rb
    ├── values_reader_v2.rb
    ├── hash_reader_v2.rb
    └── values_reader.rb
└── NOTES.md


/pakman/test/pages/empty.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/linkto/NOTES.md:
--------------------------------------------------------------------------------
1 | # Notes n Tips
2 | 
3 | 


--------------------------------------------------------------------------------
/textutils-more/README.md:
--------------------------------------------------------------------------------
1 | # textutils-more
2 | 
3 | 
4 | 


--------------------------------------------------------------------------------
/pakman/test/pages/text.txt:
--------------------------------------------------------------------------------
1 | just some text
2 | no headers
3 | 
4 | 


--------------------------------------------------------------------------------
/pakman/test/liquid/pak/hello.txt:
--------------------------------------------------------------------------------
1 | 
2 | just some text
3 | no front matter
4 | 


--------------------------------------------------------------------------------
/pakman/test/pages/page2.txt:
--------------------------------------------------------------------------------
1 | ---
2 | ---
3 | 
4 | try empty front matter
5 | 
6 | 


--------------------------------------------------------------------------------
/pakman/test/pages/page3.txt:
--------------------------------------------------------------------------------
1 | ---
2 | # try empty front matter with comments
3 | ---


--------------------------------------------------------------------------------
/pakman/History.md:
--------------------------------------------------------------------------------
1 | ## 0.0.1 / 2012-07-17
2 | 
3 | * Everything is new. First release
4 | 


--------------------------------------------------------------------------------
/pakman/test/pages/page1.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: hello
3 | ---
4 | 
5 | some text here
6 | 


--------------------------------------------------------------------------------
/textutils/HISTORY.md:
--------------------------------------------------------------------------------
1 | ### 0.1.0 / 2012-06-09
2 | 
3 | * Everything is new. First release


--------------------------------------------------------------------------------
/linkto/HISTORY.md:
--------------------------------------------------------------------------------
1 | ### 0.0.1 / 2014-03-15
2 | 
3 | * Everything is new. First release.
4 | 
5 | 


--------------------------------------------------------------------------------
/linkto/lib/linkto/version.rb:
--------------------------------------------------------------------------------
1 | 
2 | module Linkto
3 |    VERSION = '0.1.1'   # gem version string; the Rakefile hands it to Hoe as Linkto::VERSION
4 | end
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/pakman/test/erb/pak/test.txt:
--------------------------------------------------------------------------------
1 | ######
2 | # simple test manifest
3 | 
4 | __file__.html test.html.erb


--------------------------------------------------------------------------------
/pakman/test/liquid/pak/test.txt:
--------------------------------------------------------------------------------
1 | ######
2 | # simple test manifest
3 | 
4 | __file__.html test.html
5 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/version.rb:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | 
3 | module Pakman
4 |   VERSION = '1.1.0'   # version string of the pakman library
5 | end
6 | 


--------------------------------------------------------------------------------
/pakman/test/liquid/pak/s9logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rubycocos/text/master/pakman/test/liquid/pak/s9logo.png


--------------------------------------------------------------------------------
/pakman/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore generated folders 
2 | pkg/
3 | doc/
4 | tmp/
5 | 
6 | # ignore jekyll generated output
7 | site/_site/
8 | 
9 | 


--------------------------------------------------------------------------------
/pakman/test/helper.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # minitest setup
 4 | require 'minitest/autorun'
 5 | 
 6 | 
 7 | ## our own code
 8 | require 'pakman'
 9 | 
10 | 


--------------------------------------------------------------------------------
/textutils/TODO.md:
--------------------------------------------------------------------------------
1 | # TODOs
2 | 
3 | - [ ] add line number to unicode dash warning e.g. *** warning: found ndash U+2013 (-) in file >at-austria/2013_14/cup.txt<; converting to plain ascii hyphen_minus (-)
4 | 
5 | 


--------------------------------------------------------------------------------
/pakman/test/liquid/pak/hello.doc:
--------------------------------------------------------------------------------
1 | ---
2 | front matter here
3 | ---
4 | 
5 | try "unkown extension"
6 | just some text here
7 | 
8 | note: front matter will not matter, that is, will get ignored (e.g. not checked)
9 | 


--------------------------------------------------------------------------------
/pakman/test/liquid/pak/testbin.txt:
--------------------------------------------------------------------------------
1 | ######
2 | # test manifest with binary files e.g. graphics
3 | #   and "unknown" extensions (will get handled like binary e.g. copied 1:1)
4 | 
5 | s9logo.png
6 | hello.txt
7 | hello.doc
8 | 


--------------------------------------------------------------------------------
/linkto/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | .bundle
 4 | .config
 5 | coverage
 6 | InstalledFiles
 7 | lib/bundler/man
 8 | pkg
 9 | rdoc
10 | spec/reports
11 | test/tmp
12 | test/version_tmp
13 | tmp
14 | 
15 | # YARD artifacts
16 | .yardoc
17 | _yardoc
18 | doc/
19 | 


--------------------------------------------------------------------------------
/pakman/test/data/test.yml:
--------------------------------------------------------------------------------
 1 | headers:
 2 |   title: test title
 3 |   author: test author
 4 | 
 5 | 
 6 | slides:
 7 |   - header:  test header 1
 8 |     content: test content 1
 9 |   - header:  test header 2
10 |     content: test content 2
11 |   - content: test content 3
12 | 
13 | 


--------------------------------------------------------------------------------
/textutils/test/data/de-deutschland/orte.txt:
--------------------------------------------------------------------------------
 1 | 2     Bayern
 2 | 24    .. Oberfranken
 3 | 241   .... Bamberg (Stadt)     ## Kreisfreie Stadt
 4 |       ...... Bamberg
 5 |       ........ Bamberg
 6 | 
 7 | #####
 8 | # todo: for testing add berlin and some more
 9 | 
10 | 9     Berlin
11 | 91    .. Berlin
12 | 
13 | 


--------------------------------------------------------------------------------
/pakman/bin/pakman:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | ###################
 4 | # == DEV TIPS:
 5 | #
 6 | # For local testing run like:
 7 | #
 8 | #    ruby -Ilib bin/pakman
 9 | #
10 | # Set the executable bit in Linux. Example:
11 | #
12 | #    % chmod a+x bin/pakman
13 | #
14 | 
15 | require 'pakman'
16 | 
17 | Pakman.main
18 | 


--------------------------------------------------------------------------------
/linkto/Manifest.txt:
--------------------------------------------------------------------------------
 1 | HISTORY.md
 2 | Manifest.txt
 3 | README.md
 4 | Rakefile
 5 | lib/linkto.rb
 6 | lib/linkto/bing.rb
 7 | lib/linkto/flickr.rb
 8 | lib/linkto/google.rb
 9 | lib/linkto/untappd.rb
10 | lib/linkto/version.rb
11 | lib/linkto/wikipedia.rb
12 | test/helper.rb
13 | test/test_google.rb
14 | test/test_wikipedia.rb
15 | 


--------------------------------------------------------------------------------
/textutils/test/helper.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | ## $:.unshift(File.dirname(__FILE__))
 3 | 
 4 | ## minitest setup
 5 | 
 6 | require 'minitest/autorun'
 7 | 
 8 | 
 9 | ## make sure activesupport gets included/required
10 | # note: just activesupport or active_support will NOT work
11 | # require 'active_support/all'  # -- now included in textutils itself
12 | 
13 | 
14 | ## our own code
15 | 
16 | require 'textutils'
17 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/utils.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | 
 6 |   # downcase and remove .txt (if anywhere in name)
 7 |   # e.g. welcome.quick.txt becomes welcome.quick
 8 |   #      welcome.txt.quick becomse welcome.quick
 9 |   #      s6blank.txt becomes s6blank
10 | 
11 |   def self.pakname_from_file( path )
12 |     File.basename( path ).downcase.gsub( '.txt', '' )
13 |   end
14 | 
15 | end # class Pakman
16 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/filter/erb_filter.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module TextUtils
 4 |   module Filter
 5 |   # NOTE(review): ERB is used here without a require in this file - presumably loaded elsewhere; confirm
 6 |   # Run +content+ (including any header) through erb and return the rendered text; allows plugins/helpers.
 7 |   def erb( content, options={} )   # options is accepted but not read by this method
 8 |     puts "  Running embedded Ruby (erb) code/helpers..."
 9 |     # binding() hands the template this method's scope (self, content, options)
10 |     content =  ERB.new( content ).result( binding() )
11 |     content
12 |   end
13 | 
14 |   end  # module Filter
15 | end   # module TextUtils


--------------------------------------------------------------------------------
/textutils/test/data/feedburner.txt:
--------------------------------------------------------------------------------
 1 | ####################################
 2 | # feedburner text pattern (regex)
 3 | #
 4 | #  pattern (regex)
 5 | #  ---
 6 | #  test1
 7 | #  ---
 8 | #  test2
 9 | #  ---
10 | #  etc.
11 | 
12 | 
13 | <img[^>]*?
14 |   src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1
15 |   .*?>
16 | 
17 | ---
18 | 
19 | <img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/linkto/lib/linkto/bing.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Linkto
 4 |   module BingHelper
 5 | 
 6 | 
 7 |   def link_to_bing_search_images( q, opts={} )
 8 |     link_to q, "http://www.bing.com/images/search?q=#{q}", opts
 9 |   end
10 | 
11 | ############################
12 | # shortcuts / aliases
13 | 
14 |   def bing_search_images( q, opts={} ) link_to_bing_search_images( q, opts) end
15 | 
16 | 
17 |   end # module BingHelper
18 | end # module Linkto
19 | 


--------------------------------------------------------------------------------
/linkto/lib/linkto/untappd.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Linkto
 4 |   module UntappdHelper
 5 | 
 6 | 
 7 |   def link_to_untappd_search( q, opts={} )
 8 |     link_to q, "https://untappd.com/search?q=#{q}", opts
 9 |   end
10 | 
11 | 
12 | ###############################
13 | # shortcuts / aliases
14 | 
15 |   def untappd_search( q, opts={} ) link_to_untappd_search( q, opts )  end
16 | 
17 | 
18 |   end # module UntappdHelper
19 | end # module Linkto
20 | 


--------------------------------------------------------------------------------
/textutils/test/data/cl_all.txt:
--------------------------------------------------------------------------------
 1 | #####################################
 2 | # test data for fixture reader
 3 | 
 4 | 
 5 | # -- leagues
 6 | 
 7 | europe-champions-league!/leagues
 8 | 
 9 | # -- 2011_12
10 | 
11 | europe-champions-league!/2011_12/cl
12 | europe-champions-league!/2011_12/el
13 | 
14 | # -- 2012_13
15 | 
16 | europe-champions-league!/2012_13/cl
17 | europe-champions-league!/2012_13/el
18 | 
19 | # -- 2013_14
20 | 
21 | europe-champions-league!/2013_14/cl
22 | 
23 | 


--------------------------------------------------------------------------------
/pakman/TODOS.md:
--------------------------------------------------------------------------------
 1 | # Todos
 2 | 
 3 | - [ ]  check file for front matter; use more "efficient" way
 4 | 
 5 | e.g. do NOT load complete file; just a look-ahead;
 6 | try to make it work for binary file too? why? why not?
 7 | check how jekyll checks for front matter; does jekyll also
 8 | check binary files? does the file extension matter (e.g. png, gif, html, css, etc)??
 9 | 
10 | 
11 | ## robots.txt
12 | 
13 | - [ ] see osm blogs templates; uses robots.txt template - do NOT use as manifest; add to exclude list !!!!
14 | 


--------------------------------------------------------------------------------
/textutils/test/test_tree_reader_ii.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_tree_reader_ii.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestTreeReaderIi < MiniTest::Test
11 | 
12 |   def test_at_n
13 |     reader = TreeReader.from_file( "#{TextUtils.root}/test/data/at-austria/1--n-niederoesterreich/orte.txt" )
14 |  
15 |     reader.check
16 |  
17 |     assert true ## assume everything ok if we get here
18 |   end
19 | 
20 | end # class TestTreeReaderIi
21 | 


--------------------------------------------------------------------------------
/linkto/test/test_wikipedia.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | require 'helper'
 5 | 
 6 | 
 7 | class TestWikipedia < MiniTest::Unit::TestCase
 8 | 
 9 |   include LinktoHelper
10 | 
11 |   def test_search
12 | 
13 |     assert_equal "<a href='http://en.wikipedia.org/?search=ottakringer'>ottakringer</a>", wikipedia_search( 'ottakringer' )
14 |     assert_equal "<a href='http://de.wikipedia.org/?search=ottakringer'>ottakringer</a>", wikipedia_de_search( 'ottakringer' )
15 | 
16 |   end
17 | 
18 | end # class TestWikipedia
19 | 


--------------------------------------------------------------------------------
/pakman/test/liquid/test.html:
--------------------------------------------------------------------------------
 1 | 
 2 | <h3>Headers</h3>
 3 |    
 4 | <ul>
 5 |   <li>author: {{ headers['author'] }}</li>
 6 |   <li>title:  {{ headers['title'] }}</li>
 7 | 
 8 |   <li>author: {{ headers.author }}</li>
 9 |   <li>title:  {{ headers.title }}</li>
10 | </ul>
11 | 
12 | 
13 | <h3>Slides</h3>   
14 |       
15 | {% for slide in slides %}
16 |   <div>{{ slide['content'] }}</div>
17 |   <div>{{ slide['header'] }}</div>       
18 | 
19 |   <div>{{ slide.content }}</div>
20 |   <div>{{ slide.header }}</div>       
21 | {% endfor %}
22 | 


--------------------------------------------------------------------------------
/pakman/test/erb/pak/test.html.erb:
--------------------------------------------------------------------------------
 1 | <html>
 2 |   <head>
 3 |     <title>pakman Test Template</title>
 4 |   </head>
 5 |   <body>
 6 |     
 7 |    <h1>Hello pakman</h1>    
 8 | 
 9 |    <h3>Headers</h3>
10 |    
11 |    <ul>
12 |     <li>author: <%= headers['author'] %></li>
13 |     <li>title: <%= headers['title'] %></li>
14 |    </ul>
15 | 
16 |   <h3>Slides</h3>   
17 |       
18 |    <% slides.each do |slide| %>
19 |      <div><%= slide['content'] %></div>
20 |      <div><%= slide['header'] %></div>       
21 |    <% end %>
22 |     
23 |   </body>
24 | </html>


--------------------------------------------------------------------------------
/pakman/test/liquid/pak/test.html:
--------------------------------------------------------------------------------
 1 | ---
 2 | ---
 3 | 
 4 | <html>
 5 |   <head>
 6 |     <title>pakman Test Template</title>
 7 |   </head>
 8 |   <body>
 9 |     
10 |    <h1>Hello pakman</h1>    
11 | 
12 |    <h3>Headers</h3>
13 |    
14 |    <ul>
15 |     <li>author: {{ headers.author }}</li>
16 |     <li>title:  {{ headers.title }}</li>
17 |     </ul>
18 | 
19 |   <h3>Slides</h3>   
20 |       
21 |    {% for slide in slides %}
22 |      <div>{{ slide.content }}</div>
23 |      <div>{{ slide.header }}</div>       
24 |    {% endfor %}
25 |     
26 |   </body>
27 | </html>
28 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/core_ext/time.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class Time
 5 |   
 6 |   def self.cet( str )   # central european time (cet) + central european summer time (cest)  
 7 |     ActiveSupport::TimeZone['Vienna'].parse( str )
 8 |   end
 9 | 
10 |   def self.eet( str )  # eastern european time (eet)  + 2 hours
11 |     ActiveSupport::TimeZone['Bucharest'].parse( str )
12 |   end
13 |   
14 |   def self.cst( str )  # central standard time (cst) - 6 hours 
15 |     ActiveSupport::TimeZone['Mexico City'].parse( str )
16 |   end
17 |   
18 | end # class Time
19 | 
20 | 


--------------------------------------------------------------------------------
/textutils/test/test_unicode_helper.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_unicode_helper.rb
 6 | #  or better
 7 | #     rake test
 8 | 
 9 | require 'helper'
10 | 
11 | class TestUnicodeHelper < Minitest::Test
12 | 
13 |   def test_convert_unicode_dashes
14 |     
15 |     txt_in  = "\u2010 \u2011 \u2212 \u2013 \u2014"  # NB: unicode chars require double quoted strings
16 |     txt_out = '- - - - -'
17 | 
18 |     assert_equal txt_out, TextUtils.convert_unicode_dashes_to_plain_ascii( txt_in )
19 |   end
20 | 
21 | end # class TestUnicodeHelper


--------------------------------------------------------------------------------
/textutils/lib/textutils/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module TextUtils
 4 | 
 5 |   MAJOR = 1 ## todo: namespace inside version or something - why? why not??
 6 |   MINOR = 4
 7 |   PATCH = 0
 8 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
 9 | 
10 |   def self.version
11 |     VERSION
12 |   end
13 | 
14 |   def self.banner
15 |     "textutils/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16 |   end
17 | 
18 |   def self.root
19 |     "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
20 |   end
21 | 
22 | end   # module TextUtils
23 | 
24 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # text tools, libraries & scripts
 2 | 
 3 | Gems:
 4 | 
 5 | - [**textutils**](textutils)       - text filters, helpers, readers and more
 6 | - [textutils-more](textutils-more)
 7 | 
 8 | <!-- break -->
 9 | 
10 | - [linkto](linkto)  - link_to helpers for google search, bing search, flickr photo search, flickr photo tag, etc.
11 | 
12 | 
13 | <!-- break -->
14 | 
15 | - [pakman](pakman)  - template pack manager (incl. embedded ruby, liquid, etc.)
16 | 
17 | 
18 | 
19 | 
20 | ## License
21 | 
22 | The scripts are dedicated to the public domain.
23 | Use them as you please with no restrictions whatsoever.
24 | 


--------------------------------------------------------------------------------
/linkto/lib/linkto/wikipedia.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Linkto
 4 |   module WikipediaHelper
 5 | 
 6 | 
 7 |   def link_to_wikipedia_search( q, opts={} )
 8 |     link_to q, "http://en.wikipedia.org/?search=#{q}", opts
 9 |   end
10 | 
11 |   def link_to_wikipedia_de_search( q, opts={} )
12 |     link_to q, "http://de.wikipedia.org/?search=#{q}", opts
13 |   end
14 | 
15 | 
16 | ###############################
17 | # shortcuts / aliases
18 | 
19 |   def wikipedia_search( q, opts={} )    link_to_wikipedia_search( q, opts ) end
20 |   def wikipedia_de_search( q, opts={} ) link_to_wikipedia_de_search( q, opts ) end
21 | 
22 | 
23 |   end  # module WikipediaHelper
24 | end # module Linkto
25 | 


--------------------------------------------------------------------------------
/textutils/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt:
--------------------------------------------------------------------------------
 1 | 2     Bayern
 2 | 24    .. Oberfranken
 3 | 241   .... Bamberg (Stadt)     ## Kreisfreie Stadt
 4 |       ...... Bamberg
 5 |       ........ Bamberg
 6 | 242   .... Bayreuth (Stadt)    ## Kreisfreie Stadt
 7 |       ...... Bayreuth
 8 |       ........ Bayreuth
 9 | 
10 | 245   .... Bamberg (Land)      ## Landkreis   -- 36 Gemeinden; see de.wikipedia.org/wiki/Landkreis_Bamberg
11 |              ## 4 Städte
12 |       ...... Baunach        ## (4013, 30,9 km²)
13 |       ........ Baunach
14 |       ...... Hallstadt      ## (8364, 14,5 km²)
15 |       ........ Hallstadt    ## (7588)
16 |       ........ Dörfleins    ## (1380)
17 | 
18 | 


--------------------------------------------------------------------------------
/linkto/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | ## minitest setup
 4 | 
 5 | # require 'minitest/unit'
 6 | require 'minitest/autorun'
 7 | 
 8 | # include MiniTest::Unit  # lets us use TestCase instead of MiniTest::Unit::TestCase
 9 | 
10 | ## make sure activesupport gets included/required
11 | # note: just activesupport or active_support will NOT work
12 | # require 'active_support/all'
13 | 
14 | ## our own code
15 | 
16 | require 'linkto'
17 | 
18 | 
19 | ### simple link_to method
20 | #  - no need to include UrlHelper from Rails
21 | 
22 | def link_to( title, link, opts={} )
23 |   ###
24 |   # fix:
25 |   #  opts get ignored for now!!
26 |   
27 |   "<a href='#{link}'>#{title}</a>"
28 | end
29 | 
30 | 


--------------------------------------------------------------------------------
/linkto/test/test_google.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | require 'helper'
 5 | 
 6 | 
 7 | class TestGoogle < MiniTest::Unit::TestCase
 8 | 
 9 |   include LinktoHelper
10 | 
11 |   %i( google_search link_to_google_search ).each do |method|
12 |     define_method("test #{method}") do
13 |       assert_equal "<a href='https://www.google.com/search?q=open mundi'>open mundi</a>", send(method, 'open mundi')
14 |     end
15 |   end
16 | 
17 |   %i( google_de_search link_to_google_de_search ).each do |method|
18 |     define_method("test #{method}") do
19 |       assert_equal "<a href='https://www.google.de/search?hl=de&q=open mundi'>open mundi</a>", send(method, 'open mundi')
20 |     end
21 |   end
22 | 
23 | end # class TestGoogle
24 | 


--------------------------------------------------------------------------------
/textutils/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | /.config
 4 | /coverage/
 5 | /InstalledFiles
 6 | /pkg/
 7 | /spec/reports/
 8 | /test/tmp/
 9 | /test/version_tmp/
10 | /tmp/
11 | 
12 | ## Specific to RubyMotion:
13 | .dat*
14 | .repl_history
15 | build/
16 | 
17 | ## Documentation cache and generated files:
18 | /.yardoc/
19 | /_yardoc/
20 | /doc/
21 | /rdoc/
22 | 
23 | ## Environment normalisation:
24 | /.bundle/
25 | /vendor/bundle
26 | /lib/bundler/man/
27 | 
28 | # for a library or gem, you might want to ignore these files since the code is
29 | # intended to run in multiple environments; otherwise, check them in:
30 | # Gemfile.lock
31 | # .ruby-version
32 | # .ruby-gemset
33 | 
34 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
35 | .rvmrc
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/textutils-more/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | /.config
 4 | /coverage/
 5 | /InstalledFiles
 6 | /pkg/
 7 | /spec/reports/
 8 | /test/tmp/
 9 | /test/version_tmp/
10 | /tmp/
11 | 
12 | ## Specific to RubyMotion:
13 | .dat*
14 | .repl_history
15 | build/
16 | 
17 | ## Documentation cache and generated files:
18 | /.yardoc/
19 | /_yardoc/
20 | /doc/
21 | /rdoc/
22 | 
23 | ## Environment normalisation:
24 | /.bundle/
25 | /vendor/bundle
26 | /lib/bundler/man/
27 | 
28 | # for a library or gem, you might want to ignore these files since the code is
29 | # intended to run in multiple environments; otherwise, check them in:
30 | # Gemfile.lock
31 | # .ruby-version
32 | # .ruby-gemset
33 | 
34 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
35 | .rvmrc
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/core_ext/file.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | class File
 4 |   def self.read_utf8( path )
 5 |     text = open( path, 'r:bom|utf-8' ) do |file|
 6 |       file.read
 7 |     end
 8 | 
 9 |     ##
10 |     ## todo: make normalize newlines into a filter (for easy (re)use)
11 | 
12 |     ##   normalize newlines
13 |     ##    always use LF \n (Unix):
14 |     ##
15 |     ##   convert CR/LF \r\n (Windows)  => \n
16 |     ##   convert CR    \r   (old? Mac) => \n  -- still in use?
17 |     text = text.gsub( /\r\n|\r/, "\n" )
18 | 
19 |     # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
20 |     text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
21 | 
22 |     text
23 |   end
24 | end # class File
25 | 
26 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/helper/xml_helper.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module TextUtils
 4 |   module XmlHelper
 5 | 
 6 | 
 7 |   def prettify_xml( xml )
 8 |     require 'rexml/document'
 9 |     
10 |     begin
11 |       d = REXML::Document.new( xml )
12 |     
13 |       # d.write( pretty_xml="", 2 )
14 |       # pretty_xml  # return prettified xml
15 |     
16 |       formatter = REXML::Formatters::Pretty.new( 2 )  # indent=2
17 |       formatter.compact = true # This is the magic line that does what you need!
18 |       pretty_xml = formatter.write( d.root, "" )  # todo/checl: what's 2nd arg used for ??
19 |       pretty_xml
20 |     rescue Exception => ex
21 |       "warn: prettify_xml failed: #{ex}\n\n\n" + xml
22 |     end
23 |   end
24 | 
25 | 
26 |   end # module XmlHelper
27 | end # module TextUtils
28 | 


--------------------------------------------------------------------------------
/linkto/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/linkto/version.rb'
 3 | # gem spec for the linkto gem - set up via Hoe.spec
 4 | Hoe.spec 'linkto' do
 5 | 
 6 |   self.version = Linkto::VERSION
 7 | 
 8 |   self.summary = 'linkto - link_to helpers for google search, bing search, flickr photo search, flickr photo tag, etc.'
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/rubylibs/linkto']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'webslideshow@googlegroups.com'
15 | 
16 |   # switch extension to .markdown for github formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.extra_deps = [
21 |     ['logutils' ]
22 |   ]
23 | 
24 |   self.licenses = ['Public Domain']
25 | 
26 |   self.spec_extras = {
27 |    :required_ruby_version => '>= 1.9.2'
28 |   }
29 | 
30 | 
31 | end
32 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/erb/template.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | class ErbTemplate
 6 | 
 7 |   def self.from_file( path )
 8 |     ## todo/fix: update logutils - (auto-)add ("static") logger helper/mixin too!!!!!
 9 |     LogKernel::Logger[ self ].info "  Loading template (from file) >#{path}<..."
10 |     text = File.open( path, 'r:bom|utf-8' ).read     ## note: assume utf8
11 |     self.new( text, path: path )   ## note: pass along path as an option
12 |   end
13 | 
14 |   def self.from_string( text )  ### use parse as alias - why?? why not??
15 |     self.new( text )
16 |   end
17 | 
18 |   def initialize( text, opts={} )
19 |     @template = ERB.new( text )
20 |   end
21 | 
22 |   def render( binding )
23 |     @template.result( binding )
24 |   end
25 | 
26 | end # class ErbTemplate
27 | end # module Pakman
28 | 


--------------------------------------------------------------------------------
/textutils/test/test_fixture_reader.rb:
--------------------------------------------------------------------------------
 1 | require 'helper'
 2 | 
 3 | 
 4 | class TestFixtureReader < Minitest::Test
 5 | 
 6 |   def test_read
 7 |     path = "#{TextUtils.root}/test/data/cl_all.txt"
 8 |     puts "[TestFixtureReader.test_read] path: #{path}"
 9 | 
10 |     reader = FixtureReader.from_file( path )
11 | 
12 |     ary = [
13 |       'europe-champions-league!/leagues',
14 |       'europe-champions-league!/2011_12/cl',
15 |       'europe-champions-league!/2011_12/el',
16 |       'europe-champions-league!/2012_13/cl',
17 |       'europe-champions-league!/2012_13/el',
18 |       'europe-champions-league!/2013_14/cl' ]
19 | 
20 |     i=0
21 |     reader.each do |fx|
22 |       assert_equal ary[i], fx
23 |       i+=1
24 |     end
25 |   end # method test_read
26 | 
27 | end # class TestFixtureReader
28 | 
29 | 


--------------------------------------------------------------------------------
/linkto/lib/linkto/google.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Linkto
 4 |   module GoogleHelper
 5 | 
 6 |   def link_to_google_search( q, opts={} )
 7 |     link_to q, "https://www.google.com/search?q=#{q}", opts
 8 |   end
 9 | 
10 |   def link_to_google_de_search( q, opts={} )
11 |     link_to q, "https://www.google.de/search?hl=de&q=#{q}", opts
12 |   end
13 | 
14 | 
15 |   def link_to_google_search_images( q, opts={} )
16 |     link_to q, "https://www.google.com/search?tbm=isch&q=#{q}", opts
17 |   end
18 | 
19 | 
20 | ###############################
21 | # shortcuts / aliases
22 | 
23 |   alias_method :google_search, :link_to_google_search
24 |   alias_method :google_de_search, :link_to_google_de_search
25 |   alias_method :google_search_images, :link_to_google_search_images
26 | 
27 |   end # module GoogleHelper
28 | end # module Linkto
29 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/cli/commands/fetch.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | class Fetch
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 |   def initialize( opts )
10 |     @opts = opts
11 |   end
12 | 
13 |   attr_reader :opts
14 | 
15 |   def run
16 |     logger.debug "fetch_uri: >#{opts.fetch_uri}<"
17 |     src = opts.fetch_uri
18 | 
19 |     uri = URI.parse( src )
20 |     logger.debug "scheme: >#{uri.scheme}<, host: >#{uri.host}<, port: >#{uri.port}<, path: >#{uri.path}<"
21 | 
22 |     pakname = Pakman.pakname_from_file( uri.path )
23 |     logger.debug "pakname: >#{pakname}<"
24 | 
25 |     pakpath = File.expand_path( pakname, opts.config_path )
26 |     logger.debug "pakpath: >#{pakpath}<"
27 | 
28 |     Fetcher.new.fetch_pak( src, pakpath )
29 |   end # method run
30 | 
31 | end # class Fetch
32 | end # module Pakman
33 | 


--------------------------------------------------------------------------------
/textutils-more/lib/textutils/reader/markdown_reader.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | #######################################################
 5 | ############# work in progress  #######################
 6 | #
 7 | #  NOTE: do NOT include for now in packaged gem
 8 | 
 9 | #######
10 | ## read data records "encoded" in markdown / plain text
11 | ###
12 | 
13 | class MarkdownReader
14 | 
15 |   include LogUtils::Logging
16 | 
17 |   def self.from_file( path )
18 |     text = 'to be done'
19 |     self.from_string( text )
20 |   end
21 |   
22 |   def self.from_string( text )
23 |     MarkdownReader.new( text )
24 |   end
25 | 
26 |   def initialize( path, more_attribs={} )
27 |     @more_attribs = more_attribs
28 |     @text         = text
29 |     ## to be done
30 |   end
31 | 
32 |   ## to be done
33 | 
34 | end # class MarkdownReader
35 | 
36 | 


--------------------------------------------------------------------------------
/textutils/test/test_taglist.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | require 'helper'
 5 | 
 6 | 
 7 | class TestTaglist < Minitest::Test
 8 | 
 9 |   include TextUtils::ValueHelper   #  lets us use is_taglist?, etc.
10 | 
11 |   def test_taglist_starting_w_digit
12 |     ## for now - taglist cannot start w/ number
13 |     assert is_taglist?( '20 ha' ) == false
14 |     assert is_taglist?( '5000 hl' ) == false
15 |     assert is_taglist?( '5_000 hl' ) == false
16 |   end
17 | 
18 |   def test_taglist_upcase
19 |     ## taglist cannot use upcase letters
20 |     assert is_taglist?( 'ABC' ) == false
21 |   end
22 | 
23 |   def test_taglist
24 |     assert is_taglist?( 'a' )
25 |     assert is_taglist?( 'a|b|c' )
26 |     assert is_taglist?( 'a b c' )
27 |     assert is_taglist?( 'a_b_c' )
28 |   end
29 | 
30 | 
31 | end # class TestTaglist
32 | 
33 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/cli/commands/list.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | class List
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 |   include ManifestHelper
10 | 
11 |   def initialize( opts )
12 |     @opts = opts
13 |   end
14 | 
15 |   attr_reader :opts
16 | 
17 |   def run
18 |     manifests = installed_template_manifests
19 | 
20 |     puts 'Installed template packs in search path'
21 | 
22 |     installed_template_manifest_patterns.each_with_index do |pattern,i|
23 |       puts "    [#{i+1}] #{pattern}"
24 |     end
25 |     puts '  include:'
26 | 
27 |     if manifests.empty?
28 |       puts "    -- none --"
29 |     else
30 |       manifests.each do |manifest|
31 |         puts "%16s (%s)" % [manifest[0].gsub('.txt',''), manifest[1]]
32 |       end
33 |     end
34 |   end
35 | 
36 | end # class List
37 | end # module Pakman
38 | 


--------------------------------------------------------------------------------
/pakman/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/pakman/version.rb'
 3 | # gem spec for the pakman gem - set up via Hoe.spec
 4 | Hoe.spec 'pakman' do
 5 | 
 6 |   self.version = Pakman::VERSION
 7 | 
 8 |   self.summary = 'pakman - Template Pack Manager (incl. Embedded Ruby, Liquid, etc.)'
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/rubylibs/pakman']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'wwwmake@googlegroups.com'
15 | 
16 |   self.extra_deps = [
17 |     ['fetcher',  '>= 0.4.5'],
18 |     ['logutils', '>= 0.6.1'],
19 |     ['liquid',   '>= 4.0.0'],
20 |   ]
21 | 
22 |   # switch extension to .rdoc for github formatting
23 |   # self.readme_file  = 'README.md'
24 |   # self.history_file = 'History.md'
25 | 
26 |   self.licenses = ['Public Domain']
27 | 
28 |   self.spec_extras = {
29 |    required_ruby_version: '>= 2.3'
30 |   }
31 | 
32 | end
33 | 


--------------------------------------------------------------------------------
/textutils/test/test_tree_reader.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_tree_reader.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestTreeReader < MiniTest::Test
11 | 
12 |   def test_oberfranken
13 |     reader = TreeReader.from_file( "#{TextUtils.root}/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt" )
14 |  
15 |     reader.each_line do |_|
16 |       ## do nothing for now
17 |     end
18 |     
19 |     assert true ## assume everything ok if we get here
20 |   end
21 | 
22 |   def test_de
23 |     reader = TreeReader.from_file( "#{TextUtils.root}/test/data/de-deutschland/orte.txt" )
24 |  
25 |     reader.each_line do |_|
26 |       ## do nothing for now
27 |     end
28 | 
29 |     assert true ## assume everything ok if we get here
30 |   end
31 | 
32 | end # class TestTreeReader
33 | 
34 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/cli/ctx.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | class Ctx   # Context
 6 | 
 7 |   def initialize( hash )
 8 |     @hash = hash
 9 |   end
10 | 
11 |   def ctx
12 |     ### todo: check if method_missing works with binding in erb???
13 |     binding
14 |   end
15 | 
16 |   def method_missing( mn, *args, &blk )
17 |     ## only allow read-only access (no arguments)
18 |     if args.length > 0    # || mn[-1].chr == "="
19 |       return super # super( mn, *args, &blk )
20 |     end
21 | 
22 |     key = mn.to_s
23 | 
24 |     if @hash.has_key?( key )
25 |       puts "calling ctx.#{key}"
26 |       value = @hash[ key ]
27 |       puts "  returning #{value.class.name}:"
28 |       pp value
29 |       value
30 |     else
31 |       puts "*** warning: ctx.#{key} missing"
32 |       super
33 |     end
34 |   end
35 | 
36 | end # class Ctx
37 | end # module Pakman
38 | 


--------------------------------------------------------------------------------
/textutils/test/test_block_reader.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  to run use
 3 | #     ruby -I ./lib -I ./test test/test_block_reader.rb
 4 | #  or better
 5 | #     rake test
 6 | 
 7 | require 'helper'
 8 | 
 9 | 
10 | class TestBlockReader < MiniTest::Test
11 | 
12 |   def test_feedburner
13 |      blocks = BlockReader.from_file( "#{TextUtils.root}/test/data/feedburner.txt" ).read
14 | 
15 |      ## note: regex - use %q - do NOT escape \. or \1 etc.
16 |      pattern = %q{<img[^>]*?src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1.*?>}
17 | 
18 |      test1   = %q{<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>}
19 | 
20 |      assert_equal 2, blocks.size
21 |      assert_equal pattern, blocks[0].gsub( /[\n ]/, '' )  ## note: need to remove newlines and spaces
22 |      assert_equal test1,   blocks[1]
23 |   end
24 | 
25 | end # class TestBlockReader
26 | 
27 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/cli/helpers.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | module ManifestHelper
 6 | 
 7 |   def installed_template_manifest_patterns
 8 |     # 1) search .    # that is, working/current dir
 9 |     # 2) search <config_dir>
10 |     # 3) search <gem>/templates
11 | 
12 |     builtin_patterns = [
13 |       "#{Pakman.root}/templates/*.txt"
14 |     ]
15 |     config_patterns  = [
16 |       "#{File.expand_path(opts.config_path)}/*.txt",
17 |       "#{File.expand_path(opts.config_path)}/*/*.txt"
18 |     ]
19 |     current_patterns = [
20 |       "*.txt",
21 |       "*/*.txt"
22 |     ]
23 | 
24 |     patterns = []
25 |     patterns += current_patterns
26 |     patterns += config_patterns
27 |     patterns += builtin_patterns
28 |   end
29 | 
30 |   def installed_template_manifests
31 |     excludes = [
32 |       "Manifest.txt",
33 |       "*/Manifest.txt"
34 |     ]
35 | 
36 |     Finder.new.find_manifests( installed_template_manifest_patterns, excludes )
37 |   end
38 | 
39 | end # module ManifestHelper
40 | end # module Pakman
41 | 


--------------------------------------------------------------------------------
/linkto/lib/linkto/flickr.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Linkto
 4 |   module FlickrHelper
 5 | 
 6 | 
 7 | #####################
 8 | #  browse tags 
 9 | 
10 |   def link_to_flickr_tags( tags, opts={} )   # fix: add alias for link_to_flickr_tag
11 |     # e.g. use
12 |     #  ottakringer
13 |     #  ottakringer+beer    -- use plus for multiple tags
14 |     link_to tags, "http://www.flickr.com/photos/tags/#{tags}", opts
15 |   end
16 | 
17 | #########################
18 | #  search terms (q)
19 | 
20 |   def link_to_flickr_search( q, opts={} )
21 |      # e.g. use
22 |      #   ottakringer
23 |      #   ottakringer+beer    -- note: + is url encoded for space e.g. equals ottakringer beer
24 |     link_to q, "http://www.flickr.com/search/?q=#{q}", opts
25 |   end
26 | 
27 | ###############################
28 | # shortcuts / aliases
29 | 
30 |   def flickr_tags( tags, opts={} ) link_to_flickr_tags( tags, opts ) end
31 |   def flickr_search( q, opts={} )  link_to_flickr_search( q, opts )  end
32 | 
33 | 
34 |   end # module FlickrHelper
35 | end # module Linkto
36 | 


--------------------------------------------------------------------------------
/pakman/test/test_page.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_page.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
11 | class TestPage < MiniTest::Test
12 | 
13 | def test_page1
14 |   page = Pakman::Page.from_file( "#{Pakman.root}/test/pages/page1.txt" )
15 |   assert page.headers?
16 | end  # method test_page1
17 | 
18 | def test_page2
19 |   page = Pakman::Page.from_file( "#{Pakman.root}/test/pages/page2.txt" )
20 |   assert page.headers?
21 | end  # method test_page2
22 | 
23 | def test_page3
24 |   page = Pakman::Page.from_file( "#{Pakman.root}/test/pages/page3.txt" )
25 |   assert page.headers?
26 | end  # method test_page3
27 | 
28 | def test_empty
29 |   page = Pakman::Page.from_file( "#{Pakman.root}/test/pages/empty.txt" )
30 |   assert page.headers? == false
31 | end  # method test_empty
32 | 
33 | def test_text
34 |   page = Pakman::Page.from_file( "#{Pakman.root}/test/pages/text.txt" )
35 |   assert page.headers? == false
36 | end  # method test_text
37 | 
38 | end # class TestPage
39 | 
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/textutils/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/textutils/version.rb'
 3 | # gem spec for the textutils gem - set up via Hoe.spec
 4 | Hoe.spec 'textutils' do
 5 | 
 6 |   self.version = TextUtils::VERSION
 7 |   
 8 |   self.summary = 'textutils - Text Filters, Helpers, Readers and More'
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/textkit/textutils']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'ruby-talk@ruby-lang.org'
15 | 
16 |   # switch extension to .markdown for github formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.extra_deps = [
21 |     ['props',    '>=1.1.2'],
22 |     ['logutils', '>=0.6.1'],
23 |     ### 3rd party gems
24 |     ['rubyzip', '>=1.0.0'],   ## note: 1.0 changed to require zip (pre 1.0 was zip/zip); todo/check: make optional -why? why not??
25 |     ['activesupport']    ## todo/check:  really needed? document what methods get used
26 |   ]
27 | 
28 |   self.licenses = ['Public Domain']
29 | 
30 |   self.spec_extras = {
31 |     required_ruby_version: '>= 1.9.2'
32 |   }
33 | 
34 | end
35 | 


--------------------------------------------------------------------------------
/linkto/lib/linkto.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ## require 'props'
 4 | 
 5 | require 'logutils'
 6 | 
 7 | ## require 'textutils'
 8 | 
 9 | 
10 | # our own code
11 | 
12 | require 'linkto/version'  # let it always go first
13 | 
14 | require 'linkto/bing'
15 | require 'linkto/flickr'
16 | require 'linkto/google'
17 | require 'linkto/untappd'
18 | require 'linkto/wikipedia'
19 | 
20 | 
21 | module Linkto
22 | 
23 |   def self.banner
24 |     "linkto/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
25 |   end
26 | 
27 |   def self.root
28 |     "#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}"
29 |   end  
30 | 
31 | ### convenience - includes all helpers; use include LinktoHelper
32 |   module Helper
33 |     include BingHelper
34 |     include FlickrHelper
35 |     include GoogleHelper
36 |     include UntappdHelper
37 |     include WikipediaHelper
38 |   end
39 | 
40 | end  # module Linkto
41 | 
42 | 
43 | ## for convenience add aliases for module
44 | LinkTo       = Linkto
45 | LinkToHelper = Linkto::Helper 
46 | LinktoHelper = Linkto::Helper
47 | 
48 | 
49 | puts Linkto.banner    # say hello
50 | 


--------------------------------------------------------------------------------
/textutils-more/lib/textutils/table/table_reader.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | #######################################################
 5 | ############# work in progress  #######################
 6 | #
 7 | #  NOTE: do NOT include for now in packaged gem
 8 | 
 9 | 
10 | ####
11 | ## move to csvutils ??? why? why not?
12 | ##
13 | 
14 | #######
15 | ## read data records in csv (comma-separated values) format in plain text
16 | 
17 | 
18 | class TableReader    ## rename to CsvTableReader ? or CsvReader?
19 | 
20 |   include LogUtils::Logging
21 | 
22 |   def self.from_file( path )
23 |     text = 'to be done'
24 |     self.from_string( text )
25 |   end
26 |   
27 |   def self.from_string( text )
28 |     TableReader.new( text )
29 |   end
30 | 
31 |   def initialize( text, opts={} )
32 |     @opts = opts
33 |     @text = text
34 |     ## to be done
35 |   end
36 | 
37 |   def quick_check
38 |     # use a quick scan of all rows (return some stats e.g. no of records)
39 |     #  - throws an exception if any error
40 | 
41 |     ## to be done
42 |   end
43 | 
44 |   ## to be done
45 | 
46 | end # class TableReader
47 | 
48 | 


--------------------------------------------------------------------------------
/pakman/test/test_erb.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_erb.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
11 | class TestErb < MiniTest::Test
12 | 
13 | class Ctx   # Context
14 | 
15 |   def initialize( hash )
16 |     @hash = hash
17 |     @headers = hash['headers']
18 |     @slides  = hash['slides']
19 |     
20 |     puts 'hash:'
21 |     pp @hash
22 |     puts 'headers:'
23 |     pp @headers
24 |     puts 'slides:'
25 |     pp @slides
26 |   end
27 | 
28 |   attr_reader :headers
29 |   attr_reader :slides
30 | 
31 |   def ctx
32 |     ### todo: check if method_missing works with binding in erb???
33 |     binding
34 |   end
35 | end
36 | 
37 | def test_merge
38 |   hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" )
39 |   ctx  = Ctx.new( hash )
40 | 
41 |   manifestsrc = "#{Pakman.root}/test/erb/pak/test.txt"
42 |   outpath = "#{Pakman.root}/tmp/#{Time.now.to_i}"    ## pakpath/output path
43 |   
44 |   Pakman::Templater.new.merge_pak( manifestsrc, outpath, ctx.ctx, 'test' )
45 |     
46 |   assert true
47 | end  # method test_merge
48 | 
49 | end # class TestErb
50 | 
51 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/core_ext/array.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class Array
 5 | 
 6 |   ## todo: check if there's already a builtin method for this
 7 |   #
 8 |   #  note:
 9 |   #   in rails ary.in_groups(3)  results in
10 |   #          top-to-bottom, left-to-right.
11 |   #  and not left-to-right first and than top-to-bottom.
12 |   #
13 |   #  rename to in_groups_vertical(3) ???
14 | 
15 |   def in_columns( cols )  # alias for convenience for chunks - needed? why? why not?
16 |     chunks( cols )
17 |   end
18 | 
19 |   def chunks( number_of_chunks )
20 |     ## NB: use chunks - columns might be in use by ActiveRecord! 
21 |     ###
22 |     # e.g.
23 |     #  [1,2,3,4,5,6,7,8,9,10].columns(3)
24 |     #   becomes:
25 |     #  [[1,4,7,10],
26 |     #   [2,5,8],
27 |     #   [3,6,9]]
28 | 
29 |     ## check/todo: make a copy of the array first??
30 |     #  for now reference to original items get added to columns
31 |     chunks = (1..number_of_chunks).collect { [] }
32 |     each_with_index do |item,index|
33 |       chunks[ index % number_of_chunks ] << item
34 |     end
35 |     chunks
36 |   end
37 | 
38 | end # class Array
39 | 
40 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/reader/code_reader.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # fix: move into TextUtils namespace/module!!
 4 | 
 5 | class CodeReader
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 |   def self.from_file( path )
10 |     ## nb: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
11 |     ## - see textutils/utils.rb
12 |     code = File.read_utf8( path )
13 |     self.from_string( code )
14 |   end
15 | 
16 |   def self.from_string( code )
17 |     CodeReader.new( code: code )
18 |   end
19 | 
20 | 
21 |   def initialize( arg )
22 |     if arg.is_a?( String )  ## old style (deprecated) - pass in filepath as string
23 |       path = arg
24 |       logger.info "CodeReader.new - deprecated API - use CodeReader.from_file() instead"
25 |       @code = File.read_utf8( path )
26 |     else   ## assume it's a hash
27 |       opts = arg
28 |       @code = opts[:code]
29 |     end
30 |   end
31 | 
32 | 
33 |   def eval( klass )
34 |     klass.class_eval( @code )
35 | 
36 |     # NB: same as
37 |     #
38 |     # module WorldDB
39 |     #   include WorldDB::Models
40 |     #  <code here>
41 |     # end
42 |   end
43 | 
44 | end # class CodeReader
45 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/copier.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | class Copier
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 | 
10 |   def copy_pak( manifestsrc, pakpath )
11 | 
12 |     start = Time.now
13 | 
14 |     pakname = Pakman.pakname_from_file( manifestsrc )
15 | 
16 |     logger.info "Copying template pack '#{pakname}'"
17 | 
18 |     ## todo: after depreciate change back to just load_file
19 |     manifest = Manifest.load_file_v2( manifestsrc )
20 | 
21 |     manifest.each do |entry|
22 |       dest   = entry[0]
23 |       source = entry[1]
24 | 
25 |       # get full (absolute) path and make sure path exists
26 |       destfull = File.expand_path( dest, pakpath )
27 |       destpath = File.dirname( destfull )
28 |       FileUtils.makedirs( destpath ) unless File.directory?( destpath )
29 | 
30 |       logger.debug "destfull=>#{destfull}<"
31 |       logger.debug "destpath=>#{destpath}<"
32 | 
33 |       logger.info "  Copying to #{dest} from #{source}..."
34 |       FileUtils.copy( source, destfull )
35 |     end
36 | 
37 |     logger.info "Done (in #{Time.now-start} s)."
38 |   end # method copy_pak
39 | 
40 | end # class Copier
41 | end # module Pakman
42 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/helper/tag_helper.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module TextUtils
 4 |   module TagHelper
 5 | 
 6 | ####
 7 | # - todo: use new additional sub module ???
 8 | #     e.g. TextUtils::Reader::TagHelper
 9 | #   lets us use "classic" web helpers a la rails
10 | #   find a good name for sub module -  Reader? Fixtures? Values? Parser? 
11 | 
12 | 
13 |   def find_tags( value )
14 |     # logger.debug "   found tags: >>#{value}<<"
15 | 
16 |     tag_keys = value.split('|')
17 | 
18 |     ## unify; replace _w/ space; remove leading n trailing whitespace
19 |     tag_keys = tag_keys.map do |key|
20 |       key = key.gsub( '_', ' ' )
21 |       key = key.strip
22 |       key
23 |     end
24 | 
25 |     tag_keys # return tag keys as ary
26 |   end
27 | 
28 |   def find_tags_in_attribs!( attribs )
29 |     # NB: will remove :tags from attribs hash
30 | 
31 |     if attribs[:tags].present?
32 |       tag_keys = find_tags( attribs[:tags] )
33 |       attribs.delete(:tags)
34 |       tag_keys   # return tag keys as ary of strings
35 |     else
36 |       []  # nothing found; return empty ary
37 |     end
38 |   end
39 | 
40 |   end # module TagHelper
41 | end # module TextUtils
42 | 


--------------------------------------------------------------------------------
/pakman/Manifest.txt:
--------------------------------------------------------------------------------
 1 | History.md
 2 | Manifest.txt
 3 | README.md
 4 | Rakefile
 5 | bin/pakman
 6 | lib/pakman.rb
 7 | lib/pakman/cli/commands/fetch.rb
 8 | lib/pakman/cli/commands/gen.rb
 9 | lib/pakman/cli/commands/list.rb
10 | lib/pakman/cli/ctx.rb
11 | lib/pakman/cli/helpers.rb
12 | lib/pakman/cli/opts.rb
13 | lib/pakman/cli/runner.rb
14 | lib/pakman/copier.rb
15 | lib/pakman/erb/template.rb
16 | lib/pakman/erb/templater.rb
17 | lib/pakman/fetcher.rb
18 | lib/pakman/finder.rb
19 | lib/pakman/liquid/template.rb
20 | lib/pakman/liquid/templater.rb
21 | lib/pakman/manifest.rb
22 | lib/pakman/page.rb
23 | lib/pakman/utils.rb
24 | lib/pakman/version.rb
25 | test/data/test.yml
26 | test/erb/pak/test.html.erb
27 | test/erb/pak/test.txt
28 | test/helper.rb
29 | test/liquid/pak/hello.doc
30 | test/liquid/pak/hello.txt
31 | test/liquid/pak/s9logo.png
32 | test/liquid/pak/test.html
33 | test/liquid/pak/test.txt
34 | test/liquid/pak/testbin.txt
35 | test/liquid/test.html
36 | test/pages/empty.txt
37 | test/pages/page1.txt
38 | test/pages/page2.txt
39 | test/pages/page3.txt
40 | test/pages/text.txt
41 | test/test_erb.rb
42 | test/test_liquid.rb
43 | test/test_liquid_binaries.rb
44 | test/test_liquid_drops.rb
45 | test/test_page.rb
46 | 


--------------------------------------------------------------------------------
/attic/fixture_reader.rb:
--------------------------------------------------------------------------------
 1 | 
 2 |   if @path.ends_with?( '.yml' ) || @path.ends_with?( '.yaml' )
 3 |       ### fix/todo: remove later on!!! - do not use!!
 4 |       puts "deprecated api - FixtureReader w/ yaml format - will get removed; please use new plain text manifest format"
 5 |       @ary = old_deprecated_yaml_reader( text )
 6 |   else
 7 |     ..
 8 |   end
 9 | 
10 |   ## (deprecated) collect the fixture names from a yaml text; returns an array of names
11 |   def old_deprecated_yaml_reader( text )
12 |     hash = YAML.load( text )   # NOTE(review): YAML.load on text from outside can be unsafe - consider YAML.safe_load
13 |     
14 |     ### build up array for fixtures from hash
15 |     ary = []
16 |     
17 |     hash.each do |key_wild, value_wild|
18 |       key   = key_wild.to_s.strip   # note: key only gets used for logging
19 |       
20 |       logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<"
21 |     
22 |       if value_wild.kind_of?( String ) # assume single fixture name
23 |         ary << value_wild
24 |       elsif value_wild.kind_of?( Array ) # assume array of fixture names as strings
25 |         ary = ary + value_wild
26 |       else
27 |         logger.error "unknow fixture type in setup (yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<); skipping"
28 |       end
29 |     end
30 |     ary  # return fixture ary
31 |   end
32 | 


--------------------------------------------------------------------------------
/textutils/test/test_title_mapper2.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_title_mapper2.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
11 | class TestTitleMapper2 < Minitest::Test
12 | 
13 |   ClubStruct =  Struct.new(:key, :title, :synonyms)
14 | 
15 |   def test_title_table
16 | 
17 |     titles_in = [
18 |       ClubStruct.new( 'barcelona',  'Barcelona', 'FC Barcelona' ),
19 |       ClubStruct.new( 'espanyol',   'Espanyol',  'RCD Espanyol|Espanyol Barcelona' ),
20 |       ClubStruct.new( 'sevilla',    'Sevilla',   'Sevilla FC' )
21 |     ]
22 | 
23 |     mapper = TextUtils::TitleMapper2.new( titles_in, 'club' )
24 |     titles_out = mapper.known_titles
25 | 
26 |     puts 'titles_out:'
27 |     pp titles_out
28 | 
29 |     line = "Espanyol Barcelona  1-0  FC Barcelona"
30 |     mapper.map_titles!( line )
31 |     puts "=> #{line}"
32 | 
33 |     club1 = mapper.find_key!( line )
34 |     club2 = mapper.find_key!( line )
35 |     puts "=> #{line}"
36 | 
37 |     assert_equal 'espanyol',  club1
38 |     assert_equal 'barcelona', club2
39 | 
40 |     assert true   ## assume everything ok if we get here
41 | 
42 |   end # method test_title_table
43 | 
44 | 
45 | end # class TestTitleMapper2
46 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/cli/opts.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | class Opts
 6 | 
 7 |   def list=(value)
 8 |     @list = value
 9 |   end
10 | 
11 |   def list?
12 |     return false if @list.nil?  # default list flag is false
13 |     @list == true
14 |   end
15 | 
16 | 
17 |   def generate=(value)
18 |     @generate = value
19 |   end
20 | 
21 |   def generate?
22 |     return false if @generate.nil?   # default generate flag is false
23 |     @generate == true
24 |   end
25 | 
26 | 
27 |   def fetch_uri=(value)
28 |     @fetch_uri = value
29 |   end
30 | 
31 |   def fetch_uri
32 |     @fetch_uri || '-fetch uri required-'
33 |   end
34 | 
35 |   def fetch?
36 |     @fetch_uri.nil? ? false : true
37 |   end
38 | 
39 | 
40 |   def manifest=(value)
41 |     @manifest = value
42 |   end
43 | 
44 |   ## fix:/todo: use a different default manifest
45 |   def manifest
46 |     @manifest || 's6.txt'
47 |   end
48 | 
49 | 
50 |   def config_path=(value)
51 |     @config_path = value
52 |   end
53 | 
54 |   def config_path
55 |     @config_path || '~/.pak'
56 |   end
57 | 
58 | 
59 |   def output_path=(value)
60 |     @output_path = value
61 |   end
62 | 
63 |   def output_path
64 |     @output_path || '.'
65 |   end
66 | 
67 | end # class Opts
68 | end # module Pakman
69 | 


--------------------------------------------------------------------------------
/pakman/test/test_liquid_drops.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_liquid_drops.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
## note: use Minitest::Test (the MiniTest constant is only a legacy compat alias
##   that newer minitest versions no longer load by default)
class TestLiquidDrops < Minitest::Test

  ## drop wrapping the headers hash; every accessor logs its call
  ##  so it is visible which fields the template actually reads
  class HeadersDrop < Liquid::Drop

    def initialize( h )
      @h = h
    end

    def author()  puts "call author"; @h['author']; end
    def title()   puts "call title";  @h['title'];  end
  end

  ## drop wrapping a single slide hash (same call logging as HeadersDrop)
  class SlideDrop < Liquid::Drop

    def initialize( h )
      @h = h
    end

    def content()  puts "call content"; @h['content']; end
    def header()   puts "call header";  @h['header'];  end
  end

  def setup
    Liquid::Template.error_mode = :strict
  end

  ## smoke test: render test/liquid/test.html with drops instead of plain hashes
  def test_template
    hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" )
    headers = HeadersDrop.new( hash['headers'] )
    slides  = hash['slides'].map { |h| SlideDrop.new( h ) }
    ctx = { 'headers' => headers, 'slides' => slides }
    pp ctx

    path = "#{Pakman.root}/test/liquid/test.html"
    t = Pakman::LiquidTemplate.from_file( path )
    pp t.render( ctx )

    assert true   ## assume everything ok if we get here
  end

end # class TestLiquidDrops
53 | 
54 | 


--------------------------------------------------------------------------------
/attic/line_reader_v2.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | # fix: move into TextUtils namespace/module!!
 5 | 
 6 | 
 7 | class LineReaderV2
 8 |   include LogUtils::Logging
 9 | 
10 |   def initialize( name, include_path )
11 |     @name          = name
12 |     @include_path  = include_path
13 |     
14 |     # map name to name_real_path
15 |     #   name might include !/ for virtual path (gets cut off)
16 |     #   e.g. at-austria!/w-wien/beers becomse w-wien/beers
17 | 
18 |     pos = @name.index( '!/')
19 |     if pos.nil?
20 |       @name_real_path = @name   # not found; real path is the same as name
21 |     else
22 |       # cut off everything until !/ e.g.
23 |       #   at-austria!/w-wien/beers becomes
24 |       #   w-wien/beers
25 |       @name_real_path = @name[ (pos+2)..-1 ]
26 |     end
27 |   end
28 | 
29 |   attr_reader :name
30 |   attr_reader :name_real_path
31 |   attr_reader :include_path
32 | 
33 |   def each_line
34 |     path          = "#{include_path}/#{name_real_path}.txt"
35 |     reader        = LineReader.from_file( path )
36 | 
37 |     logger.info "parsing data '#{name}' (#{path})..."
38 | 
39 |     reader.each_line do |line|
40 |       yield( line )
41 |     end
42 | 
43 |     ConfDb::Model::Prop.create_from_fixture!( name, path )
44 |   end
45 | 
46 | end # class LineReaderV2
47 | 
48 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/cli/commands/gen.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | class Gen
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 |   include ManifestHelper
10 | 
11 |   def initialize( opts )
12 |     @opts    = opts
13 |   end
14 | 
15 |   attr_reader :opts
16 | 
17 |   def run( args )
18 |     manifest_name = opts.manifest
19 |     manifest_name = manifest_name.downcase.gsub('.txt', '' )  # remove .txt if present
20 | 
21 |     logger.debug "manifest=#{manifest_name}"
22 | 
23 |     # check for matching manifests
24 |     manifests = installed_template_manifests.select { |m| m[0] == manifest_name+'.txt' }
25 | 
26 |     if manifests.empty?
27 |       puts "*** error: unknown template pack '#{manifest_name}'; use pakman -l to list installed template packs"
28 |       exit 2
29 |     end
30 | 
31 |     manifestsrc = manifests[0][1]
32 |     pakpath     = opts.output_path
33 | 
34 |     if args.empty?
35 |       Copier.new.copy_pak( manifestsrc, pakpath )
36 |     else
37 |       args.each do |arg|
38 |         data = YAML.load_file( arg )
39 |         name = File.basename( arg, '.*' )
40 |         puts "#{name}:"
41 |         pp data
42 |         Templater.new.merge_pak( manifestsrc, pakpath, Ctx.new(data).ctx, name )
43 |       end
44 |     end
45 | 
46 |   end
47 | 
48 | end # class Gen
49 | end # module Pakman
50 | 


--------------------------------------------------------------------------------
/pakman/test/test_liquid_binaries.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_liquid_binaries.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
## note: use Minitest::Test (the MiniTest constant is only a legacy compat alias
##   that newer minitest versions no longer load by default)
class TestLiquidBinaries < Minitest::Test

  def setup
    Liquid::Template.error_mode = :strict
  end

  ## check which file extensions REGEX_EXT accepts
  def test_rx
    rx = Pakman::LiquidTemplater::REGEX_EXT

    pp rx

    ## note: Regexp#match returns MatchData or nil (never true/false);
    ##  that's why comparing the result with == true doesn't work -
    ##  use assert_match / refute_match instead
    assert_match rx, 'test.html'
    assert_match rx, 'TEST.HTML'
    assert_match rx, 'test.js'
    assert_match rx, 'test.json'
    refute_match rx, 'test.gif'
  end

  ## smoke test: merge the testbin template pack into a fresh tmp folder
  def test_merge
    hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" )
    ctx = { 'headers' => hash['headers'], 'slides' => hash['slides'] }
    pp ctx

    manifestsrc = "#{Pakman.root}/test/liquid/pak/testbin.txt"
    outpath = "#{Pakman.root}/tmp/#{Time.now.to_i}"    ## pakpath/output path

    Pakman::LiquidTemplater.new.merge_pak( manifestsrc, outpath, ctx, 'test' )

    assert true   ## assume everything ok if we get here
  end  # method test_merge

end # class TestLiquidBinaries
49 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/filter/code_filter.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module TextUtils
 4 |   module Filter
 5 | 
 6 |   def code_block_curly_style( content, options={} )
 7 |     # replace {{{  w/ <pre class='code'>
 8 |     # replace }}}  w/ </pre>
 9 |     # use 4-6 { or } to escape back to literal value (e.g. {{{{ or {{{{{{ => {{{ )
10 |     # note: {{{ / }}} are anchored to beginning of line ( spaces and tabs before {{{/}}}allowed )
11 |     
12 |     # track statistics
13 |     code_begin     = 0
14 |     code_begin_esc = 0
15 |     code_end       = 0
16 |     code_end_esc   = 0
17 |         
18 |     content.gsub!( /^[ \t]*(\{{3,6})/ ) do |match|
19 |       escaped = ($1.length > 3)
20 |       if escaped
21 |         code_begin_esc += 1
22 |         "{{{"
23 |       else
24 |         code_begin += 1
25 |         "<pre class='code'>"
26 |       end
27 |     end
28 |     
29 |     content.gsub!( /^[ \t]*(\}{3,6})/ ) do |match|
30 |       escaped = ($1.length > 3)
31 |       if escaped
32 |         code_end_esc += 1
33 |         "}}}"
34 |       else
35 |         code_end += 1
36 |         "</pre>"
37 |       end
38 |     end
39 |         
40 |     puts "  Patching {{{/}}}-code blocks (#{code_begin}/#{code_end} blocks, " +
41 |          "#{code_begin_esc}/#{code_end_esc} escaped blocks)..."
42 |     
43 |     content
44 |   end
45 | 
46 |   end  # module Filter
47 | end   # module TextUtils


--------------------------------------------------------------------------------
/linkto/README.md:
--------------------------------------------------------------------------------
 1 | # linkto
 2 | 
 3 | linkto gem - link_to helpers for google search, bing search, flickr photo search, flickr photo tag, etc.
 4 | 
 5 | * home  :: [github.com/rubylibs/linkto](https://github.com/rubylibs/linkto)
 6 | * bugs  :: [github.com/rubylibs/linkto/issues](https://github.com/rubylibs/linkto/issues)
 7 | * gem   :: [rubygems.org/gems/linkto](https://rubygems.org/gems/linkto)
 8 | * rdoc  :: [rubydoc.info/gems/linkto](http://rubydoc.info/gems/linkto)
 9 | 
10 | 
11 | ## Usage
12 | 
13 |     link_to_google_search 'open mundi' 
14 | 
15 | will become
16 | 
17 |     https://www.google.com/search?q=open+mundi
18 | 
19 | 
20 | ### Google
21 | 
22 | - `link_to_google_search`
23 | - `link_to_google_de_search`
24 | - `link_to_google_search_images`
25 | 
26 | ### Bing
27 | 
28 | - `link_to_bing_search_images`
29 | 
30 | ### Flickr
31 | 
32 | - `link_to_flickr_tags`
33 | - `link_to_flickr_search`
34 | 
35 | ### Wikipedia
36 | 
37 | - `link_to_wikipedia_search`
38 | - `link_to_wikipedia_de_search`
39 | 
40 | ### Untappd
41 | 
42 | - `link_to_untappd_search`
43 | 
44 | 
45 | 
46 | ## Real World Usage
47 | 
48 | - [beer.db.admin](https://github.com/geraldb/beer.db.admin) - open source world beer guide; beer.db browser
49 | 
50 | 
51 | ## Alternatives
52 | 
53 | 
54 | ## License
55 | 
56 | The `linkto` scripts are dedicated to the public domain.
57 | Use it as you please with no restrictions whatsoever.
58 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/utils.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module TextUtils
 4 |   # make helpers available as class methods e.g. TextUtils.convert_unicode_dashes_to_plain_ascii
 5 |   extend UnicodeHelper
 6 |   extend TitleHelper
 7 |   extend AddressHelper
 8 |   
 9 |   extend StringFilter # adds asciify and slugify
10 | end
11 | 
12 | 
13 | 
# Deprecated top-level shortcut; prints a warning and forwards to TextUtils.title_esc_regex.
#   title_unescaped - passed through unchanged
#   returns whatever TextUtils.title_esc_regex returns
# note: the warning now spells deprecated correctly and points to the helper module
#   that actually gets extended into TextUtils (TitleHelper, not TitleHelpers)
def title_esc_regex( title_unescaped )
  puts "*** warn: deprecated fn call: use TextUtils.title_esc_regex() or include TextUtils::TitleHelper"
  TextUtils.title_esc_regex( title_unescaped )
end
18 | 
19 | 
# Looks for the root folder of the (git-referenced) gem/package +name+ in the load path.
#
# A load path entry matches if it ends in
#   /<name>/lib   or   /<name>-<ref>/lib
# where <ref> is 12 chars of a-z0-9 (e.g. b7d1c9619a54).
#
# Returns the first matching entry with the trailing /lib cut off
#   or nil if nothing matches. Prints the load path and the candidates found.
def find_data_path_from_gemfile_gitref( name )
  puts "[textutils] find_data_path( name='#{name}' )..."
  puts "load path:"
  pp $LOAD_PATH

  # escape all regex metachars in the name (not just the dot)
  #  e.g. beer.db becomes beer\.db
  name_esc = Regexp.escape( name )

  # note: use \z (not $) - the match must end where the path ends
  name_regex = /\/(?:#{name_esc}-[a-z0-9]{12}|#{name_esc})\/lib\z/

  # note: load path entries need not be strings (e.g. Pathname); convert first
  candidates = $LOAD_PATH.map { |path| path.to_s }
                         .select { |path| path =~ name_regex }
                         .map { |path| path.sub( /\/lib\z/, '' ) }   # cutoff trailing /lib

  puts 'found candidates:'
  pp candidates

  ## use first candidate
  candidates[0]
end
49 | 
50 | 


--------------------------------------------------------------------------------
/textutils/test/test_slugify.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | require 'helper'
 4 | 
 5 | class TestSlugify < Minitest::Test
 6 | 
 7 |   def test_slugify
 8 | 
 9 |     txt_io = [
10 |       [ 'São Paulo',   'sao-paulo' ],
11 |       [ 'São Gonçalo', 'sao-goncalo' ],
12 |       [ 'Výčepní',     'vycepni' ],
13 |       [ 'Żubr', 'zubr' ],
14 |       [ 'Żywiec', 'zywiec' ],
15 |       [ 'Lomża Export', 'lomza-export' ],
16 |       [ 'Nogne Ø Imperial Stout', 'nogne-o-imperial-stout' ],
17 |       [ 'Xyauyù', 'xyauyu' ],
18 |       [ 'Águila', 'aguila' ],
19 |       [ 'Arena Amazônia', 'arena-amazonia' ],
20 |       [ 'Tōkyō', 'tokyo' ],
21 |       [ 'Ōsaka', 'osaka' ],
22 |       [ 'El Djazaïr', 'el-djazair' ],
23 |       [ 'Al-Kharṭūm', 'al-khartum' ],
24 |       [ 'Ṭarābulus', 'tarabulus' ],
25 |       [ 'Al-Iskandarīyah', 'al-iskandariyah' ],
26 |       [ 'Pishōr', 'pishor' ],
27 |       [ 'Pishāwar', 'pishawar' ],
28 |       [ 'Islām ābād', 'islam-abad' ],
29 |       [ 'Thành Phố Hồ Chí Minh', 'thanh-pho-ho-chi-minh' ],
30 |       [ 'Åland Islands', 'aland-islands' ],
31 |       [ "Pe\u{030C}awar", 'pexawar']  ## note: use unicode literal; Pex̌awar  -- see en.wikipedia.org/wiki/Peshawar
32 |     ]
33 | 
34 |     txt_io.each do |txt|
35 |       assert_equal txt[1], TextUtils.slugify( txt[0] )
36 |     end
37 |   end # method test_slugify
38 | 
39 | 
40 | end # class TestSlugify
41 | 


--------------------------------------------------------------------------------
/attic/values_reader_v2.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # fix: move into TextUtils namespace/module!!
 4 | 
 5 | ## todo/fix: find a better name than HashReaderV2 (HashReaderPlus?) ??
 6 | 
 7 | class ValuesReaderV2
 8 |   include LogUtils::Logging
 9 | 
10 |   def initialize( name, include_path, more_attribs={} )
11 |     @name          = name
12 |     @include_path  = include_path
13 |     @more_attribs  = more_attribs
14 |     
15 |     # map name to name_real_path
16 |     #   name might include !/ for virtual path (gets cut off)
17 |     #   e.g. at-austria!/w-wien/beers becomse w-wien/beers
18 | 
19 |     pos = @name.index( '!/')
20 |     if pos.nil?
21 |       @name_real_path = @name   # not found; real path is the same as name
22 |     else
23 |       # cut off everything until !/ e.g.
24 |       #   at-austria!/w-wien/beers becomes
25 |       #   w-wien/beers
26 |       @name_real_path = @name[ (pos+2)..-1 ]
27 |     end
28 |   end
29 | 
30 |   attr_reader :name
31 |   attr_reader :name_real_path
32 |   attr_reader :include_path
33 |   attr_reader :more_attribs
34 | 
35 |   def each_line
36 |     path          = "#{include_path}/#{name_real_path}.txt"
37 |     reader        = ValuesReader.new( path, more_attribs )
38 | 
39 |     logger.info "parsing data '#{name}' (#{path})..."
40 | 
41 |     reader.each_line do |attribs, values|
42 |       yield( attribs, values )
43 |     end
44 | 
45 |     ConfDb::Model::Prop.create_from_fixture!( name, path )
46 |   end
47 | 
48 | end # class ValuesReaderV2
49 | 
50 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | # Note: for local testing run like:
 5 | #
 6 | # 1.9.x: ruby -Ilib lib/pakman.rb
 7 | 
 8 | # core and stlibs
 9 | 
10 | require 'yaml'
11 | require 'pp'
12 | require 'erb'
13 | require 'logger'
14 | require 'optparse'
15 | require 'fileutils'
16 | 
17 | # rubygems
18 | 
19 | require 'logutils'
20 | require 'fetcher'   # fetch (download) files
21 | 
22 | 
23 | # 3rd party rubygems
24 | require 'liquid'
25 | 
26 | # our own code
27 | 
28 | require 'pakman/copier'
29 | require 'pakman/fetcher'
30 | require 'pakman/finder'
31 | require 'pakman/manifest'
32 | 
33 | require 'pakman/erb/template'
34 | require 'pakman/erb/templater'
35 | 
36 | require 'pakman/liquid/template'
37 | require 'pakman/liquid/templater'
38 | 
39 | require 'pakman/page'
40 | require 'pakman/utils'
41 | require 'pakman/version'
42 | 
43 | require 'pakman/cli/ctx'
44 | require 'pakman/cli/helpers'
45 | require 'pakman/cli/opts'
46 | require 'pakman/cli/runner'
47 | require 'pakman/cli/commands/fetch'
48 | require 'pakman/cli/commands/gen'
49 | require 'pakman/cli/commands/list'
50 | 
51 | 
module Pakman

  class << self

    ## one-line version banner (pakman version plus ruby version, release date and platform)
    def banner
      "pakman #{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
    end

    ## absolute path of the folder one level above the folder holding this file
    def root
      File.expand_path( File.dirname( File.dirname( __FILE__ ) ) )
    end

    ## command line entry point; hands over ARGV to the runner
    def main
      Runner.new.run( ARGV )
    end

  end # class << self

end  # module Pakman
67 | 
68 | 
69 | Pakman.main if __FILE__ == $0
70 | 


--------------------------------------------------------------------------------
/pakman/test/test_liquid.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_liquid.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
## note: use Minitest::Test (the MiniTest constant is only a legacy compat alias
##   that newer minitest versions no longer load by default)
class TestLiquid < Minitest::Test

  def setup
    Liquid::Template.error_mode = :strict
  end

  ## smoke test: render a plain liquid template
  def test_template
    hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" )
    ctx = { 'headers' => hash['headers'], 'slides' => hash['slides'] }
    pp ctx

    path = "#{Pakman.root}/test/liquid/test.html"
    t = Pakman::LiquidTemplate.from_file( path )
    pp t.render( ctx )

    assert true   ## assume everything ok if we get here
  end

  ## smoke test: render a liquid page template
  def test_page_template
    hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" )
    ctx = { 'headers' => hash['headers'], 'slides' => hash['slides'] }
    pp ctx

    path = "#{Pakman.root}/test/liquid/pak/test.html"
    t = Pakman::LiquidPageTemplate.from_file( path )
    pp t.render( ctx )

    assert true   ## assume everything ok if we get here
  end

  ## smoke test: merge the test template pack into a fresh tmp folder
  def test_merge
    hash = YAML.load_file( "#{Pakman.root}/test/data/test.yml" )
    ctx = { 'headers' => hash['headers'], 'slides' => hash['slides'] }
    pp ctx

    manifestsrc = "#{Pakman.root}/test/liquid/pak/test.txt"
    outpath = "#{Pakman.root}/tmp/#{Time.now.to_i}"    ## pakpath/output path

    Pakman::LiquidTemplater.new.merge_pak( manifestsrc, outpath, ctx, 'test' )

    assert true   ## assume everything ok if we get here
  end  # method test_merge

end # class TestLiquid
57 | 
58 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/finder.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | class Finder
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 | 
10 |   def find_manifests( patterns, excludes=[] )
11 |     manifests = []
12 | 
13 |     patterns.each do |pattern|
14 |       pattern.gsub!( '\\', '/')  # normalize path; make sure all path use / only
15 |       logger.debug "Checking >#{pattern}<"
16 |       Dir.glob( pattern ) do |file|
17 |         logger.debug "  Found manifest candidate >#{file}<"
18 |         if File.directory?( file ) # NB: do not include directories
19 |           logger.debug "  Skipping match; it's a directory"
20 |         else
21 |           unless exclude?( file, excludes )  # check for excludes; skip if excluded
22 |             logger.debug "  Adding match >#{file}<"
23 | 
24 |             ## todo/fix:
25 |             # array first entry - downcase and gsub('.txt','') ??
26 |             # use Pakman.pakname_from_file()
27 | 
28 |             manifests << [ File.basename( file ), file ]
29 |           end
30 |         end
31 |       end
32 |     end
33 | 
34 |     manifests
35 |   end
36 | 
37 | private
38 |   def exclude?( file, excludes )
39 |     excludes.each do |pattern|
40 |       ## todo: FNM_DOTMATCH helps or not?? (make up some tests??)
41 |       if File.fnmatch?( pattern, file, File::FNM_CASEFOLD | File::FNM_DOTMATCH )
42 |         logger.debug "  Skipping match; it's excluded by pattern >#{pattern}<"
43 |         return true
44 |       end
45 |     end
46 |     false
47 |   end
48 | 
49 | end # class Finder
50 | end # module Pakman
51 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/sanitizier.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module TextUtils
 4 | 
 5 | class Sanitizier
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 |   @@ignore_tags = %w{ head script style }
10 |   @@inline_tags = %w{ span b i u }
11 |   @@block_tags  = %w{ p div ul ol }
12 | 
13 | 
14 |   def initialize( ht )
15 |     @ht = ht  # hypertext (html source)
16 |   end
17 | 
18 |   def to_plain_text
19 |     
20 |     ht = @ht
21 |     ht = handle_ignore_tags( ht )
22 | 
23 | ## handle_pre_tags ??  - special rule for preformatted (keep whitespace)
24 | 
25 |     ht = handle_inline_tags( ht )
26 |     ht = handle_block_tags( ht )
27 |     ht = handle_other_tags( ht )  # rules for remain/left over tags
28 | 
29 |     ht = handle_entities( ht )
30 | 
31 |     ht
32 |   end
33 | 
34 |   def handle_entities( ht )
35 |     ## unescape entities
36 |     #  - check if it also works for generic entities like &#20; etc.
37 |     #  or only for &gt; &lt; etc.
38 |     ht = CGI.unescapeHTML( ht )
39 |   end
40 | 
41 |   def tag_regex( tag )
42 |     # note use non-greedy .*? for content
43 | 
44 |     /<#{tag}[^>]*>(.*?)<\/#{tag}>/mi
45 |   end
46 | 
47 |   def handle_ignore_tags( ht )
48 |     @@ignore_tags.each do |tag|
49 |       ht.gsub!( tag_regex(tag), '' )
50 |     end
51 |     ht
52 |   end
53 | 
54 |   def handle_inline_tags( ht )
55 |     @@inline_tags.each do |tag|
56 |       # add a space after
57 |       ht.gsub!( tag_regex(tag), '\1 ' )
58 |     end
59 |     ht
60 |   end
61 | 
62 |   def handle_block_tags( ht )
63 |     @@block_tags.each do |tag|
64 |       ht.gsub!( tag_regex(tag), "\n\1\n" )
65 |     end
66 |     ht
67 |   end
68 | 
69 | 
70 | end # class Sanitizier
71 | 
72 | end # module TextUtils
73 | 


--------------------------------------------------------------------------------
/attic/hash_reader_v2.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # fix: move into TextUtils namespace/module!!
 4 | 
 5 | ## todo/fix: find a better name than HashReaderV2 (HashReaderPlus?) ??
 6 | 
 7 | class HashReaderV2
 8 |   include LogUtils::Logging
 9 | 
10 |   def initialize( name, include_path )
11 |     @name          = name
12 |     @include_path  = include_path
13 | 
14 |     # map name to name_real_path
15 |     #   name might include !/ for virtual path (gets cut off)
16 |     #   e.g. at-austria!/w-wien/beers becomse w-wien/beers
17 | 
18 |     pos = @name.index( '!/')
19 |     if pos.nil?
20 |       @name_real_path = @name   # not found; real path is the same as name
21 |     else
22 |       # cut off everything until !/ e.g.
23 |       #   at-austria!/w-wien/beers becomes
24 |       #   w-wien/beers
25 |       @name_real_path = @name[ (pos+2)..-1 ]
26 |     end
27 |   end
28 | 
29 |   attr_reader :name
30 |   attr_reader :name_real_path
31 |   attr_reader :include_path
32 | 
33 |   def each
34 |     path          = "#{include_path}/#{name_real_path}.yml"
35 |     reader        = HashReader.from_file( path )
36 | 
37 |     logger.info "parsing data '#{name}' (#{path})..."
38 | 
39 |     reader.each do |key, value|
40 |       yield( key, value )
41 |     end
42 | 
43 |     ConfDb::Model::Prop.create_from_fixture!( name, path )
44 |   end
45 | 
46 | 
47 |   def each_typed
48 |     path          = "#{include_path}/#{name_real_path}.yml"
49 |     reader        = HashReader.from_file( path )
50 | 
51 |     logger.info "parsing data '#{name}' (#{path})..."
52 | 
53 |     reader.each_typed do |key, value|
54 |       yield( key, value )
55 |     end
56 | 
57 |     ConfDb::Model::Prop.create_from_fixture!( name, path )
58 |   end
59 | 
60 | 
61 | end # class HashReaderV2
62 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/page.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | 
 6 | ## Jekyll-style page
 7 | ##   with optional front-matter (yaml block)
 8 | 
 9 | class Page
10 | 
11 |   def self.from_file( path )
12 |     puts "  Loading page (from file) >#{path}<..."
13 |     text = File.open( path, 'r:bom|utf-8' ).read     ## note: assume utf8
14 |     self.new( text, path: path )   ## note: pass along path as an option
15 |   end
16 | 
17 |   def self.from_string( text )  ### use parse as alias - why?? why not??
18 |     self.new( text )
19 |   end
20 | 
21 |   attr_reader :contents
22 |   attr_reader :headers
23 | 
24 |   ## has headers/metadata (front matter block) - yes/no - use hash for check for now
25 |   def headers?()  @headers.kind_of?( Hash ); end
26 | 
27 |   ## check if \s includes newline too?
28 |   ## fix/check ^ - just means start of newline (use /A or something --- MUST always be first
29 |   ##
30 |   ##  note: include --- in headers
31 |   ##    e.g. ---  results in nil
32 |   ##         empty string (without leading ---) results in false! (we want nil if no headers for empty block)
33 |   HEADERS_PATTERN = /
34 |       ^(?<headers>---\s*\n
35 |          .*?)
36 |       ^(---\s*$\n?)
37 |      /xm
38 | 
39 |   def initialize( text, opts={} )
40 |     ## todo/fix: check regex in jekyll (add link to source etc.)
41 |     if m=HEADERS_PATTERN.match( text )
42 |       @contents  = m.post_match
43 |       pp m
44 |       pp m[:headers]
45 |       @headers  = YAML.load( m[:headers] )
46 |       pp @headers
47 |       @headers = {}  if @headers.nil?  ##  check if headers is nil use/assign empty hash
48 |     else
49 |       @contents = text
50 |       @headers  = nil
51 |     end
52 |   end
53 | 
54 | end # class Page
55 | end # module Pakman
56 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/erb/templater.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | ### todo:
 6 | ##  rename to ErbTemplater  (or RubyTemplater) - why? why not?
 7 | 
 8 | 
 9 | class Templater
10 | 
11 |   include LogUtils::Logging
12 | 
13 | 
14 |   def merge_pak( manifestsrc, pakpath, binding, name )
15 | 
16 |     start = Time.now
17 | 
18 |     pakname = Pakman.pakname_from_file( manifestsrc )
19 | 
20 |     logger.info "Merging template pack '#{pakname}'"
21 | 
22 |     # todo: rename to load_file once depreated API got removed
23 |     manifest = Manifest.load_file_v2( manifestsrc )
24 | 
25 |     manifest.each do |entry|
26 |       dest   = entry[0]
27 |       source = entry[1]
28 | 
29 |       if dest =~ /__file__/   # replace w/ name
30 |         dest = dest.gsub( '__file__', name )
31 |       end
32 | 
33 |       # get full (absolute) path and make sure path exists
34 |       destfull = File.expand_path( dest, pakpath )
35 |       destpath = File.dirname( destfull )
36 |       FileUtils.makedirs( destpath ) unless File.directory?( destpath )
37 | 
38 |       logger.debug "destfull=>#{destfull}<"
39 |       logger.debug "destpath=>#{destpath}<"
40 | 
41 |       if source =~ /\.erb\.|.erb$/
42 |         logger.info "  Merging to #{dest}..."
43 | 
44 |         out = File.new( destfull, 'w+:utf-8' )   ## note: use utf8 (by default)
45 |         out << ErbTemplate.from_file( source ).render( binding )
46 |         out.flush
47 |         out.close
48 |       else
49 |         logger.info "  Copying to #{dest} from #{source}..."
50 | 
51 |         FileUtils.copy( source, destfull )
52 |       end
53 |     end # each entry in manifest
54 | 
55 |     logger.info "Done (in #{Time.now-start} s)."
56 |   end # method merge_pak
57 | 
58 | end # class Templater
59 | end # module Pakman
60 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/filter/comment_filter.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module TextUtils
 4 |   module Filter
 5 | 
 6 |   def comments_percent_style( content, options={} )
 7 | 
 8 |     # remove comments
 9 |     # % comments
10 |     # %begin multiline comment
11 |     # %end multiline comment
12 | 
13 |     # track statistics
14 |     comments_multi  = 0
15 |     comments_single = 0
16 |     comments_end    = 0
17 | 
18 |     # remove multi-line comments
19 |     content.gsub!(/^%(begin|comment|comments).*?%end/m) do |match|
20 |       comments_multi += 1
21 |       ""
22 |     end
23 |     
24 |      # remove everyting starting w/ %end (note, can only be once in file) 
25 |     content.sub!(/^%end.*/m) do |match|
26 |       comments_end += 1
27 |       ""
28 |     end
29 | 
30 |     # hack/note: 
31 |     #  note multi-line erb expressions/stmts might cause trouble
32 |     #  
33 |     #  %> gets escaped as special case (not treated as comment)
34 |     # <%
35 |     #   whatever
36 |     # %> <!-- trouble here; would get removed as comment!
37 |     #  todo: issue warning?
38 |     
39 |     # remove single-line comments    
40 |     content.gsub!(/(^%$)|(^%[^>].*)/ ) do |match|
41 |       comments_single += 1
42 |       ""
43 |     end
44 |     
45 |     puts "  Removing %-comments (#{comments_single} lines, " +
46 |        "#{comments_multi} begin/end-blocks, #{comments_end} end-blocks)..."
47 |     
48 |     content
49 |   end
50 | 
51 |   def skip_end_directive( content, options={} )
52 |     # codex-style __SKIP__, __END__ directive
53 |     # ruby note: .*? is non-greedy (shortest-possible) regex match
54 |     content.gsub!(/__SKIP__.*?__END__/m, '')
55 |     content.sub!(/__END__.*/m, '')
56 |     content
57 |   end
58 | 
59 | 
60 |   end  # module Filter
61 | end   # module TextUtils


--------------------------------------------------------------------------------
/textutils/lib/textutils/helper/unicode_helper.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | module TextUtils
 5 |   module UnicodeHelper
 6 | 
 7 |   # NB:
 8 |   #  U_HYPHEN_MINUS is standard ascii hyphen/minus e.g. - 
 9 |   #
10 |   #  see en.wikipedia.org/wiki/Dash
11 | 
12 |   U_HYPHEN              = "\u2010"  # unambigous hyphen
13 |   U_NON_BREAKING_HYPHEN = "\u2011"  # unambigous non-breaking hyphen
14 |   U_MINUS               = "\u2212"  # unambigous minus sign (html => &minus;)
15 |   U_NDASH               = "\u2013"  # ndash (html => &ndash; ascii => --)
16 |   U_MDASH               = "\u2014"  # mdash (html => &mdash; ascii => ---)
17 | 
18 |   def convert_unicode_dashes_to_plain_ascii( text, opts = {} )
19 | 
20 |     text = text.gsub( /(#{U_HYPHEN}|#{U_NON_BREAKING_HYPHEN}|#{U_MINUS}|#{U_NDASH}|#{U_MDASH})/ ) do |_|
21 | 
22 |       # puts "found U+#{'%04X' % $1.ord} (#{$1})"
23 | 
24 |       msg = ''
25 | 
26 |       if $1 == U_HYPHEN
27 |         msg << "found hyhpen U+2010 (#{$1})"
28 |       elsif $1 == U_NON_BREAKING_HYPHEN
29 |         msg << "found non_breaking_hyhpen U+2011 (#{$1})"
30 |       elsif $1 == U_MINUS
31 |         msg << "found minus U+2212 (#{$1})"
32 |       elsif $1 == U_NDASH
33 |         msg << "found ndash U+2013 (#{$1})"
34 |       elsif $1 == U_MDASH
35 |         msg << "found mdash U+2014 (#{$1})"
36 |       else
37 |         msg << "found unknown unicode dash U+#{'%04X' % $1.ord} (#{$1})"
38 |       end
39 | 
40 |       msg << " in file >#{opts[:path]}<"   if opts[:path]
41 |       msg << "; converting to plain ascii hyphen_minus (-)"
42 |   
43 |       puts "*** warning: #{msg}"
44 | 
45 |       '-'
46 |     end
47 | 
48 |     text
49 |   end # method convert_unicode_dashes_to_plain_ascii
50 | 
51 | 
52 |   end # module UnicodeHelper
53 | end # module TextUtils
54 | 


--------------------------------------------------------------------------------
/textutils/test/test_asciify.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_asciify.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
class TestAsciify < Minitest::Test

  ##
  #  Runs every sample through TextUtils.asciify and compares the result
  #  with the expected spelling listed next to it.
  def test_asciify

    ## each entry:  [ input, expected result ]
    samples = [
      [ 'São Paulo',   'Sao Paulo' ],
      [ 'São Gonçalo', 'Sao Goncalo' ],
      [ 'Výčepní',     'Vycepni' ],
      [ 'Żubr', 'Zubr' ],
      [ 'Żywiec', 'Zywiec' ],
      [ 'Lomża Export', 'Lomza Export' ],
      [ 'Nogne Ø Imperial Stout', 'Nogne O Imperial Stout' ],
      [ 'Xyauyù', 'Xyauyu' ],
      [ 'Águila', 'Aguila' ],
      [ 'Arena Amazônia', 'Arena Amazonia' ],
      [ 'Tōkyō', 'Tokyo' ],
      [ 'Ōsaka', 'Osaka' ],
      [ 'El Djazaïr', 'El Djazair' ],
      [ 'Al-Kharṭūm', 'Al-Khartum' ],
      [ 'Ṭarābulus', 'Tarabulus' ],
      [ 'Al-Iskandarīyah', 'Al-Iskandariyah' ],
      [ 'Pishōr', 'Pishor' ],
      [ 'Pishāwar', 'Pishawar' ],
      [ 'Islām ābād', 'Islam abad' ],
      [ 'Thành Phố Hồ Chí Minh', 'Thanh Pho Ho Chi Minh' ],
      [ 'Åland Islands', 'Aland Islands' ],
      [ 'Bistrița', 'Bistrita' ],
      [ 'Piatra-Neamț', 'Piatra-Neamt' ],
      [ 'Constanța', 'Constanta' ],
      [ 'Galați', 'Galati' ],
      [ 'Reșița', 'Resita' ],
      [ 'Chișinău', 'Chisinau' ],
      [ "Pe\u{030C}awar", 'Pexawar'],  ## note: use unicode literal; Pex̌awar  -- see en.wikipedia.org/wiki/Peshawar
      [ 'Übelbach', 'Uebelbach' ]
    ]

    samples.each do |(input, expected)|
      assert_equal expected, TextUtils.asciify( input )
    end
  end # method test_asciify


end # class TestAsciify
54 | 
55 | 


--------------------------------------------------------------------------------
/textutils/test/test_title_mapper.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | require 'helper'
 5 | 
 6 | 
 7 | class TestTitleMapper < Minitest::Test
 8 | 
 9 |   WineryStruct =  Struct.new(:key, :title, :synonyms)
10 | 
11 |   def test_title_table
12 | 
13 |     ### todo/fix: auto-add year and remove (1971) or (????) etc. from title!!!!
14 | 
15 |     titles_in = [
16 |       WineryStruct.new( 'antonbauer',     'Anton Bauer (1971)' ),
17 |       WineryStruct.new( 'josefbauer',      'Weingut Josef Bauer', 'Joe Bauer|Josef Bauer (????)' ),
18 |       WineryStruct.new( 'bernhardott',     'Weingut Ott', 'Weingut Bernhard Ott|Bernhard Ott (1972)' ),
19 |       WineryStruct.new( 'andreaspolsterer', 'Weingut Andreas B. Polsterer', 'Andreas B. Polsterer (1970)' )
20 |     ]
21 | 
22 |     ## note: for regex the following must get escaped
23 |     #   (  => \(
24 |     #   )  => \)
25 |     #   .  => \.
26 |     #   ?  => \?
27 | 
28 |     titles_out2 = [
29 |       ['antonbauer',       [ 'Anton Bauer \(1971\)', 'Anton Bauer']],
30 |       ['josefbauer',       [ 'Weingut Josef Bauer', 'Josef Bauer \(\?\?\?\?\)', 'Josef Bauer', 'Joe Bauer' ]],
31 |       ['bernhardott',      [ 'Weingut Bernhard Ott', 'Bernhard Ott \(1972\)', 'Bernhard Ott', 'Weingut Ott' ]],
32 |       ['andreaspolsterer', [ 'Weingut Andreas B\. Polsterer', 'Andreas B\. Polsterer \(1970\)', 'Andreas B\. Polsterer' ]]
33 |     ]
34 | 
35 |     mapper = TextUtils::TitleMapper.new( titles_in, 'winery' )
36 |     titles_out = mapper.known_titles
37 | 
38 |     puts 'titles_out:'
39 |     pp titles_out
40 |     puts titles_out.to_s
41 | 
42 |     puts 'titles_out2:'
43 |     pp titles_out2
44 |     puts titles_out.to_s
45 | 
46 |     assert_equal titles_out2.to_s, titles_out.to_s
47 | 
48 |   end # method test_title_table
49 | 
50 | 
51 | end # class TestTitleMapper
52 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/reader/block_reader.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | # fix: move into TextUtils namespace/module!!
 5 | 
 6 | class BlockReader
 7 | 
 8 |   include LogUtils::Logging
 9 | 
10 |   def self.from_file( path )
11 |     ## nb: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
12 |     ## - see textutils/utils.rb
13 |    text = File.read_utf8( path )
14 |    self.from_string( text )
15 |   end
16 | 
17 |   def self.from_string( text )
18 |     self.new( text )
19 |   end
20 | 
21 |   def initialize( text )
22 |     @text = text
23 |   end
24 | 
25 |   def read
26 |     ## note returns an array of (line) strings e.g.
27 |     ## [
28 |     ##  "line1\nline2",         ## -- block1
29 |     ##  "line1\nline2\nline3"   ## -- block2
30 |     ## ]
31 | 
32 |     blocks = []
33 |     buf = ""
34 | 
35 |     @text.each_line do |line|
36 |        # comments allow:
37 |        # 1) ##### (shell/ruby style)
38 |        if line =~ /^\s*#/ 
39 |           # skip komments and do NOT copy to result (keep comments secret!)
40 |          logger.debug 'skipping comment line'
41 |          next
42 |        end
43 | 
44 | #       if line =~ /^\s*$/
45 | #         # kommentar oder leerzeile überspringen
46 | #         logger.debug 'skipping blank line'
47 | #         next
48 | #       end
49 | 
50 |        # pass 2) remove leading and trailing whitespace
51 |        line = line.strip
52 | 
53 |        if line =~ /^-{3,}$/   ## three or more lines
54 |          logger.debug 'block separator'
55 |          blocks << buf.strip   ## note: strip leading and trailing whitespace
56 |          buf = ""
57 |        else
58 |          buf << "#{line}\n"
59 |        end
60 |     end # each lines
61 | 
62 |     blocks << buf.strip ## note: strip leading and trailing whitespace
63 |     blocks
64 |   end # method read
65 | 
66 | end # class BlockReader
67 | 
68 | 


--------------------------------------------------------------------------------
/textutils/test/test_title_finder.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | # to run use
 5 | # ruby -I ./lib -I ./test test/test_title_finder.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
class TestTitleFinder < Minitest::Test

  include TextUtils::ValueHelper   #  lets us use find_grade, find_key_n_title, etc.


  ## key, title and synonyms get picked out of a single value
  def test_find_key_n_title
    attribs, _more_values = find_key_n_title( ['München [Munich]'] )  ## returned more_values not checked

    assert_equal 'muenchen',  attribs[:key]
    assert_equal 'München',   attribs[:title]
    assert_equal '[Munich]',  attribs[:synonyms]
  end

  ## same as above but with a trailing tree/hierarchy (› Oberbayern › Bayern)
  def test_find_key_n_title_w_tree
    attribs, _more_values = find_key_n_title( ['München [Munich] › Oberbayern › Bayern'] )  ## returned more_values not checked

    assert_equal 'muenchen',  attribs[:key]
    assert_equal 'München',   attribs[:title]
    assert_equal '[Munich]',  attribs[:synonyms]
  end


  def test_title_tokenizer
    ## inline translation in brackets
    bracketed = NameTokenizer.new.tokenize( 'München [Munich]' )
    assert_equal 2, bracketed.size
    assert_equal 'München',  bracketed[0]
    assert_equal '[Munich]', bracketed[1]

    ## alternative names separated by | (pipe)
    piped = NameTokenizer.new.tokenize( 'FC Bayern Muenchen|Bayern Muenchen|Bayern' )
    assert_equal 3, piped.size
    assert_equal 'FC Bayern Muenchen', piped[0]
    assert_equal 'Bayern Muenchen',    piped[1]
    assert_equal 'Bayern',             piped[2]
  end

  ## grade markers (*, **, ***) before or after the title
  def test_grade
    [
      [ '*** Anton Bauer', 1 ],
      [ '** Anton Bauer',  2 ],
      [ '* Anton Bauer',   3 ],
      [ 'Anton Bauer',     4 ],
      [ 'Anton Bauer ***', 1 ],
    ].each do |(value, grade)|
      assert_equal [grade,'Anton Bauer'], find_grade( value )
    end
  end

end # class TestTitleFinder
54 | 
55 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/parser/name_tokenizer.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # fix: move into TextUtils namespace/module!! ??
 4 | 
 5 | 
 6 | class NameTokenizer   ## - rename to NameScanner, NameSplitter, NameSeparator, etc.
 7 | 
 8 |   ## split (single) string value into array of names
 9 |   ##   e.g.
10 |   ##   'München [Munich]'             => ['München', '[Munich]']
11 |   ##   'Wr. Neustadt | Wiener Neustadt' => ['Wr. Neustadt', 'Wiener Neustadt']
12 |   include LogUtils::Logging
13 | 
14 |   def tokenize( value )   ## rename to/use split - why? why not??
15 |     names = []
16 | 
17 |     # 1)  split by | (pipe) -- remove leading n trailing whitespaces
18 |     parts = value.split( /[ \t]*\|[ \t]*/ )
19 | 
20 |     # 2)  split "inline" translations e.g. München [Munich]
21 | 
22 |     ## todo: add support for  Munich [en]  e.g. trailing lang tag
23 |     ## todo: add support for bullet (official bi-lingual names w/ tags ??) - see brussels - why, why not??
24 | 
25 |     parts.each do |part|
26 |         s = StringScanner.new( part )
27 |         s.skip( /[ \t]+/)   # skip whitespaces
28 | 
29 |         while s.eos? == false
30 |           if s.check( /\[/ )
31 |             ## scan everything until the end of bracket (e.g.])
32 |             name = s.scan( /\[[^\]]+\]/)
33 |             ## todo/fix: if name nil - issue warning??
34 |             #  starting w/ [  but no closing ] found !!!! - possible? fix!!
35 |           else
36 |             ## scan everything until the begin of bracket (e.g.[)
37 |             name = s.scan( /[^\[]+/)
38 |             name = name.rstrip   ## remove trailing spaces (if present)
39 |           end
40 |           names << name
41 | 
42 |           s.skip( /[ \t]+/)  # skip whitespaces
43 |           logger.debug( "[NameTokenizer] eos?: #{s.eos?}, rest: >#{s.rest}<" )
44 |         end
45 |     end # each part
46 | 
47 |     logger.debug( "[NameTokenizer] names=#{names.inspect}")
48 |     names
49 |   end # method split
50 | end # class NameTokenizer
51 | 
52 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/patterns.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
module TextUtils

# collection of regex patterns for reuse
#
# note: all patterns are plain (single-quoted) strings - NOT Regexp objects;
#   presumably they get interpolated into regexes elsewhere (callers not visible here)

### todo: add a patterns.md page to  github ??
##  - add regexper pics??

############
# about ruby regexps
#
# try Rubular - a Ruby regular expression editor and tester
#  -> http://rubular.com
#   code -> ??  by ??
#
#
# Jeff Avallone's Regexper - Shows State-Automata Diagrams
#  try -> http://regexper.com
#    code -> https://github.com/javallone/regexper
#
#
#  Regular Expressions | The Bastards Book of Ruby by Dan Nguyen
# http://ruby.bastardsbook.com/chapters/regexes/
#
# move to notes  regex|patterns on  geraldb.github.io ??
#

  # line with nothing but (optional) whitespace
  EMPTY_LINE_PATTERN = '^\s*$'
  
  #################################
  ### Start of Line Comment Patterns
  
  COMMENT_LINE_PATTERN = '^\s*#'   # e.g. Ruby/Shell style  starting w/  # this is a comment

  COMMENT_LINE_HASKELL_PATTERN = '^\s*--'   # e.g. Haskell/Ada? style starting w/ -- 
  COMMENT_LINE_ALT_PATTERN = COMMENT_LINE_HASKELL_PATTERN     # alias (same string object)

  COMMENT_LINE_TEX_PATTERN = '^\s*%'   # e.g. TeX/LaTeX style starting w/ %
  COMMENT_LINE_ALT_II_PATTERN = COMMENT_LINE_TEX_PATTERN      # alias (same string object)

  #############################
  ### End of Line (EOL) Comment Patterns

  # one or more whitespace chars followed by # and at least one more char up to the end of the line
  EOL_COMMENT_PATTERN = '\s+#.+$'    # fix: use \b word boundary instead of \s - why why not?
  # why \b  - everything but a-z0-9, that is, spaces but also includes umlauts, special chars etc.

  ##############
  ## Dates
  #
  # some info at www.regular-expressions.info/dates.html
  #
  # note: the month (MM/M) and day (DD/D) patterns are top-level alternations
  #   WITHOUT an enclosing group; wrap them e.g. (?:...) or (...) when
  #   combining with other patterns - otherwise the | splits the whole regex

  YYYY_STRICT_19_20_PATTERN = '(?:19|20)\d\d'   # four digits starting w/ 19 or 20
  YYYY_STRICT_20_PATTERN = '20\d\d'             # four digits starting w/ 20

  MM_STRICT_PATTERN = '0[1-9]|1[012]'           # 01..12 (two digits)
  M_STRICT_PATTERN =  '0?[1-9]|1[012]'          # 1..12 (leading zero optional)

  DD_STRICT_PATTERN = '0[1-9]|[12][0-9]|3[01]'  # 01..31 (two digits)
  D_STRICT_PATTERN =  '0?[1-9]|[12][0-9]|3[01]' # 1..31 (leading zero optional)

  ######
  ## Time


end # TextUtils


--------------------------------------------------------------------------------
/textutils/lib/textutils/helper/date_helper.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module TextUtils
 4 |   module DateHelper
 5 | 
 6 | 
 7 |   def time_ago_in_words( from_time )
 8 |     ## note:
 9 |     #  for code/source examples
10 |     #   check rails helper or padrino(sinatra) helper
11 |     #    for now this is just a simplified version e.g. no i18n
12 | 
13 |     from_time = from_time.to_time
14 |     to_time   = Time.now
15 |     
16 |     ### todo: will not handle future dates??
17 |     ## what todo do??
18 |     ## use -1..-50000000000 ??  "future"
19 | 
20 |     ## from_time, to_time = to_time, from_time if from_time > to_time
21 | 
22 |     distance_in_minutes = ((to_time - from_time)/60.0).round
23 | 
24 |     case distance_in_minutes
25 |       when 0..1             then  "just now"
26 |       when 2...45           then  "%d minutes ago" % distance_in_minutes
27 |       when 45...90          then  "an hour ago"   ## use one instead of 1 ?? why? why not?
28 |       # 90 mins up to 24 hours
29 |       when 90...1440        then  "%d hours ago" % (distance_in_minutes.to_f / 60.0).round
30 |       # 24 hours up to 42 hours
31 |       when 1440...2520      then "a day ago"   ## use one day ago - why? why not?
32 |       # 42 hours up to 30 days
33 |       when 2520...43200     then "%d days ago" % (distance_in_minutes.to_f / 1440.0).round
34 |       # 30 days up to 60 days
35 |       #  fix: use pluralize for months  - fix: change when - use just for a month ago
36 |       when 43200...86400    then "%d months ago" % (distance_in_minutes.to_f / 43200.0).round
37 |       # 60 days up to 365 days
38 |       when 86400...525600   then "%d months ago" % (distance_in_minutes.to_f / 43200.0).round
39 |       ## fix - add number of years ago
40 |       else                       "over a year ago"  #todo: use over a year ago???
41 |                                                     # fix: split into two - use
42 |                                                     #  1) a year ago
43 |                                                     #  2) (x) years ago
44 |     end
45 |   end
46 | 
47 |   end # module DateHelper
48 | end # module TextUtils
49 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/liquid/template.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | 
 6 | class LiquidTemplate
 7 | 
 8 |   def self.from_file( path )
 9 |     puts "  Loading template (from file) >#{path}<..."
10 |     text = File.open( path, 'r:bom|utf-8' ).read     ## note: assume utf8
11 |     self.new( text, path: path )   ## note: pass along path as an option
12 |   end
13 | 
14 |   def self.from_string( text )  ### use parse as alias - why?? why not??
15 |     self.new( text )
16 |   end
17 | 
18 |   def initialize( text, opts={} )
19 |     @template = Liquid::Template.parse( text )   # parses and compiles the template
20 |   end
21 | 
22 |   def render( hash )
23 |     ## note: hash keys MUST be strings (not symbols) e.g. 'name' => 'Toby'
24 |     ## pp hash
25 |     res = @template.render( hash,  { strict_variables: true, strict_filters: true } )
26 | 
27 |     ###
28 |     ##  issue warnings/errors if present
29 |     errors = @template.errors
30 |     if errors.size > 0
31 |       puts "!! WARN - #{errors.size} liquid error(s) when rendering template:"
32 |       pp errors
33 |     end
34 | 
35 |     res
36 |   end
37 | 
38 | end # class LiquidTemplate
39 | 
40 | 
41 | #########################
42 | ## convenience helper for pages (with headers/front matter)
43 | 
class LiquidPageTemplate

  ##
  #  Loads a page from the file at path and builds a template
  #  from the page contents.
  def self.from_file( path )
    ## todo: (auto)-add headers as page.title etc. -- why? why not??
    puts "  Loading page template (from file) >#{path}<..."
    contents = Page.from_file( path ).contents     ## use/todo: use read utf8 - why? why not??
    new( contents, path: path )   ## note: pass along path as an option
  end

  ##
  #  Parses text as a page and builds a template from the page contents.
  def self.from_string( text )  ### use parse as alias - why?? why not??
    ## todo: (auto)-add headers as page.title etc. -- why? why not??
    contents = Page.from_string( text ).contents
    new( contents )
  end

  ##
  #  Wraps a LiquidTemplate built from text; opts get passed along as is.
  def initialize( text, opts={} )
    @template = LiquidTemplate.new( text, opts )
  end

  ##
  #  Delegates to the wrapped LiquidTemplate.
  def render( hash )
    @template.render( hash )
  end

end ## class LiquidPageTemplate
67 | 
68 | 
69 | end # module Pakman
70 | 


--------------------------------------------------------------------------------
/textutils/Manifest.txt:
--------------------------------------------------------------------------------
 1 | HISTORY.md
 2 | Manifest.txt
 3 | README.md
 4 | Rakefile
 5 | lib/textutils.rb
 6 | lib/textutils/classifier.rb
 7 | lib/textutils/core_ext/array.rb
 8 | lib/textutils/core_ext/file.rb
 9 | lib/textutils/core_ext/time.rb
10 | lib/textutils/filter/code_filter.rb
11 | lib/textutils/filter/comment_filter.rb
12 | lib/textutils/filter/erb_django_filter.rb
13 | lib/textutils/filter/erb_filter.rb
14 | lib/textutils/filter/string_filter.rb
15 | lib/textutils/helper/address_helper.rb
16 | lib/textutils/helper/date_helper.rb
17 | lib/textutils/helper/hypertext_helper.rb
18 | lib/textutils/helper/tag_helper.rb
19 | lib/textutils/helper/title_helper.rb
20 | lib/textutils/helper/unicode_helper.rb
21 | lib/textutils/helper/value_helper_i.rb
22 | lib/textutils/helper/value_helper_ii.rb
23 | lib/textutils/helper/value_helper_iii_numbers.rb
24 | lib/textutils/helper/xml_helper.rb
25 | lib/textutils/page.rb
26 | lib/textutils/parser/name_parser.rb
27 | lib/textutils/parser/name_tokenizer.rb
28 | lib/textutils/patterns.rb
29 | lib/textutils/reader/block_reader.rb
30 | lib/textutils/reader/code_reader.rb
31 | lib/textutils/reader/fixture_reader.rb
32 | lib/textutils/reader/hash_reader.rb
33 | lib/textutils/reader/line_reader.rb
34 | lib/textutils/reader/tree_reader.rb
35 | lib/textutils/reader/values_reader.rb
36 | lib/textutils/sanitizier.rb
37 | lib/textutils/title.rb
38 | lib/textutils/title_mapper.rb
39 | lib/textutils/title_mapper2.rb
40 | lib/textutils/utils.rb
41 | lib/textutils/version.rb
42 | test/data/at-austria/1--n-niederoesterreich/orte.txt
43 | test/data/cl_all.txt
44 | test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt
45 | test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt
46 | test/data/de-deutschland/orte.txt
47 | test/data/feedburner.txt
48 | test/helper.rb
49 | test/test_address_helper.rb
50 | test/test_asciify.rb
51 | test/test_block_reader.rb
52 | test/test_fixture_reader.rb
53 | test/test_hypertext_helper.rb
54 | test/test_slugify.rb
55 | test/test_taglist.rb
56 | test/test_title_finder.rb
57 | test/test_title_helper.rb
58 | test/test_title_mapper.rb
59 | test/test_title_mapper2.rb
60 | test/test_tree_reader.rb
61 | test/test_tree_reader_ii.rb
62 | test/test_unicode_helper.rb
63 | test/test_values_reader.rb
64 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # core and stlibs
 4 | 
 5 | require 'json'
 6 | require 'yaml'
 7 | require 'erb'
 8 | require 'pp'
 9 | require 'fileutils'
10 | require 'time'
11 | require 'date'
12 | 
13 | 
14 | # 3rd party gems / libs
15 | require 'zip'       ### used for .from_zip for readers
16 | 
17 | ### todo/check - document active_support methods that get used
18 | require 'active_support/all'   # String.starts_with?, Object.blank?, etc.
19 | 
20 | 
21 | require 'props'
22 | require 'logutils'
23 | 
24 | 
25 | # our own code
26 | 
27 | require 'textutils/version'      ## let version always go first
28 | 
29 | require 'textutils/patterns'   # regex patterns for reuse
30 | require 'textutils/sanitizier'
31 | 
32 | require 'textutils/filter/code_filter'
33 | require 'textutils/filter/comment_filter'
34 | require 'textutils/filter/erb_django_filter'
35 | require 'textutils/filter/erb_filter'
36 | require 'textutils/filter/string_filter'
37 | 
38 | require 'textutils/helper/date_helper'
39 | require 'textutils/helper/hypertext_helper'
40 | require 'textutils/helper/xml_helper'
41 | 
42 | require 'textutils/helper/unicode_helper'
43 | require 'textutils/helper/tag_helper'
44 | require 'textutils/helper/title_helper'
45 | require 'textutils/helper/address_helper'
46 | require 'textutils/helper/value_helper_i'
47 | require 'textutils/helper/value_helper_ii'
48 | require 'textutils/helper/value_helper_iii_numbers'
49 | 
50 | require 'textutils/utils'
51 | require 'textutils/core_ext/file'
52 | require 'textutils/core_ext/time'
53 | require 'textutils/core_ext/array'
54 | 
55 | require 'textutils/parser/name_parser'
56 | require 'textutils/parser/name_tokenizer'
57 | 
58 | require 'textutils/reader/code_reader'
59 | require 'textutils/reader/hash_reader'
60 | require 'textutils/reader/line_reader'
61 | require 'textutils/reader/values_reader'
62 | require 'textutils/reader/fixture_reader'
63 | require 'textutils/reader/block_reader'
64 | require 'textutils/reader/tree_reader'
65 | 
66 | require 'textutils/classifier'
67 | require 'textutils/title'    # title table/mapper/finder utils
68 | require 'textutils/title_mapper'
69 | require 'textutils/title_mapper2'
70 | 
71 | require 'textutils/page'   # for book pages and page templates
72 | 
73 | 
74 | 
75 | # say hello
76 | puts TextUtils.banner   if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
77 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/parser/name_parser.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | # fix: move into TextUtils namespace/module!! ??
 4 | 
 5 | class NameParser
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 |   def parse( chunks )
10 |     ## todo/fix: (re)use nameparser - for now "simple" inline version
11 |     ##  fix!!! - note: for now lang gets ignored
12 |     ##  fix: add hanlde
13 |     ##  Leuven[nl]|Louvain[fr] Löwen[de]
14 |     ##  Antwerpen[nl]|Anvers[fr] [Antwerp]
15 |     ##  Brussel[nl]•Bruxelles[fr]   -> official bi-lingual name
16 |     ##  etc.
17 | 
18 |     ## values - split into names (name n lang pairs)
19 |     ## note: assumes (default) lang from more_attribs unless otherwise marked e.g. [] assume en etc.
20 | 
21 |     ## split chunks into values
22 |     values = []
23 |     chunks.each do |chunk|
24 |       next if chunk.nil? || chunk.blank?  ## skip nil or empty/blank chunks
25 | 
26 |       parts = chunk.split( '|' )   # 1)  split |
27 | 
28 |       parts.each do |part|
29 |         s = StringScanner.new( part )
30 |         s.skip( /[ \t]+/)   # skip whitespaces
31 | 
32 |         while s.eos? == false
33 |           if s.check( /\[/ )
34 |             ## scan everything until the end of bracket (e.g.])
35 |             ##  fix!!! - note: for now lang gets ignored
36 |             value = s.scan( /\[[^\]]+\]/)
37 |             value = value[1...-1]   # strip enclosing [] e.g. [Bavaria] => Bavaria
38 |           else
39 |             ## scan everything until the begin of bracket (e.g.[)
40 |             value = s.scan( /[^\[]+/)
41 |             value = value.strip
42 |           end
43 |           values << value
44 | 
45 |           s.skip( /[ \t]+/)  # skip whitespaces
46 |           logger.debug( "[NameParser] eos?: #{s.eos?}, rest: >#{s.rest}<" )
47 |         end
48 |       end
49 |     end
50 | 
51 |     logger.debug( "[NameParser] values=#{values.inspect}")
52 | 
53 |     names = []
54 |     values.each do |value|
55 |       name = value
56 |       ## todo: split by bullet ? (official multilang name) e.g. Brussel • Bruxelles
57 |       ## todo: process variants w/ () e.g. Krems (a. d. Donau) etc. ??
58 |       names << name
59 |     end
60 | 
61 |     logger.debug( "[NameParser] names=#{names.inspect}")
62 | 
63 |     names
64 |   end # method parse
65 | end # class NameParser
66 | 
67 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/helper/value_helper_iii_numbers.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | # match numbers (units)
 5 | #  e.g  km_squared, abv, etc.
 6 | 
 7 | module TextUtils
 8 |   module ValueHelper
 9 | 
10 | 
11 |   def match_number( value )
12 |     ## numeric
13 |     ##   note: can use any _ or spaces inside digits e.g. 1_000_000 or 1 000 000)
14 |     if value =~ /^([0-9][0-9 _]+[0-9])|([0-9]{1,2})$/
15 |       num = value.gsub(/[ _]/, '').to_i
16 |       yield( num )
17 |       true # bingo - match found
18 |     else
19 |       false # no match found
20 |     end
21 |   end
22 | 
23 | 
24 |   ###########################
25 |   ## numbers w/ units
26 | 
27 |   def match_km_squared( value )
28 |     ## allow numbers like 453 km² or 45_000 km2
29 |     if value =~ /^([0-9][0-9 _]+[0-9]|[0-9]{1,2})(?:\s*(?:km2|km²)\s*)$/
30 |       num = value.gsub( 'km2', '').gsub( 'km²', '' ).gsub(/[ _]/, '').to_i
31 |       yield( num )
32 |       true # bingo - match found
33 |     else
34 |       false # no match found
35 |     end
36 |   end
37 | 
38 |   def match_abv( value )  # alcohol by volume (abv) e.g. 5.2% 
39 |     if value =~ /^<?\s*(\d+(?:\.\d+)?)\s*%$/
40 |       # nb: allow leading < e.g. <0.5%
41 |       yield( $1.to_f )  # convert to decimal? how? use float?
42 |       true # bingo - match found
43 |     else
44 |       false # no match found
45 |     end
46 |   end
47 | 
48 |   def match_og( value ) # plato (stammwuerze/gravity?) e.g. 11.2°
49 |     if value =~ /^(\d+(?:\.\d+)?)°$/
50 |       # nb: no whitespace allowed between ° and number e.g. 11.2°
51 |       yield( $1.to_f )  # convert to decimal? how? use float?
52 |       true # bingo - match found
53 |     else
54 |       false # no match found
55 |     end
56 |   end
57 | 
58 |   def match_kcal( value )
59 |     if value =~ /^(\d+(?:\.\d+)?)\s*kcal(?:\/100ml)?$/  # kcal
60 |       # nb: allow 44.4 kcal/100ml or 44.4 kcal or 44.4kcal
61 |       yield( $1.to_f )  # convert to decimal? how? use float?
62 |       true # bingo - match found
63 |     else
64 |       false # no match found
65 |     end
66 |   end
67 | 
68 |   def match_hl( value )  # hector liters (hl) 1hl = 100l
69 |     if value =~ /^(?:([0-9][0-9_ ]+[0-9]|[0-9]{1,2})\s*hl)$/  # e.g. 20_000 hl or 50hl etc.
70 |       yield( $1.gsub( /[ _]/, '' ).to_i )
71 |       true # bingo - match found
72 |     else
73 |       false # no match found
74 |     end
75 |   end
76 | 
77 |   end # module ValueHelper
78 | end # module TextUtils
79 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/manifest.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | class Manifest
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 | 
10 |   def initialize()
11 |     @manifest = []
12 |   end
13 | 
14 |   def self.load_file_core( old_logger_do_not_use, path )
15 |     puts "*** deprecated API call [Pakman::Manifest.load_file_core] - do NOT pass in logger; no longer required/needed; logger arg will get removed"
16 | 
17 |     obj = self.new
18 |     obj.load_file_core_worker( path )
19 |     obj
20 |   end
21 | 
22 |   def self.load_file( old_logger_do_not_use, path )
23 |     puts "*** deprecated API call [Pakman::Manifest.load_file] - do NOT pass in logger; no longer required/needed; logger arg will get removed"
24 | 
25 |     obj = self.new
26 |     obj.load_file_worker( path )
27 |     obj
28 |   end
29 | 
30 | 
31 |   def self.load_file_core_v2( path )
32 |     obj = self.new
33 |     obj.load_file_core_worker( path )
34 |     obj
35 |   end
36 | 
37 |   def self.load_file_v2( path )
38 |     obj = self.new
39 |     obj.load_file_worker( path )
40 |     obj
41 |   end
42 | 
43 | 
44 | 
45 | 
46 |   def each
47 |     @manifest.each { |ary| yield ary }
48 |   end
49 | 
50 | 
51 | 
52 |   def load_file_core_worker( path )
53 |     @manifest = []
54 | 
55 |     File.open( path, 'r:bom|utf-8' ).readlines.each_with_index do |line,i|
56 |       case line
57 |       when /^\s*$/
58 |         # skip empty lines
59 |       when /^\s*#.*$/
60 |         # skip comment lines
61 |       else
62 |         logger.debug "line #{i+1}: #{line.strip}"
63 |         values = line.strip.split( /[ <,+]+/ )
64 | 
65 |         # add source for shortcuts (assumes relative path; if not issue warning/error)
66 |         values << values[0] if values.size == 1
67 | 
68 |         @manifest << values
69 |       end
70 |     end
71 |   end
72 | 
73 |   def load_file_worker( path )
74 |     filename = path
75 | 
76 |     logger.info "  Loading template manifest #{filename}..."
77 |     load_file_core_worker( filename )
78 | 
79 |     # post-processing
80 |     # normalize all source paths (1..-1) /make full path/add template dir
81 | 
82 |     templatesdir = File.dirname( path )
83 |     logger.debug "templatesdir=#{templatesdir}"
84 | 
85 |     @manifest.each do |values|
86 |       (1..values.size-1).each do |i|
87 |         values[i] = "#{templatesdir}/#{values[i]}"
88 |         logger.debug "  path[#{i}]=>#{values[i]}<"
89 |       end
90 |     end
91 |   end
92 | 
93 | 
94 | end  # class Manifest
95 | end  # module Pakman
96 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/cli/runner.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | class Runner
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 |   def initialize
10 |     @opts = Opts.new
11 |   end
12 | 
13 |   attr_reader :opts
14 | 
15 |   def run( args )
16 |     opt=OptionParser.new do |cmd|
17 | 
18 |       cmd.banner = "Usage: pakman [options]"
19 | 
20 |       cmd.on( '-f', '--fetch URI', 'Fetch Templates' ) do |uri|
21 |         opts.fetch_uri = uri
22 |       end
23 | 
24 |       cmd.on( '-t', '--template MANIFEST',  'Generate Templates' ) do |manifest|
25 |         opts.generate = true
26 |         opts.manifest = manifest
27 |       end
28 | 
29 |       cmd.on( '-l', '--list', "List Installed Templates" ) { opts.list = true }
30 | 
31 |       cmd.on( '-c', '--config PATH', "Configuration Path (default is #{opts.config_path})" ) do |path|
32 |         opts.config_path = path
33 |       end
34 | 
35 |       cmd.on( '-o', '--output PATH', "Output Path (default is #{opts.output_path})" ) { |path| opts.output_path = path }
36 | 
37 |       cmd.on( '-v', '--version', "Show version" ) do
38 |         puts Pakman.banner
39 |         exit
40 |       end
41 | 
42 |       cmd.on( "--verbose", "Show debug trace" )  do
43 |         ## logger.datetime_format = "%H:%H:%S"
44 |         ## logger.level = Logger::DEBUG
45 |         # fix: use logutils - set to debug
46 |       end
47 | 
48 |       cmd.on_tail( "-h", "--help", "Show this message" ) do
49 |         puts <<EOS
50 | 
51 | pakman - Lets you manage template packs.
52 | 
53 | #{cmd.help}
54 | 
55 | Examples:
56 |     pakman -f URI                             # to be done
57 |     pakman -f URI  -c ~/.slideshow/templates
58 | 
59 |     pakman -l                                 # to be done
60 |     pakman -l -c ~/.slideshow/templates
61 | 
62 |     pakman -t s6
63 |     pakman -t s6 ruby19.yml
64 |     pakman -t s6 ruby19.yml tagging.yml
65 |     pakman -t s6 -o o
66 |     pakman -t s6 -c ~/.slideshow/templates
67 | 
68 | Further information:
69 |   http://geraldb.github.com/pakman
70 | 
71 | EOS
72 |         exit
73 |       end
74 |     end
75 | 
76 |     opt.parse!( args )
77 | 
78 |     puts Pakman.banner
79 | 
80 |     if opts.list?
81 |       List.new( opts ).run
82 |     elsif opts.generate?
83 |       Gen.new( opts ).run( args )
84 |     elsif opts.fetch?
85 |       Fetch.new( opts ).run
86 |     else
87 |       puts "-- No command do nothing for now.  --"  ## run help??
88 |       puts "Done."
89 |     end
90 |   end   # method run
91 | 
92 | end # class Runner
93 | end # module Pakman
94 | 


--------------------------------------------------------------------------------
/pakman/lib/pakman/fetcher.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | class Fetcher
 6 | 
 7 |   include LogUtils::Logging
 8 | 
 9 | 
10 |   def fetch_pak( manifestsrc, pakpath )
11 | 
12 |     start = Time.now
13 | 
14 |     uri = URI.parse( manifestsrc )
15 | 
16 |     logger.debug "scheme: #{uri.scheme}, host: #{uri.host}, port: #{uri.port}, path: #{uri.path}"
17 | 
18 |     dirname  = File.dirname( uri.path )
19 |     filename = File.basename( uri.path )       # e.g. fullerscreen.txt (with extension)
20 | 
21 |     pakname = Pakman.pakname_from_file( uri.path )
22 | 
23 |     logger.debug "dirname >#{dirname}<"
24 |     logger.debug "filename >#{filename}<"
25 |     logger.debug "pakname >#{pakname}<"
26 | 
27 |     dlbase = "#{uri.scheme}://#{uri.host}:#{uri.port}#{dirname}"
28 |     logger.debug "dlbase: #{dlbase}"
29 |     logger.debug "pakpath: #{pakpath}"
30 | 
31 |     FileUtils.makedirs( pakpath ) unless File.directory?( pakpath )
32 | 
33 |     logger.info "Fetching template pack '#{pakname}'"
34 |     logger.info "    from '#{dlbase}'"
35 |     logger.info "    saving to '#{pakpath}'"
36 | 
37 |     # step 1: download manifest
38 |     manifestdest = "#{pakpath}/#{filename}"
39 | 
40 |     logger.info "  Downloading manifest '#{filename}'..."
41 | 
42 |     fetch_file( manifestsrc, manifestdest )
43 | 
44 |     ## todo: change back to load_file_core after deprecated api got removed
45 |     manifest = Manifest.load_file_core_v2( manifestdest )
46 | 
47 |     # step 2: download files & templates listed in manifest
48 |     manifest.each do |entry|
49 |       source = entry[1]
50 | 
51 |       # get full (absolute) path and make sure path exists
52 |       destfull = File.expand_path( source, pakpath )  # NB: turning source into dest
53 |       destpath = File.dirname( destfull )
54 |       FileUtils.makedirs( destpath ) unless File.directory?( destpath )
55 | 
56 |       logger.debug "destfull=>#{destfull}<"
57 |       logger.debug "destpath=>#{destpath}<"
58 | 
59 |       sourcefull = "#{dlbase}/#{source}"
60 | 
61 |       if source =~ /\.erb\.|.erb$/
62 |         logger.info "  Downloading template '#{source}'..."
63 |       else
64 |         logger.info "  Downloading file '#{source}'..."
65 |       end
66 | 
67 |       fetch_file( sourcefull, destfull )
68 |     end
69 |     logger.info "Done (in #{Time.now-start} s)."
70 |   end # method fetch_pak
71 | 
72 | private
73 | 
74 |   def fetch_file( src, dest )
75 |      ## note: code moved to its own gem, that is, fetcher
76 |      ## see https://github.com/geraldb/fetcher
77 | 
78 |     ::Fetcher::Worker.new.copy( src, dest )
79 |   end
80 | 
81 | end # class Fetcher
82 | end # module Pakman
83 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/helper/value_helper_ii.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | module TextUtils
 5 |   module ValueHelper
 6 | 
 7 |   #####
 8 |   ## fix!!!!: move to beerdb ??? why? why not?? - yes, move to beerdb-models
 9 | 
10 |   def match_brewery( value )
11 |     if value =~ /^by:/   ## by:  -brewed by/brewery
12 |       brewery_key = value[3..-1]  ## cut off by: prefix
13 |       brewery = BeerDb::Model::Brewery.find_by_key!( brewery_key )
14 |       yield( brewery )
15 |       true # bingo - match found
16 |     else
17 |       false # no match found
18 |     end
19 |   end
20 | 
21 | 
22 |   def is_year?( value )
23 |     # founded/established year e.g. 1776
24 |     match_result =  value =~ /^[0-9]{4}$/
25 |     # match found if 0,1,2,3 etc or no match if nil
26 |     # note: return bool e.g. false|true  (not 0,1,2,3 etc. and nil)
27 |     match_result != nil
28 |   end
29 | 
30 | 
31 |   def match_year( value )
32 |     if is_year?( value )  # founded/established year e.g. 1776
33 |       yield( value.to_i )
34 |       true # bingo - match found
35 |     else
36 |       false # no match found
37 |     end
38 |   end
39 | 
40 | 
41 |   def is_address?( value )
42 |     # if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
43 |     match_result =  value =~ /\/{2}/
44 |     # match found if 0,1,2,3 etc or no match if nil
45 |     # note: return bool e.g. false|true  (not 0,1,2,3 etc. and nil)
46 |     match_result != nil
47 |   end
48 | 
49 |   def is_taglist?( value )
50 |     ### note: cannot start w/ number must be letter for now
51 |     ##  -- in the future allow free standing years (e.g. 1980 etc.?? why? why not?)
52 |     ##  e.g. not allowed  14 ha or 5_000 hl etc.
53 |     match_result =  value =~ /^([a-z][a-z0-9\|_ ]*[a-z0-9]|[a-z])$/
54 |     # match found if 0,1,2,3 etc or no match if nil
55 |     # note: return bool e.g. false|true  (not 0,1,2,3 etc. and nil)
56 |     match_result != nil
57 |   end
58 | 
59 | 
60 |   def is_website?( value )
61 |     # check for url/internet address e.g. www.ottakringer.at
62 |     #  - must start w/  www. or
63 |     #  - must end w/   .com
64 |     #
65 |     # fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
66 |     match_result =  value =~ /^www\.|\.com$/
67 |     # match found if 0,1,2,3 etc or no match if nil
68 |     # note: return bool e.g. false|true  (not 0,1,2,3 etc. and nil)
69 |     match_result != nil
70 |   end
71 | 
72 |   def match_website( value )
73 |     if is_website?( value )   # check for url/internet address e.g. www.ottakringer.at
74 |       # fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
75 |       yield( value )
76 |       true # bingo - match found
77 |     else
78 |       false # no match found
79 |     end
80 |   end
81 | 
82 |   end # module ValueHelper
83 | end # module TextUtils
84 | 


--------------------------------------------------------------------------------
/pakman/README.md:
--------------------------------------------------------------------------------
 1 | # `pakman` - Template Pack Manager in Ruby (incl. Embedded Ruby, Liquid, etc.)
 2 | 
 3 | 
 4 | * home  :: [github.com/rubylibs/pakman](https://github.com/rubylibs/pakman)
 5 | * bugs  :: [github.com/rubylibs/pakman/issues](https://github.com/rubylibs/pakman/issues)
 6 | * gem   :: [rubygems.org/gems/pakman](https://rubygems.org/gems/pakman)
 7 | * rdoc  :: [rubydoc.info/gems/pakman](http://rubydoc.info/gems/pakman)
 8 | * forum :: [groups.google.com/group/wwwmake](http://groups.google.com/group/wwwmake)
 9 | 
10 | ## Usage - Ruby Code
11 | 
12 | Fetch a template pack:
13 | 
14 | ```ruby
15 | Pakman::Fetcher.new.fetch_pak( src, pakpath )
16 | ```
17 | 
18 | Copy a template pack from your cache:
19 | 
20 | ```ruby
21 | Pakman::Copier.new.copy_pak( src, pakpath )
22 | ```
23 | 
24 | Merge a template pack from your cache:
25 | 
26 | ```ruby
27 | Pakman::Templater.new.merge_pak( src, pakpath, binding, name )
28 | ```
29 | 
30 | 
31 | List all template packs in your cache (using passed in search path):
32 | 
33 | ```ruby
34 | patterns  = [
35 |   "#{File.expand_path('~/.pak')}/*.txt",
36 |   "#{File.expand_path('~/.pak')}/*/*.txt",
37 |   "*.txt",
38 |   "*/*.txt"
39 | ]
40 | 
41 | Pakman::Finder.new.find_manifests( patterns )
42 | ```
43 | 
44 | 
45 | ## Usage - Command Line
46 | 
47 | The `pakman` gem includes a little command line tool. Try `pakman -h` for details:
48 | 
49 | ```
50 | pakman - Lets you manage template packs.
51 | 
52 | Usage: pakman [options]
53 |     -f, --fetch URI                  Fetch Templates
54 |     -t, --template MANIFEST          Generate Templates
55 |     -l, --list                       List Installed Templates
56 |     -c, --config PATH                Configuration Path (default is ~/.pak)
57 |     -o, --output PATH                Output Path (default is .)
58 |     -v, --version                    Show version
59 |         --verbose                    Show debug trace
60 |     -h, --help                       Show this message
61 | ```
62 | 
63 | ## Install
64 | 
65 | Just install the gem:
66 | 
67 |     $ gem install pakman
68 | 
69 | 
70 | ## Real World Usage
71 | 
72 | The [`slideshow`](http://slideshow-s9.github.io) gem (also known as Slide Show (S9))
73 | lets you create slide shows
74 | and author slides in plain text using a wiki-style markup language that's easy-to-write and easy-to-read.
75 | 
76 | ## Real World Template Packs
77 | 
78 | * [S6 Template Pack](https://github.com/slideshow-templates/slideshow-s6-blank)
79 | * [impress.js Template Pack](https://github.com/slideshow-templates/slideshow-impress.js)
80 | * [deck.js Template Pack](https://github.com/slideshow-templates/slideshow-deck.js)
81 | 
82 | 
83 | ## License
84 | 
85 | The `pakman` scripts are dedicated to the public domain.
86 | Use it as you please with no restrictions whatsoever.
87 | 
88 | ## Questions? Comments?
89 | 
90 | Send them along to the [wwwmake forum/mailing list](http://groups.google.com/group/wwwmake).
91 | Thanks!
92 | 


--------------------------------------------------------------------------------
/NOTES.md:
--------------------------------------------------------------------------------
  1 | # Notes
  2 | 
  3 | ## Alternatives
  4 | 
  5 | ### ascii-ify /asciify  (transliterate from unicode (UTF-8) to ASCII (7-bit))
  6 | 
  7 | - [stringex gem](https://github.com/rsl/stringex) - string extensions; includes Unidecoder (aka asciify, that is, from unicode to ascii); also includes acts_as_url (aka slugifier)
  8 | 
  9 | ~~~
 10 | def decode(string)
 11 |   string.chars.map{|char| decoded(char)}.join
 12 | end
 13 | 
 14 | def decoded(character)
 15 |   localized(character) || from_yaml(character)
 16 | end
 17 | 
 18 | def localized(character)
 19 |   Localization.translate(:transliterations, character)
 20 | end
 21 | 
 22 | # Contains Unicode codepoints, loading as needed from YAML files
 23 | CODEPOINTS = Hash.new{|h, k|
 24 |   h[k] = ::YAML.load_file(File.join(File.expand_path(File.dirname(__FILE__)), "unidecoder_data", "#{k}.yml"))
 25 | } unless defined?(CODEPOINTS)
 26 |     
 27 | def from_yaml(character)
 28 |   return character unless character.ord > 128
 29 |   unpacked = character.unpack("U")[0]
 30 |   CODEPOINTS[code_group(unpacked)][grouped_point(unpacked)]
 31 | rescue
 32 |   # Hopefully this won't come up much
 33 |   # TODO: Make this note something to the user that is reportable to me perhaps
 34 |   "?"
 35 | end
 36 | 
 37 | # Returns the Unicode codepoint grouping for the given character
 38 | def code_group(unpacked_character)
 39 |   "x%02x" % (unpacked_character >> 8)
 40 | end
 41 | 
 42 | # Returns the index of the given character in the YAML file for its codepoint group
 43 | def grouped_point(unpacked_character)
 44 |   unpacked_character & 255
 45 | end
 46 | ~~~
 47 | 
 48 | [(Source - unidecoder.rb)](https://github.com/rsl/stringex/blob/master/lib/stringex/unidecoder.rb)
 49 | 
 50 | 
 51 | - [asciify gem](https://github.com/levinalex/asciify) - uses iconv
 52 | 
 53 | ~~~
 54 | def convert(str)
 55 |   u16s = @from_input_enc.iconv(str)
 56 |   
 57 |   s = u16s.unpack(PackFormat).collect { |codepoint|
 58 |        codepoint < 128 ? codepoint : @mapping[codepoint]
 59 |   }.flatten.compact.pack(PackFormat)
 60 |   
 61 |   return @to_output_enc.iconv(s)
 62 | end
 63 | ~~~
 64 | 
 65 | 
 66 | ### slugify
 67 | 
 68 | - [slugify gem](https://github.com/Slicertje/Slugify)
 69 | 
 70 | ~~~
 71 | SLUGGY_MAPPING = {
 72 |         'Ȃ' => 'a',
 73 |         'ȃ' => 'a',
 74 |         'Ȅ' => 'e',
 75 |         'ȅ' => 'e',
 76 |         ...
 77 | }
 78 | 
 79 | def convert( str )
 80 |   result = ''
 81 | 
 82 |   str.each_char do |kar|
 83 |     if SLUGGY_MAPPING.include?(kar)
 84 |       result << SLUGGY_MAPPING[kar]
 85 |     end
 86 |   end
 87 | ~~~
 88 | 
 89 | 
 90 | - [string_helpers gem](https://github.com/RaphaelIvan/string_helpers)  -- super simple (e.g. just replaces spaces with dashes!, that's it)
 91 | 
 92 | ~~~
 93 | "Jhon Doe".slug! #=> "Jhon-Doe"
 94 | 
 95 | def slug!
 96 |   self.gsub( " ", "-" )
 97 | end
 98 | ~~~
 99 | 
100 | #### PHP
101 | 
102 | - [Slugify.php](https://github.com/cocur/slugify)
103 | 
104 | 


--------------------------------------------------------------------------------
/textutils/test/test_title_helper.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | require 'helper'
 4 | 
 5 | 
 6 | class TestTitleHelper < Minitest::Test
 7 | 
 8 |   def test_title_to_key
 9 |     
10 |     puts '[debug] enter test_title_to_key()'
11 | 
12 |     txt_io = [
13 |       [ 'São Paulo',   'saopaulo' ],
14 |       [ 'São Gonçalo', 'saogoncalo' ],
15 |       [ 'Výčepní',     'vycepni' ],
16 |       [ 'Bock ‹Damm›', 'bockdamm' ],
17 |       [ '‹Estrella› ‹Damm› Inedit', 'estrelladamminedit' ],
18 |       [ '‹Hirter› Pils', 'hirterpils' ],
19 |       [ '‹Villacher› Märzen', 'villachermaerzen' ],
20 |       [ 'Bock <Damm>', 'bockdamm' ],
21 |       [ '<Estrella> <Damm> Inedit', 'estrelladamminedit' ],
22 |       [ 'Żubr', 'zubr' ],
23 |       [ 'Żywiec', 'zywiec' ],
24 |       [ 'Lomża Export', 'lomzaexport' ],
25 |       [ 'Nogne Ø Imperial Stout', 'nogneoimperialstout' ],
26 |       [ 'Xyauyù', 'xyauyu' ],
27 |       [ 'Águila', 'aguila' ],
28 |       [ '+Lupulus', 'lupulus' ],
29 |       [ '+Malta', 'malta' ],
30 |       [ 'Minerva 8:60', 'minerva860' ],
31 |       [ 'Hop Crisis!', 'hopcrisis' ],
32 |       [ '$Alianz$ Arena', 'alianzarena' ],
33 |       [ 'Arena Amazônia', 'arenaamazonia' ],
34 |       [ 'Tōkyō [Tokyo]', 'tokyo' ],
35 |       [ 'Ōsaka [Osaka]', 'osaka' ],
36 |       [ 'El Djazaïr [Algiers]', 'eldjazair' ],
37 |       [ 'Al-Kharṭūm [Khartoum]', 'alkhartum' ],
38 |       [ 'Ṭarābulus [Tripoli]', 'tarabulus' ],
39 |       [ 'Al-Iskandarīyah [Alexandria]', 'aliskandariyah' ],
40 |       [ 'Pishōr', 'pishor' ],
41 |       [ 'Pishāwar', 'pishawar' ],
42 |       [ 'Islām ābād', 'islamabad' ],
43 |       [ 'Thành Phố Hồ Chí Minh [Saigon]', 'thanhphohochiminh' ],
44 |       [ 'Hà Nội [Hanoi]', 'hanoi' ],
45 |       [ 'Donets’k', 'donetsk' ],
46 |       [ 'Baghdād [Baghdad]', 'baghdad'],
47 |       [ 'Al-Mawṣil [Mosul]', 'almawsil'],
48 |       [ 'Al-Baṣrah [Basra]', 'albasrah'],
49 |       [ 'Arbīl [Erbil]', 'arbil' ],
50 |       [ 'Kirkūk [Kirkuk]', 'kirkuk' ],
51 |       [ 'Tehrān [Tehran]', 'tehran' ],
52 |       [ 'Eṣfahān [Isfahan]', 'esfahan' ],
53 |       [ 'Shīrāz [Shiraz]', 'shiraz' ],
54 |       [ 'Tabrīz [Tabriz]', 'tabriz' ],
55 |       [ 'Ahvāz [Ahvaz]', 'ahvaz' ],
56 |       [ 'Ad-Dawḥah [Doha]', 'addawhah'],
57 |       [ 'Ḥalab [Aleppo]', 'halab'],
58 |       [ 'Al-Madīnah [Medina]', 'almadinah'],
59 |       [ 'Ad-Dammām [Dammam]', 'addammam' ],
60 |       [ 'Aṭ-Ṭā’if', 'attaif'], 
61 |       [ 'Ḫamīs Mušayṭ', 'hamismusayt'],
62 |       [ "Ṣan'ā' [Sana'a]", 'sana'],
63 |       [ "P'yŏngyang [Pyongyang]", 'pyongyang' ],
64 |       [ 'Kāṭhmāḍaũ [Kathmandu]', 'kathmadau' ],
65 |       [ "Pe\u{030C}awar", 'pexawar'],  ## note: use unicode literal; Pex̌awar  -- see en.wikipedia.org/wiki/Peshawar
66 |       [ '1850 München', '1850muenchen'],
67 |     ]
68 | 
69 |     txt_io.each do |txt|
70 |       assert_equal txt[1], TextUtils.title_to_key( txt[0] )
71 |     end
72 | 
73 |     puts '[debug] leave test_title_to_key()'
74 | 
75 |   end # method test_title_to_key
76 | 
77 | 
78 | end # class TestTitleHelper


--------------------------------------------------------------------------------
/pakman/lib/pakman/liquid/templater.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Pakman
 4 | 
 5 | 
 6 | class LiquidTemplater
 7 | 
 8 |   include LogUtils::Logging
 9 | 
10 | 
11 |   ###
12 |   ## check these file extensions for processing
13 |   ##   processing (w/ liquid) if front matter header found / present
14 | 
15 |   EXTS = ['html', 'svg',
16 |           'js',   'json',
17 |           'css',
18 |           'txt',  'text',
19 |           'md',   'markdown']
20 | 
21 |   ## convert html to \.html$ (e.g. match end-of-string and leading dot)
22 |   ##  e.g.  /\.(html|svg|...)$|/ etc.
23 |   REGEX_EXT = Regexp.new( "\\.(#{EXTS.join('|')})$", Regexp::IGNORECASE )
24 | 
25 | 
26 |   ## rename binding/hash to assigns or something more specific - why? why not?
27 |   def merge_pak( manifestsrc, pakpath, binding, name )
28 | 
29 |     start = Time.now
30 | 
31 |     pakname = Pakman.pakname_from_file( manifestsrc )
32 | 
33 |     puts "Merging template pack '#{pakname}'"
34 | 
35 |     # todo: rename to load_file once depreated API got removed
36 |     manifest = Manifest.load_file_v2( manifestsrc )
37 | 
38 |     manifest.each do |entry|
39 |       dest   = entry[0]
40 |       source = entry[1]
41 | 
42 |       if dest =~ /__file__/   # replace w/ name
43 |         dest = dest.gsub( '__file__', name )
44 |       end
45 | 
46 |       # get full (absolute) path and make sure path exists
47 |       destfull = File.expand_path( dest, pakpath )
48 |       destpath = File.dirname( destfull )
49 |       FileUtils.makedirs( destpath ) unless File.directory?( destpath )
50 | 
51 |       logger.debug "destfull=>#{destfull}<"
52 |       logger.debug "destpath=>#{destpath}<"
53 | 
54 |       ###
55 |       # note:
56 |       #  use jekyll convention for now
57 |       #   check if file starts with front matter (yaml block)
58 |       #   if yes, process (with liquid) otherwise copy as is 1:1
59 | 
60 |       ####
61 |       # note: for now only check files with known source extensions!!!
62 |       #   do NOT check binaries (e.g. gif, png, ico, etc. -- regex will fail w/ encoding error)
63 |       #    todo/check:  check how jekyll works e.g. does jekyll check binaries for front-matter etc. ????
64 | 
65 |       is_source_page = REGEX_EXT.match( source )   # note: returns nil or MatchData - do NOT use check == false or true (will NOT work)
66 | 
67 |       if is_source_page.nil?
68 |         puts "    No (pre-)processing for '#{source}' (copy 1:1) - no matching (known) source extension e.g. #{EXTS.join('|')}"
69 |         source_page = nil
70 |       else
71 |         source_page = Page.from_file( source )
72 |       end
73 | 
74 |       if source_page && source_page.headers?
75 |         puts "  Bingo! Front matter (e.g. ---) found. Merging template to #{dest}..."
76 | 
77 |         out = File.new( destfull, 'w+:utf-8' )     ## note: use utf8
78 |         ## note: only pass along contents (not headers e.g. front matter for now)
79 |         ##  (auto-)add front matter headers as page.[xxx] - why? why not??
80 |         out << LiquidTemplate.from_string( source_page.contents ).render( binding )
81 |         out.flush
82 |         out.close
83 |       else
84 |         puts "  Copying to #{dest} from #{source}..."
85 | 
86 |         FileUtils.copy( source, destfull )
87 |       end
88 |     end # each entry in manifest
89 | 
90 |     puts "Done (in #{Time.now-start} s)."
91 |   end # method merge_pak
92 | 
93 | end # class LiquidTemplater
94 | end # module Pakman
95 | 


--------------------------------------------------------------------------------
/textutils/README.md:
--------------------------------------------------------------------------------
  1 | # `textutils`
  2 | 
  3 | Text Filters, Helpers, Readers and More in Ruby
  4 | 
  5 | * home  :: [github.com/textkit/textutils](https://github.com/textkit/textutils)
  6 | * bugs  :: [github.com/textkit/textutils/issues](https://github.com/textkit/textutils/issues)
  7 | * gem   :: [rubygems.org/gems/textutils](https://rubygems.org/gems/textutils)
  8 | * rdoc  :: [rubydoc.info/gems/textutils](http://rubydoc.info/gems/textutils)
  9 | * forum :: [ruby-talk@ruby-lang.org](http://www.ruby-lang.org/en/community/mailing-lists/)
 10 | 
 11 | 
 12 | 
 13 | ## Filters
 14 | 
 15 | ### `comments_percent_style` Filter
 16 | 
 17 | Strip comment lines starting with percent (that is, %). Example:
 18 | 
 19 |     %%%%%%%%%%%%%%%%
 20 |     % Some Headers
 21 |     
 22 |     Title: Web Services REST-Style: Universal Identifiers, Formats & Protocols
 23 |     
 24 |     %%%%%%%%%%%%%%%%%%%
 25 |     % Some Extra CSS
 26 |     
 27 |     table { width: 100%; }
 28 |     table#restspeak th:nth-child(1) { width: 20%; }
 29 |     table#restspeak th:nth-child(2) { width: 5%; }
 30 | 
 31 | Becomes
 32 | 
 33 |     Title: Web Services REST-Style: Universal Identifiers, Formats & Protocols
 34 |     
 35 |     table { width: 100%; }
 36 |     table#restspeak th:nth-child(1) { width: 20%; }
 37 |     table#restspeak th:nth-child(2) { width: 5%; }
 38 | 
 39 | Also supports multiline comments with `%begin`|`comment`|`comments`/`%end` pairs. Example:
 40 | 
 41 |     %begin
 42 |     Using a modern browser such as Firefox, Chrome and Safari you can
 43 |     now theme your slide shows using "loss-free" vector graphics
 44 |     in plain old CSS. Thanks to gradient support in backgrounds in CSS3.
 45 |     %end
 46 | 
 47 | or
 48 | 
 49 |     %comment
 50 |     Using a modern browser such as Firefox, Chrome and Safari you can
 51 |     now theme your slide shows using "loss-free" vector graphics
 52 |     in plain old CSS. Thanks to gradient support in backgrounds in CSS3.
 53 |     %end
 54 | 
 55 | Note: As a shortcut using a single `%end` directive (that is, without a leading `%begin`)
 56 | will skip everything until the end of the document.
 57 | 
 58 | 
 59 | ### `skip_end_directive` Filter
 60 | 
 61 | Skip (comment out) text blocks in your document by
 62 | enclosing with `__SKIP__`/`__END__`. Example:
 63 | 
 64 |     __SKIP__
 65 |     Using a modern browser such as Firefox, Chrome and Safari you can
 66 |     now theme your slide shows using "loss-free" vector graphics
 67 |     in plain old CSS. Thanks to gradient support in backgrounds in CSS3.
 68 |     __END__
 69 | 
 70 | Note: As a shortcut using just `__END__` (without `__SKIP__`)
 71 | will skip everything from `__END__` until the end of the document.
 72 | 
 73 | 
 74 | TBD
 75 | 
 76 | ## Helpers
 77 | 
 78 | TBD
 79 | 
 80 | 
 81 | ## Install
 82 | 
 83 | Just install the gem:
 84 | 
 85 |     $ gem install textutils
 86 | 
 87 | 
 88 | ## Real World Usage
 89 | 
 90 | The [`slideshow`](http://slideshow-s9.github.io) gem (also known as Slide Show (S9))
 91 | lets you create slide shows
 92 | and author slides in plain text using a wiki-style markup language that's easy-to-write and easy-to-read.
 93 | 
 94 | The [`markdown`](https://github.com/writekit/markdown) gem lets you use your markdown library
 95 | of choice.
 96 | 
 97 | The [`worlddb`](https://github.com/worlddb/world.db.ruby) gem offers a command line tool for the open world database (`world.db`).
 98 | 
 99 | The [`sportdb`](https://github.com/sportdb/sport.db.ruby) gem offers a command line tool for the open sport/football database (`sport.db`/`football.db`).
100 | 
101 | 
102 | ## License
103 | 
104 | The `textutils` scripts are dedicated to the public domain.
105 | Use it as you please with no restrictions whatsoever.
106 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/reader/line_reader.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # fix: move into TextUtils namespace/module!!
  4 | 
  5 | 
  6 | class StringLineReader
  7 |   ## fix/todo:
  8 |   ##   remove - deprecated/obsolete - do NOT use
  9 |   ##   use LineReader.from_string
 10 | 
 11 |   include LogUtils::Logging
 12 | 
 13 |   def initialize( text )
 14 |     logger.info "StringLineReader.new - deprecated API - use LineReader.from_string() instead"
 15 |     @reader = LineReader.from_string( text )
 16 |   end
 17 | 
 18 |   def each_line
 19 |     @reader.each_line do |line|    
 20 |       yield( line )
 21 |     end # each lines
 22 |   end # method each_line
 23 | end
 24 | 
 25 | 
 26 | 
 27 | class LineReader
 28 | 
 29 |   include LogUtils::Logging
 30 | 
 31 |   def self.from_zip( zip_file, entry_path )
 32 |     entry = zip_file.find_entry( entry_path )
 33 | 
 34 |     ## todo/fix: add force encoding to utf-8 ??
 35 |     ##  check!!!
 36 |     ##  clean/prepprocess lines
 37 |     ##  e.g. CR/LF (/r/n) to LF (e.g. /n)
 38 |     text = entry.get_input_stream().read()
 39 | 
 40 |     ## NOTE: needs logger ref; only available in instance methods; use global logger for now
 41 |     logger = LogUtils::Logger.root
 42 |     logger.debug "text.encoding.name (before): #{text.encoding.name}"
 43 | #####
 44 | # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
 45 | ## NB:
 46 | # for now "hardcoded" to utf8 - what else can we do?
 47 | # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
 48 |     text = text.force_encoding( Encoding::UTF_8 )
 49 |     logger.debug "text.encoding.name (after): #{text.encoding.name}"     
 50 | 
 51 |     ## todo:
 52 |     # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
 53 |     ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
 54 | 
 55 |     self.from_string( text )
 56 |   end
 57 | 
 58 |   def self.from_file( path )
 59 |     ## nb: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
 60 |     ## - see textutils/utils.rb
 61 |     text = File.read_utf8( path )
 62 |     self.from_string( text )
 63 |   end
 64 | 
 65 |   def self.from_string( text )
 66 |     LineReader.new( text: text )
 67 |   end
 68 | 
 69 | 
 70 |   def initialize( arg )
 71 |     if arg.is_a?( String )  ## old style (deprecated) - pass in filepath as string
 72 |       path = arg
 73 |       logger.info "LineReader.new - deprecated API - use LineReader.from_file() instead"
 74 |       @text = File.read_utf8( path )
 75 |     else   ## assume it's a hash
 76 |       opts = arg
 77 |       @text = opts[:text]
 78 |     end
 79 |   end
 80 | 
 81 |   def each_line
 82 |     @text.each_line do |line|
 83 | 
 84 |       # comments allow:
 85 |       # 1) #####  (shell/ruby style)
 86 |       # 2) --  comment here (haskel/?? style)
 87 |       # 3) % comment here (tex/latex style)
 88 | 
 89 |       if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
 90 |         # skip komments and do NOT copy to result (keep comments secret!)
 91 |         logger.debug 'skipping comment line'
 92 |         next
 93 |       end
 94 |         
 95 |       if line =~ /^\s*$/ 
 96 |         # kommentar oder leerzeile überspringen 
 97 |         logger.debug 'skipping blank line'
 98 |         next
 99 |       end
100 | 
101 |       # pass 1) remove possible trailing eol comment
102 |       ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
103 |       ## becomes -> nyc, New York
104 | 
105 |       line = line.sub( /\s+#.+$/, '' )
106 | 
107 |       # pass 2) remove leading and trailing whitespace
108 |       
109 |       line = line.strip
110 |  
111 |       yield( line )
112 |     end # each lines
113 |   end # method each_line
114 |   
115 | end # class LineReader
116 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/filter/erb_django_filter.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | module TextUtils
  4 |   module Filter
  5 | 
  6 |   def erb_django_style( content, options={} )
  7 | 
  8 |     # replace expressions (support for single lines only)
  9 |     #  {{ expr }}  ->  <%= expr %>
 10 |     #  {% stmt %}  ->  <%  stmt %>   !! add in do if missing (for convenience)
 11 |     #
 12 |     # use use {{{ or {{{{ to escape expr back to literal value
 13 |     # and use {%% %} to escape stmts
 14 | 
 15 |     erb_expr = 0
 16 |     erb_stmt_beg = 0
 17 |     erb_stmt_end = 0
 18 | 
 19 |     content.gsub!( /(\{{2,4})([^{}\n]+?)(\}{2,4})/ ) do |match|
 20 |       escaped = ($1.length > 2)
 21 |       if escaped
 22 |         "{{#{$2}}}"
 23 |       else
 24 |         erb_expr += 1
 25 |         "<%= #{erb_django_simple_params($2)} %>"
 26 |       end
 27 |     end
 28 | 
 29 |     content.gsub!( /(\{%{1,2})([ \t]*end[ \t]*)%\}/ ) do |match|
 30 |       escaped = ($1.length > 2)
 31 |       if escaped
 32 |         "{%#{$2}%}"
 33 |       else
 34 |         erb_stmt_end += 1
 35 |         "<% end %>"
 36 |       end
 37 |     end
 38 | 
 39 |     content.gsub!( /(\{%{1,2})([^%\n]+?)%\}/ ) do |match|
 40 |       escaped = ($1.length > 2)
 41 |       if escaped
 42 |         "{%#{$2}%}"
 43 |       else
 44 |         erb_stmt_beg += 1
 45 |         "<% #{erb_django_simple_params($2)} do %>"
 46 |       end
 47 |     end
 48 | 
 49 |     puts "  Patching embedded Ruby (erb) code Django-style (#{erb_expr} {{-expressions," +
 50 |        " #{erb_stmt_beg}/#{erb_stmt_end} {%-statements)..."
 51 |          
 52 |     content
 53 |   end
 54 | 
 55 | 
 56 | 
 57 | ######################
 58 | ## "private" helpers - do NOT use as filters - todo: add :nodoc: how?
 59 | 
 60 |   def erb_django_simple_params( code )
 61 |     
 62 |     # split into method/directive and parms plus convert params
 63 |     code.sub!( /^[ \t]([\w.]+)(.*)/ ) do |match|
 64 |       directive = $1
 65 |       params    = $2
 66 |       
 67 |       "#{directive} #{params ? erb_simple_params(directive,params) : ''}"
 68 |     end
 69 |     
 70 |     code
 71 |   end
 72 | 
 73 |   def erb_simple_params( method, params )
 74 |     
 75 |     # replace params to support html like attributes e.g.
 76 |     #  plus add comma separator
 77 |     #
 78 |     #  class=part       -> :class => 'part'   
 79 |     #  3rd/tutorial     -> '3rd/tutorial'
 80 |     #  :css             -> :css
 81 |     
 82 |     return params   if params.nil? || params.strip.empty?
 83 | 
 84 |     params.strip!    
 85 |     ## todo: add check for " ??
 86 |     if params.include?( '=>' )
 87 |       puts "** warning: skipping patching of params for helper '#{method}'; already includes '=>':"
 88 |       puts "  #{params}"
 89 |       
 90 |       return params
 91 |     end
 92 |     
 93 |     before = params.clone
 94 |     
 95 |     # 1) string-ify values and keys (that is, wrap in '')
 96 |     #  plus separate w/ commas
 97 |     params.gsub!( /([:a-zA-Z0-9#][\w\/\-\.#()]*)|('[^'\n]*')/) do |match|
 98 |       symbol = ( Regexp.last_match( 0 )[0,1] == ':' )
 99 |       quoted = ( Regexp.last_match( 0 )[0,1] == "'" )
100 |       if symbol || quoted  # return symbols or quoted string as is
101 |         "#{Regexp.last_match( 0 )},"
102 |       else
103 |         "'#{Regexp.last_match( 0 )}',"
104 |       end
105 |     end
106 |         
107 |     # 2) symbol-ize hash keys
108 |     #    change = to =>
109 |     #    remove comma for key/value pairs
110 |     params.gsub!( /'(\w+)',[ \t]*=/ ) do |match|
111 |       ":#{$1}=>"
112 |     end
113 |     
114 |     # 3) remove trailing comma
115 |     params.sub!( /[ \t]*,[ \t]*$/, '' ) 
116 |      
117 |     puts "    Patching params for helper '#{method}' from '#{before}' to:"
118 |     puts "      #{params}"
119 |        
120 |     params
121 |   end
122 | 
123 | 
124 |   end  # module Filter
125 | end   # module TextUtils


--------------------------------------------------------------------------------
/textutils/lib/textutils/reader/fixture_reader.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | ### read in a list of fixtures (that is, fixture names/files)
  4 | 
  5 | # fix: move into TextUtils namespace/module!!
  6 | 
  7 | ##
  8 | #  use ManifestReader ?? why? why not?  - reuse in manifest gem (or manman e.g. manifest manager) ??
  9 | #
 10 | 
 11 | class FixtureReader
 12 | 
 13 |   include LogUtils::Logging
 14 | 
 15 | 
 16 |   def self.from_zip( zip_file, entry_path )
 17 |     entry = zip_file.find_entry( entry_path )
 18 | 
 19 |     ## todo/fix: add force encoding to utf-8 ??
 20 |     ##  check!!!
 21 |     ##  clean/prepprocess lines
 22 |     ##  e.g. CR/LF (/r/n) to LF (e.g. /n)
 23 |     text = entry.get_input_stream().read()
 24 | 
 25 |     ## NOTE: needs logger ref; only available in instance methods; use global logger for now
 26 |     logger = LogUtils::Logger.root
 27 |     logger.debug "text.encoding.name (before): #{text.encoding.name}"
 28 | #####
 29 | # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
 30 | ## NB:
 31 | # for now "hardcoded" to utf8 - what else can we do?
 32 | # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
 33 |     text = text.force_encoding( Encoding::UTF_8 )
 34 |     logger.debug "text.encoding.name (after): #{text.encoding.name}"     
 35 | 
 36 |     ## todo:
 37 |     # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
 38 |     ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
 39 | 
 40 |     self.from_string( text )
 41 |   end
 42 | 
 43 |   def self.from_file( path )
 44 |     ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
 45 |     ## - see textutils/utils.rb
 46 |     text = File.read_utf8( path )
 47 |     self.from_string( text )
 48 |   end
 49 | 
 50 |   def self.from_string( text )
 51 |     FixtureReader.new( { text: text } )
 52 |   end
 53 | 
 54 | 
 55 |   def initialize( arg )
 56 | 
 57 |     if arg.is_a?( String )  ## old style (deprecated) - pass in filepath as string
 58 |       path = arg
 59 |       logger.info "FixtureReader.new - deprecated API - use FixtureReader.from_file() instead"
 60 |       text = File.read_utf8( path )
 61 |     else   ## assume it's a hash
 62 |       opts = arg
 63 |       text = opts[:text]
 64 |     end
 65 | 
 66 |     @ary = []
 67 | 
 68 |     ## nb: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
 69 |     ## - see textutils/utils.rb
 70 |   
 71 |     @ary = plain_text_reader( text )
 72 | 
 73 |     logger.debug "fixture setup:"
 74 |     logger.debug @ary.to_json
 75 |   end
 76 | 
 77 | 
 78 |   def plain_text_reader( text )     ## find a better name - just read?
 79 | 
 80 |     ## use LineReader ?? for (re)use of comment processing - why? why not???
 81 |     ### build up array for fixtures from hash
 82 |     ary = []
 83 | 
 84 |     text.each_line do |line|
 85 | 
 86 |       # comments allow:
 87 |       # 1) #####  (shell/ruby style)
 88 |       # 2) --  comment here (haskel/?? style)
 89 |       # 3) % comment here (tex/latex style)
 90 | 
 91 |       if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
 92 |         # skip komments and do NOT copy to result (keep comments secret!)
 93 |         logger.debug 'skipping comment line'
 94 |         next
 95 |       end
 96 | 
 97 |       if line =~ /^\s*$/ 
 98 |         # skip blank lines
 99 |         logger.debug 'skipping blank line'
100 |         next
101 |       end
102 | 
103 |       # pass 1) remove possible trailing eol comment
104 |       ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
105 |       ## becomes -> nyc, New York
106 | 
107 |       line = line.sub( /\s+#.+$/, '' )
108 | 
109 |       # pass 2) remove leading and trailing whitespace
110 |       
111 |       line = line.strip
112 |  
113 |       ary << line
114 |     end # each lines
115 |     ary  # return fixture ary
116 |   end # method plain_text_reader
117 | 
118 | 
119 |   def each
120 |     @ary.each do |fixture|
121 |       yield( fixture )
122 |     end
123 |   end # method each
124 | 
125 | end # class FixtureReader
126 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/classifier.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | module TextUtils
  4 | 
  5 | class Classifier
  6 | 
  7 |   include LogUtils::Logging
  8 | 
  9 |   def initialize
 10 |     @h = Hash.new( [] )  # hash w/ words - default value is empty ary (word_list)
 11 |   end
 12 | 
 13 |   def train( key, ary_or_hash_or_str )
 14 | 
 15 |     ## add words to lang/topic key
 16 | 
 17 |     if ary_or_hash_or_str.kind_of?( Array )
 18 |       words = ary_or_hash_or_str
 19 |     elsif ary_or_hash_or_str.kind_of?( Hash )
 20 |       words = []
 21 |       ary_or_hash_or_str.each do |_, values|
 22 |         words += values.strip.split('|')
 23 |       end
 24 |     else  # assume string (allow list separated by |)
 25 |       words = ary_or_hash_or_str.strip.split('|')
 26 |     end
 27 | 
 28 |     @h[ key ] += words
 29 |   end
 30 | 
 31 |   def classify_file( path )
 32 |     classify( File.read_utf8( path ) )
 33 |   end
 34 | 
 35 |   def classify( text_with_comments )
 36 | 
 37 |     ## check encoding
 38 |     logger.debug "  classify - text.encoding: #{text_with_comments.encoding.name}"
 39 |     
 40 |     # nb: strip comments first
 41 |     text = strip_comments( text_with_comments )
 42 | 
 43 |     counts = []
 44 |       ## e.g. [[ 'en', 20], # 20 words
 45 |       ##       [ 'de',  2]] # 2 words
 46 | 
 47 |     @h.each_with_index do |(key,words),i|
 48 |       logger.debug "key #{key} (#{i+1}/#{@h.size}) - #{words.size} words"
 49 |       counts << [key, count_words_in_text( words, text )]
 50 |     end
 51 | 
 52 |     # sort by word count (reverse sort e.g. highest count goes first)
 53 |     counts = counts.sort {|l,r| r[1] <=> l[1] }
 54 |     
 55 |     # dump stats
 56 |     
 57 |     logger.debug "results:"
 58 |     counts.each_with_index do |entry,i|
 59 |       ## e.g. 1. en: 20 words
 60 |       ##      2. de: 2 words
 61 |       logger.debug " #{i+1}. #{entry[0]}: #{entry[1]}"
 62 |     end
 63 |     
 64 |     logger.debug "classifier - using key >>#{counts[0][0]}<<"
 65 |     
 66 |     ## return key/lang code w/ highest count
 67 |     counts[0][0]
 68 |   end
 69 | 
 70 | 
 71 |   def dump
 72 |     # for debugging dump setup (that is, keys w/ words etc.)
 73 | 
 74 |     @h.each_with_index do |(key, words), i|
 75 |       logger.debug "key #{key} (#{i+1}/#{@h.size}) - #{words.size} words:"
 76 |       logger.debug words.inspect
 77 |       
 78 |       ## check encoding of words (trouble w/ windows cp850 argh!!!)
 79 |       last_encoding_name = ''
 80 |       words.each do |word|
 81 |         if last_encoding_name != word.encoding.name
 82 |           logger.debug "  encoding: #{word.encoding.name}"
 83 |           last_encoding_name = word.encoding.name
 84 |         end
 85 |       end
 86 |     end 
 87 |   end
 88 | 
 89 | 
 90 | private
 91 |   def strip_comments( text )
 92 |     new_text = ''
 93 | 
 94 |     text.each_line do |line|
 95 |   
 96 |       # comments allow:
 97 |       # 1) #####  (shell/ruby style)
 98 |       # 2) --  comment here (haskel/?? style)
 99 |       # 3) % comment here (tex/latex style)
100 | 
101 |       if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
102 |         # skip komments and do NOT copy to result (keep comments secret!)
103 |         logger.debug 'skipping comment line'
104 |         next
105 |       end
106 | 
107 |       ## todo: strip inline comments  - why not?
108 | 
109 |       # pass 1) remove possible trailing eol comment
110 |       ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
111 |       ## becomes -> nyc, New York
112 | 
113 |       line = line.sub( /\s+#.+$/, '' )
114 | 
115 |       new_text << line
116 |       new_text << "\n"
117 |     end
118 | 
119 |     new_text
120 |   end
121 | 
122 | 
123 |   def count_word_in_text( word, text )
124 |     count = 0
125 |     pos = text.index( word )
126 |     while pos.nil? == false
127 |       count += 1
128 |       logger.debug "bingo - found >>#{word}<< on pos #{pos}, count: #{count}"
129 |       ### todo: check if pos+word.length/size needs +1 or similar
130 |       pos = text.index( word, pos+word.length)
131 |     end
132 |     count
133 |   end
134 | 
135 |   def count_words_in_text( words, text )
136 |     count = 0
137 |     words.each do |word|
138 |       count += count_word_in_text( word, text )
139 |     end
140 |     count
141 |   end
142 | 
143 | 
144 | end # class Classifier
145 | 
146 | end # module TextUtils
147 | 


--------------------------------------------------------------------------------
/textutils/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt:
--------------------------------------------------------------------------------
  1 | 2     Bayern
  2 | 24    .. Oberfranken
  3 | 241   .... Bamberg (Stadt)     ## Kreisfreie Stadt
  4 |       ...... Bamberg
  5 |       ........ Bamberg
  6 | 242   .... Bayreuth (Stadt)    ## Kreisfreie Stadt
  7 |       ...... Bayreuth
  8 |       ........ Bayreuth
  9 | 243   .... Coburg (Stadt)      ## Kreisfreie Stadt
 10 |       ...... Coburg
 11 |       ........ Coburg 
 12 | 244   .... Hof (Stadt)         ## Kreisfreie Stadt
 13 |       ...... Hof
 14 |       ........ Hof
 15 | 
 16 | 245   .... Bamberg (Land)      ## Landkreis   -- 36 Gemeinden; see de.wikipedia.org/wiki/Landkreis_Bamberg
 17 |              ## 4 Städte
 18 |       ...... Baunach        ## (4013, 30,9 km²)
 19 |       ........ Baunach
 20 |       ...... Hallstadt      ## (8364, 14,5 km²)
 21 |       ........ Hallstadt    ## (7588)
 22 |       ........ Dörfleins    ## (1380)
 23 |       ...... Scheßlitz      ## (7184, 94,9 km²)
 24 |       ........ Scheßlitz
 25 |       ........ Köttensdorf
 26 |       ........ Würgau
 27 |       ...... Schlüsselfeld  ## (5712, 70,2 km²)
 28 | 
 29 |              ## 8 Märkte
 30 |       ...... Burgebrach             ## (6553, 87,9 km²)
 31 |       ...... Burgwindheim           ## (1311, 37,4 km²)
 32 |       ...... Buttenheim             ## (3472, 30 km²)
 33 |       ...... Ebrach                 ## (1830, 29,6 km²)
 34 |       ...... Heiligenstadt i. OFr.  ## (3525, 76,7 km²)
 35 |       ........ Heiligenstadt i. OFr.
 36 |       ........ Oberleinleiter
 37 |       ...... Hirschaid              ## (11.919, 41 km²)
 38 |       ...... Rattelsdorf            ## (4568, 39,6 km²)
 39 |       ........ Rattelsdorf
 40 |       ........ Mürsbach
 41 |       ........ Freudeneck
 42 |       ........ Höfen
 43 |       ........ Ebing
 44 |       ...... Zapfendorf             ## (4954, 30,6 km²)
 45 | 
 46 |              ## 24 Gemeinden
 47 |       ...... Altendorf              ## (2012, 8,6 km²)
 48 |       ...... Bischberg              ## (6012, 17,5 km²)
 49 |       ...... Breitengüßbach         ## (4586, 16,9 km²)
 50 |       ........ Breitengüßbach
 51 |       ...... Frensdorf              ## (4865, 44 km²)
 52 |       ...... Gerach                 ## (946, 7,8 km²)
 53 |       ...... Gundelsheim            ## (3378, 3,8 km²)
 54 |       ...... Kemmern                ## (2544, 8,3 km²)
 55 |       ........ Kemmern
 56 |       ...... Königsfeld             ## (1335, 42,7 km²)
 57 |       ........ Königsfeld
 58 |       ........ Huppendorf
 59 |       ...... Lauter                 ## (1139, 12,8 km²)
 60 |       ........ Lauter               ## (601)
 61 |       ........ Appendorf            ## (213)
 62 |       ...... Lisberg                ## (1813, 8,4 km²)
 63 |       ...... Litzendorf             ## (6057, 25,9 km²)
 64 |       ........ Litzendorf
 65 |       ........ Schammelsdorf
 66 |       ........ Melkendorf
 67 |       ........ Lohndorf
 68 |       ........ Tiefenellern
 69 |       ...... Memmelsdorf            ## (8854, 26,2 km²)
 70 |       ........ Memmelsdorf
 71 |       ........ Merkendorf
 72 |       ........ Drosendorf      
 73 |       ...... Oberhaid               ## (4590, 27,2 km²)
 74 |       ........ Oberhaid
 75 |       ........ Staffelbach
 76 |       ...... Pettstadt                  ## (1940, 9,9 km²)
 77 |       ...... Pommersfelden              ## (2851, 35,7 km²)
 78 |       ...... Priesendorf                ## (1470, 8,4 km²)
 79 |       ...... Reckendorf                 ## (2033, 13,1 km²)
 80 |       ........ Reckendorf
 81 |       ...... Schönbrunn im Steigerwald  ## (1880, 24,7 km²)
 82 |       ...... Stadelhofen                ## (1250, 41 km²)
 83 |       ........ Stadelhofen
 84 |       ........ Steinfeld
 85 |       ........ Schederndorf
 86 |       ...... Stegaurach                 ## (6842, 23,9 km²)
 87 |       ...... Strullendorf               ## (7807, 31,7 km²)
 88 |       ........ Strullendorf
 89 |       ........ Geisfeld
 90 |       ........ Roßdorf am Forst
 91 |       ...... Viereth-Trunstadt          ## (3562, 15,8 km²)
 92 |       ...... Walsdorf                   ## (2575, 16,2 km²)
 93 |       ...... Wattendorf                 ## (679, 22,2 km²)
 94 |       ........ Wattendorf
 95 | 
 96 | 246   .... Bayreuth (Land)     ## Landkreis
 97 | 247   .... Coburg (Land)       ## Landkreis
 98 | 248   .... Forchheim
 99 | 249   .... Hof (Land)          ## Landkreis
100 | 24A   .... Kronach
101 | 24B   .... Kulmbach
102 | 24C   .... Lichtenfels
103 | 24D   .... Wunsiedel i. Fichtelgebirge
104 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/reader/tree_reader.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # fix: move into TextUtils namespace/module!!
  4 | 
  5 | class TreeReader
  6 | 
  7 |   include LogUtils::Logging
  8 | 
  9 |   def self.from_file( path )
 10 |     ## nb: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
 11 |     ## - see textutils/utils.rb
 12 |    text = File.read_utf8( path )
 13 |    self.from_string( text )
 14 |   end
 15 | 
 16 |   def self.from_string( text )
 17 |     self.new( text )
 18 |   end
 19 | 
 20 |   def initialize( text )
 21 |     @text = text
 22 |   end
 23 | 
 24 |   TreeItem = Struct.new( :level, :key, :value )
 25 | 
 26 |   KEY_REGEX     = /
 27 |                       ([0-9][0-9A-Za-z]*)   ## key starting with a nummer
 28 |                         |
 29 |                       ([a-z]+)   ## key all lowercase e.g. bt,n,etc.
 30 |                         |
 31 |                       ([A-Z]+)   ## key all uppercase e.g. BT,N,etc
 32 |                   /x
 33 | 
 34 |   LEVEL_REGEX   = /[.*\-]+/     ## e.g. .. or .... etc. allow --/** too (e.g. lets you use markdown or ascii doc lists etc.)
 35 | 
 36 | 
 37 |   def each_line  
 38 |     stack    = []     # note: last_level  => stack.size; starts w/ 0
 39 |     times    = 2      # assume two indents factor (e.g. .. =2, ....=3 etc. ) for now
 40 | 
 41 |     reader = LineReader.from_string( @text )
 42 |     reader.each_line do |line|
 43 | 
 44 |       logger.debug "[TreeReader]  line (before) => >#{line}<"
 45 | 
 46 |       s = StringScanner.new( line )
 47 |       s.skip( /[ \t]+/ )   # remove whitespace
 48 | 
 49 |       key = s.scan( KEY_REGEX )
 50 |       if key
 51 |         s.skip( /[ \t]+/ )   # remove whitespace
 52 |       end
 53 | 
 54 |       level_str = s.scan( LEVEL_REGEX )
 55 |       if level_str
 56 |         ## FIX!! todo/check: make sure level_str.size is a multiple of two !! (e.g. 2,4,6,etc.)
 57 |         level = (level_str.size/times)+1
 58 |         s.skip( /[ \t]+/ )   # remove whitespace
 59 |       else
 60 |         level = 1   ## no level found; assume top level (start w/ 1)
 61 |       end
 62 | 
 63 |       ## assume rest is record
 64 |       rest = s.rest.rstrip  ## note: remove trailing whitespaces
 65 | 
 66 |       level_diff = level - stack.size
 67 | 
 68 |       if level_diff > 0
 69 |         logger.debug "[TreeReader]    up  +#{level_diff}"
 70 |         ## FIX!!! todo/check/verify/assert: always must be +1
 71 |       elsif level_diff < 0
 72 |         logger.debug "[TreeReader]    down #{level_diff}"
 73 |         level_diff.abs.times { stack.pop }
 74 |         stack.pop
 75 |       else
 76 |         ## same level
 77 |         stack.pop
 78 |       end
 79 | 
 80 |       item = TreeItem.new
 81 |       item.level = level
 82 |       item.key   = key
 83 |       item.value = rest
 84 | 
 85 |       stack.push( item )
 86 | 
 87 |       ## for debugging - show tree item (note) hierarchy 
 88 |       names = stack.map { |it| "(#{it.level}) #{it.value}" }
 89 |       logger.debug "[TreeReader]    #{names.join( ' › ' )}  -- key: >#{key}<, level: >#{level}<, rest: >#{rest}<"
 90 | 
 91 |       yield( stack )
 92 |     end
 93 | 
 94 |   end # method each_line
 95 | 
 96 | 
 97 |   def check   ## rename to lint/analyze/etc. - why? why not??
 98 | 
 99 |     ## track stats for debugging (linting/checking)
100 |     stats = {
101 |       levels: Hash.new( 0 ),   ## note: set default to 0
102 |       ## check for duplicate entries (values/names)
103 |       values: {}
104 |     }
105 | 
106 |     each_line do |stack|
107 |       node = stack.last
108 | 
109 |       ## track stats for number of nodes
110 |       levels = stats[:levels]
111 |       levels[node.level] += 1
112 | 
113 |       ## collect all values (for a level) in an array
114 |       values = stats[:values][node.level] || []
115 |       values << node.value
116 |       stats[:values][node.level] = values
117 |     end
118 | 
119 |     puts "stats:"
120 |     pp stats[:levels]  
121 | 
122 | #    puts "values:"
123 | #    pp stats[:values]
124 | 
125 |     ## check for duplicates (using group_by)
126 |     values = stats[:values]
127 |     values.each do |l,ary|
128 |       puts "checking level #{l} - #{ary.size} node(s)..."
129 |       duplicates = ary.group_by { |e| e }.select { |k, v| v.size > 1 }
130 |       if duplicates.size > 0
131 |         puts "  #{duplicates.size} duplicate(s) in level #{l}:"
132 |         pp duplicates
133 |       end
134 |     end
135 | 
136 |   end # method check
137 | 
138 | end # class TreeReader
139 | 
140 | 


--------------------------------------------------------------------------------
/textutils/test/test_address_helper.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | require 'helper'
  5 | 
  6 | class TestAddressHelper < Minitest::Test
  7 | 
  8 |   def test_normalize_addr
  9 |     
 10 |     txt_io = [
 11 |       ['Alte Plauener Straße 24 // 95028 Hof', nil, 'Alte Plauener Straße 24 // 95028 Hof'],
 12 |       ['Alte Plauener Straße 24 // 95028 Hof', 'de', '95028 Hof // Alte Plauener Straße 24'],
 13 |       ['Mautner Markhof-Straße 11 // 2320 Schwechat', nil, 'Mautner Markhof-Straße 11 // 2320 Schwechat'],
 14 |       ['Mautner Markhof-Straße 11 // 2320 Schwechat', 'at', '2320 Schwechat // Mautner Markhof-Straße 11']
 15 |     ]
 16 | 
 17 |     txt_io.each_with_index do |txt,i|
 18 |       puts "testing [#{i}] #{txt[0]}"
 19 |       assert_equal txt[2], TextUtils.normalize_addr( txt[0], txt[1] )
 20 |     end
 21 | 
 22 |   end  # method test_normalize_addr
 23 | 
 24 | 
 25 |   def test_addr_without_postal_code  # aka generic rule
 26 | 
 27 |     txt_io = [
 28 |       ['London //',  'London'],
 29 |       ['// London',  'London'],
 30 |       ['// London  ',  'London'],
 31 |       ['  // London',  'London'],
 32 |       ['// London, W4 2QB', nil],
 33 |       ['// London | W4 2QB', nil],
 34 |       ['// London  W4 2QB', nil],
 35 |       ['Chiswick Lane South // London, W4 2QB', nil],
 36 |       ['The Griffin Brewery // Chiswick Lane South // London', nil], # three lines will NOT work, sorry
 37 |       ['// New York, NY', nil],
 38 |       ['// New York NY', nil]    # check: does it exist in the real world (e.g. w/o comma or pipe?) support it?
 39 |     ]
 40 | 
 41 |     txt_io.each_with_index do |txt,i|
 42 |       puts "testing [#{i}] #{txt[0]}"
 43 |       assert_equal txt[1], TextUtils.find_city_in_addr_without_postal_code( txt[0] )
 44 |     end
 45 |   end # method test_addr_without_postal_code
 46 | 
 47 | 
 48 |   def test_addr_with_postal_code
 49 | 
 50 |     txt_io = [
 51 |       ['2320 Schwechat // Mautner Markhof-Straße 11', 'at', 'Schwechat'],
 52 |       ['Mautner Markhof-Straße 11 // 2320 Schwechat', 'at', 'Schwechat'],
 53 |       ['3910 Zwettl // Syrnauer Straße 22-25', 'at', 'Zwettl'],
 54 |       ['Syrnauer Straße 22-25 // 3910 Zwettl', 'at', 'Zwettl'],
 55 |       ['2018 Antwerpen', 'be', 'Antwerpen'],
 56 |       ['2870 Breendonk-Puurs', 'be', 'Breendonk-Puurs'],
 57 |       ['Alte Plauener Straße 24 // 95028 Hof', 'de', 'Hof'],
 58 |       ['95028 Hof // Alte Plauener Straße 24', 'de', 'Hof'],
 59 |       ['284 15 Kutná Hora', 'cz', 'Kutná Hora'],
 60 |       ['288 25 Nymburk', 'cz', 'Nymburk'],
 61 |       ['036 42 Martin', 'sk', 'Martin'],
 62 |       ['974 05 Banská Bystrica', 'sk', 'Banská Bystrica'],
 63 |       ['Brooklyn | NY 11249', 'us', 'Brooklyn'],
 64 |       ['Brooklyn, NY 11249', 'us', 'Brooklyn'],
 65 |       ['Brooklyn | NY', 'us', 'Brooklyn'],
 66 |       ['Brooklyn, NY', 'us', 'Brooklyn'],
 67 |     ]
 68 | 
 69 |     txt_io.each_with_index do |txt,i|
 70 |       puts "testing [#{i}] #{txt[0]}"
 71 |       assert_equal txt[2], TextUtils.find_city_in_addr_with_postal_code( txt[0], txt[1] )
 72 |     end
 73 |   end # method test_addr_with_postal_code
 74 | 
 75 | 
 76 |   def test_addr
 77 | 
 78 |     txt_io = [
 79 |       ['London //', nil, 'London'],
 80 |       ['// London', nil, 'London'],
 81 |       ['// London  ', nil,  'London'],
 82 |       ['  // London', nil, 'London'],
 83 |       ['2320 Schwechat // Mautner Markhof-Straße 11', 'at', 'Schwechat'],
 84 |       ['Mautner Markhof-Straße 11 // 2320 Schwechat', 'at', 'Schwechat'],
 85 |       ['3910 Zwettl // Syrnauer Straße 22-25', 'at', 'Zwettl'],
 86 |       ['Syrnauer Straße 22-25 // 3910 Zwettl', 'at', 'Zwettl'],
 87 |       ['2018 Antwerpen', 'be', 'Antwerpen'],
 88 |       ['2870 Breendonk-Puurs', 'be', 'Breendonk-Puurs'],
 89 |       ['Alte Plauener Straße 24 // 95028 Hof', 'de', 'Hof'],
 90 |       ['95028 Hof // Alte Plauener Straße 24', 'de', 'Hof'],
 91 |       ['284 15 Kutná Hora', 'cz', 'Kutná Hora'],
 92 |       ['288 25 Nymburk', 'cz', 'Nymburk'],
 93 |       ['036 42 Martin', 'sk', 'Martin'],
 94 |       ['974 05 Banská Bystrica', 'sk', 'Banská Bystrica'],
 95 |       ['Brooklyn | NY 11249', 'us', 'Brooklyn'],
 96 |       ['Brooklyn, NY 11249', 'us', 'Brooklyn'],
 97 |       ['Brooklyn | NY', 'us', 'Brooklyn'],
 98 |       ['Brooklyn, NY', 'us', 'Brooklyn'],
 99 |     ]
100 | 
101 |     txt_io.each_with_index do |txt,i|
102 |       puts "testing [#{i}] #{txt[0]}"
103 |       assert_equal txt[2], TextUtils.find_city_in_addr( txt[0], txt[1] )
104 |     end
105 |   end # method test_addr
106 | 
107 | 
108 | end # class TestAddressHelper


--------------------------------------------------------------------------------
/attic/values_reader.rb:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 |   def each_line_old_single_line_records_only
  4 |     ## old version (single-line records only): for every data line in @data yields attribs (hash w/ :title, :key and optional :synonyms merged w/ @more_values) and more_cols (remaining columns)
  5 |     @data.each_line do |line|
  6 |   
  7 |       ## allow alternative comment lines
  8 |       ## e.g. -- comment or
  9 |       ##      % comment
 10 |       ##  why?  # might get used by markdown for marking headers, for example
 11 | 
 12 |       ## NB: for now alternative comment lines not allowed as end of line style e.g
 13 |       ##  some data, more data   -- comment here
 14 | 
 15 |       if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
 16 |         # skip comments and do NOT copy to result (keep comments secret!)
 17 |         logger.debug 'skipping comment line'
 18 |         next
 19 |       end
 20 | 
 21 |       if line =~ /^\s*$/
 22 |         # skip blank line (note: comment lines got skipped above already)
 23 |         logger.debug 'skipping blank line'
 24 |         next
 25 |       end
 26 | 
 27 | 
 28 |       # pass 1) remove possible trailing eol comment
 29 |       ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
 30 |       ## becomes -> nyc, New York
 31 | 
 32 |       line = line.sub( /\s+#.+$/, '' )
 33 | 
 34 |       # pass 2) remove leading and trailing whitespace
 35 |       
 36 |       line = line.strip
 37 | 
 38 |       ### guard escaped commas (e.g. \,) w/ a placeholder - note: order of the three steps below matters
 39 |       line = line.gsub( '\,', '@commma@' )
 40 |       
 41 |       ## use generic separator (allow us to configure separator)
 42 |       line = line.gsub( ',', '@sep@')
 43 |       
 44 |       ## restore escaped commas (before split)
 45 |       line = line.gsub( '@commma@', ',' )
 46 | 
 47 | 
 48 |       logger.debug "line: >>#{line}<<"
 49 | 
 50 |       values = line.split( '@sep@' )
 51 |       
 52 |       # pass 1) remove leading and trailing whitespace for values
 53 | 
 54 |       values = values.map { |value| value.strip }
 55 | 
 56 |       ##### todo remove support of comment column? (NB: must NOT include commas)
 57 |       # pass 2) remove comment columns
 58 |       
 59 |       values = values.select do |value|
 60 |         if value =~ /^#/  ## start with # treat it as a comment column; e.g. remove it
 61 |           logger.debug "   removing column with value >>#{value}<<"
 62 |           false
 63 |         else
 64 |           true
 65 |         end
 66 |       end
 67 |       
 68 |       logger.debug "  values: >>#{values.join('<< >>')}<<"
 69 |       
 70 |       
 71 |       ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
 72 |       ##  either use keys or do NOT use keys; do NOT mix in a single fixture file
 73 |       
 74 |       
 75 |       ### support autogenerate key from first title value
 76 |       
 77 |       # if it looks like a key (only a-z lower case allowed); assume it's a key
 78 |       #   - also allow . in keys e.g. world.quali.america, at.cup, etc.
 79 |       #   - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
 80 | 
 81 |       # fix/todo: add support for leading underscore _
 82 |       #   or allow keys starting w/ digits?
 83 |       if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/    # NB: key must start w/ a-z letter (NB: minimum one letter possible)
 84 |         key_col         = values[0]
 85 |         title_col       = values[1]
 86 |         more_cols       = values[2..-1]
 87 |       else
 88 |         key_col         = '<auto>'
 89 |         title_col       = values[0]
 90 |         more_cols       = values[1..-1]
 91 |       end
 92 | 
 93 |       attribs = {}
 94 |       ## NOTE(review): title_col is nil if the line holds a key only (values[1] missing) - the split below raises then; confirm if such lines can occur
 95 |       ## title (split of optional synonyms)
 96 |       # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
 97 |       titles = title_col.split('|')
 98 |       
 99 |       attribs[ :title ]    =  titles[0]
100 |      
101 |       ## add optional synonyms if present
102 |       attribs[ :synonyms ] =  titles[1..-1].join('|')  if titles.size > 1
103 |       
104 |       if key_col == '<auto>'
105 |         ## autogenerate key from first title
106 |         key_col = TextUtils.title_to_key( titles[0] )
107 |         logger.debug "   autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
108 |       end
109 |       
110 |       attribs[ :key ] = key_col
111 |       
112 |       attribs = attribs.merge( @more_values )  # e.g. merge country_id and other defaults if present
113 |                         
114 |       yield( attribs, more_cols )
115 | 
116 |     end # each lines
117 | 
118 |   end # method each_line_old_single_line_records_only
119 | 
120 | 
121 | def find_values
122 |     # pass 2) remove comment columns
123 |     #  todo/fix: check if still possible ?? - add an example here how it looks like/works
124 | 
125 |     values = values.select do |value|
126 |       if value =~ /^#/  ## start with # treat it as a comment column; e.g. remove it
127 |         logger.info "   removing column with value »#{value}«"
128 |         false
129 |       else
130 |         true
131 |       end
132 |     end
133 | end
134 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/helper/value_helper_i.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | module TextUtils
  5 |   module ValueHelper
  6 | 
  7 |   # if it looks like a key (only a-z lower case allowed); assume it's a key
  8 |   #   - also allow . in keys e.g. world.quali.america, at.cup, etc.
  9 |   #   - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
 10 |   #   - also allow leading digits e.g. 1850muenchen, 3kronen, etc.
 11 | 
 12 |   TITLE_KEY_REGEX = /^(
 13 |                      [a-z][a-z0-9.]*[a-z0-9]
 14 |                        |
 15 |                      [a-z]         # allow single letter keys e.g. n,s,etc.
 16 |                        |
 17 |                      [1-9][0-9]*[a-z]+  # NOTE: also allow starts with leading digits e.g. 1850muenchen, 3kronen etc.;
 18 |                                    #   *MUST* be followed by letter;
 19 |                                    #   note: leading zero for now *NOT* allowed
 20 |                      )$
 21 |                     /x
 22 | 
 23 | 
 24 |   def find_key_n_title( values )  # note: returns ary [attribs,more_values] / two values
 25 |     # todo/fix:
 26 |     ##  change title to name 
 27 |     ##  change synonyms to alt_names (!!!)
 28 |     ##   => use new method e.g. find_key_n_name(s) - why?? why not??
 29 | 
 30 | 
 31 |     ## fix: add/configure logger for ActiveRecord!!!
 32 |     logger = LogKernel::Logger.root
 33 | 
 34 | 
 35 |     ### support autogenerate key from first title value
 36 |     if values[0] =~ TITLE_KEY_REGEX
 37 |       key_col         = values[0]
 38 |       title_col       = values[1]
 39 |       more_values     = values[2..-1]
 40 |     else
 41 |       key_col         = '<auto>'
 42 |       title_col       = values[0]
 43 |       more_values     = values[1..-1]
 44 |     end
 45 | 
 46 |     attribs = {}
 47 | 
 48 |     ## check title_col for grade (e.g. ***/**/*) and use returned stripped title_col if exits
 49 |     grade, title_col = find_grade( title_col )
 50 | 
 51 |     # NB: for now - do NOT include default grade e.g. if grade (***/**/*) not present; attrib will not be present too
 52 |     if grade == 1 || grade == 2 || grade == 3  # grade found/present
 53 |       logger.debug "   found grade #{grade} in title"
 54 |       attribs[:grade] = grade
 55 |     end
 56 | 
 57 |  
 58 |     ## fix/todo: add find parts ??
 59 |     #  e.g. ‹Estrella› ‹Damm› Inedit
 60 |     #    becomes =>   title: 'Estrella Damm Inedit'  and  parts: ['Estrella','Damm']
 61 | 
 62 | 
 63 | 
 64 |     ## title (split of optional tree hierarchy)
 65 |     ##  e.g. Leverkusen › Köln/Bonn › Nordrhein-Westfalen
 66 |     ##       Gelsenkirchen › Ruhrgebiet › Nordrhein-Westfalen
 67 |     ##       München [Munich] › Bayern  etc.
 68 | 
 69 |     ##  fix!!!! - trailing hierarchy get *ignored* for now!!! - fix!!
 70 |     ##    pass along in  :tree (or :hierarchy) ??
 71 | 
 72 | 
 73 |     ## note: must include leading and trailing space for now (fix!! later)
 74 |     ##   hack for avoiding conflict w/ parts; fix: read/parse parts first
 75 |     ##  todo: also allow > (as an alternative to ›)
 76 | 
 77 |     title_tree = title_col.split( /[ ]+[›][ ]+/ )
 78 | 
 79 |     ## title (split of optional synonyms)
 80 |     # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
 81 |     #      München [Munich]
 82 |     titles = NameTokenizer.new.tokenize( title_tree[0] )
 83 | 
 84 |     attribs[ :title ]    =  titles[0]
 85 | 
 86 |     ## add optional synonyms if present    
 87 |     attribs[ :synonyms ] =  titles[1..-1].join('|')  if titles.size > 1
 88 | 
 89 |     if key_col == '<auto>'
 90 |       ## autogenerate key from first title
 91 |       key_col = TextUtils.title_to_key( titles[0] )
 92 |       logger.debug "   autogen key »#{key_col}« from title »#{titles[0]}«"
 93 |     end
 94 | 
 95 |     attribs[ :key ] = key_col
 96 | 
 97 |     [attribs, more_values]
 98 |   end
 99 | 
100 | 
101 |   def find_grade( value )  # NB: returns ary [grade,value] / two values
102 |     grade = 4  # defaults to grade 4  e.g  *** => 1, ** => 2, * => 3, -/- => 4
103 | 
104 |     # NB: stars must end field/value or start field/value
105 |     #  e.g.
106 |     #  *** Anton Bauer   or
107 |     #  Anton Bauer ***
108 | 
109 |     value = value.sub( /^\s*(\*{1,3})\s+/ ) do |_|
110 |       if $1 == '***'
111 |         grade = 1
112 |       elsif $1 == '**'
113 |         grade = 2
114 |       elsif $1 == '*'
115 |         grade = 3
116 |       else
117 |         # unknown grade; not possible, is'it?
118 |       end
119 |       ''  # remove * from title if found
120 |     end
121 | 
122 |     value = value.sub( /\s+(\*{1,3})\s*$/ ) do |_|
123 |       if $1 == '***'
124 |         grade = 1
125 |       elsif $1 == '**'
126 |         grade = 2
127 |       elsif $1 == '*'
128 |         grade = 3
129 |       else
130 |         # unknown grade; not possible, is'it?
131 |       end
132 |       ''  # remove * from title if found
133 |     end
134 | 
135 |     [grade,value]
136 |   end
137 | 
138 |   end # module ValueHelper
139 | end # module TextUtils
140 | 


--------------------------------------------------------------------------------
/textutils/test/test_hypertext_helper.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | ###
  4 | #  to run use
#     ruby -I ./lib -I ./test test/test_hypertext_helper.rb
  6 | #  or better
  7 | #     rake test
  8 | 
  9 | require 'helper'
 10 | 
 11 | 
 12 | class TestHypertextHelper < Minitest::Test
 13 | 
 14 |   include TextUtils::HypertextHelper   #  lets us use textify, etc.
 15 | 
 16 |   def test_strip_tags
 17 |     ## empty tags
 18 |     assert_equal '', strip_tags( '<hr />' )
 19 |     assert_equal '', strip_tags( '<hr/>' )
 20 |     assert_equal '', strip_tags( '<my-emtpy/>' )
 21 |     assert_equal '', strip_tags( '<my-emtpy />' )
 22 | 
 23 |     assert_equal 'hello', strip_tags( '<h1>hello</h1>' )
 24 |     assert_equal 'hello', strip_tags( '<h2>hello</h2>' )
 25 |     assert_equal 'hello', strip_tags( '<p>hello</p>' )
 26 |     assert_equal 'hello', strip_tags( '<div>hello</div>' )
 27 |     assert_equal 'hello', strip_tags( '<my-header>hello</my-header>' )
 28 | 
 29 |     assert_equal 'hello', strip_tags( '<h1 id="test">hello</h1>' )
 30 |     assert_equal 'hello', strip_tags( '<p id="test">hello</p>' )
 31 |     assert_equal 'hello', strip_tags( '<div id="test">hello</div>' )
 32 |     assert_equal 'hello', strip_tags( '<my-header id="test">hello</my-header>' )
 33 |     
 34 |     ## check case in-sensitive
 35 |     assert_equal '', strip_tags( '<HR />' )
 36 |     assert_equal '', strip_tags( '<hR />' )
 37 |     assert_equal '', strip_tags( '<Hr />' )
 38 |     assert_equal '', strip_tags( '<HR/>' )
 39 |     assert_equal '', strip_tags( '<My-EmTpY/>' )
 40 |     assert_equal '', strip_tags( '<My-EmTpY />' )
 41 | 
 42 |     assert_equal 'hello', strip_tags( '<H1>hello</H1>' )
 43 |     assert_equal 'hello', strip_tags( '<H2>hello</h2>' )
 44 |     assert_equal 'hello', strip_tags( '<P>hello</P>' )
 45 |     assert_equal 'hello', strip_tags( '<DiV>hello</dIv>' )
 46 |     assert_equal 'hello', strip_tags( '<mY-hEaDer>hello</MY-HEADER>' )
 47 | 
 48 |     assert_equal 'hello', strip_tags( '<H1 ID="test">hello</h1>' )
 49 |     assert_equal 'hello', strip_tags( '<P id="test">hello</p>' )
 50 |     assert_equal 'hello', strip_tags( '<DIV Id="test">hello</dIV>' )
 51 |     assert_equal 'hello', strip_tags( '<MY-HEADER iD="test">hello</mY-hEaDeR>' )
 52 |   end
 53 | 
 54 | 
 55 |   def test_stylesheet_link_tag
 56 |     hyout = "<link rel='stylesheet' type='text/css' href='hello.css'>"
 57 | 
 58 |     assert_equal( hyout, stylesheet_link_tag( 'hello' ))
 59 |     assert_equal( hyout, stylesheet_link_tag( 'hello.css' ))
 60 |   end
 61 | 
 62 | 
  # Checks sanitize against a real-world sample (meetup event description)
  #  - once for the final result and
  #  - once for the intermediate result (skip_restore: true)
  def test_sanitize
    ## input: html incl. img, a, strong and em tags plus a plain http:// link
    hyin =<<EOS
<p><img style="float:left; margin-right:4px" src="http://photos1.meetupstatic.com/photos/event/7/c/b/2/event_244651922.jpeg" alt="photo" class="photo" />vienna.rb</p>
<p>
  <p>The cool guys from <a href="http://platogo.com/">Platogo</a> will sponsor (y)our drinks. Which is awesome.</p>
  <p><strong>Talks</strong>*</p>
  <p>Jakob Sommerhuber - sponsor talk</p>
  <p>Martin Schürrer - Erlang/OTP in production for highly-available, scalable systems</p>
  <p>Markus Prinz - How to improve your code</p>
  <p>Gerald Bauer - working with Sinatra</p>
  <p>Kathrin Folkendt - 'Chapter one' (lightning talk on getting started with Rails, and building her first web app)</p>
  <p><em>*preliminary program</em></p>
</p>
<p>Vienna   - Austria</p>
<p>Friday, October 11 at 6:00 PM</p>
<p>Attending: 21</p>
<p>Details: http://www.meetup.com/vienna-rb/events/130346182/</p>
EOS

    ## expected intermediate result (restore step skipped):
    ##  p tags are written with ‹ › (instead of < >), the image shows up as ♦,
    ##  a/strong/em tags are gone and the link has lost its http:// prefix
    hystep1 = <<EOS
‹p›♦vienna.rb‹/p›
‹p›
  ‹p›The cool guys from Platogo will sponsor (y)our drinks. Which is awesome.‹/p›
  ‹p›Talks*‹/p›
  ‹p›Jakob Sommerhuber - sponsor talk‹/p›
  ‹p›Martin Schürrer - Erlang/OTP in production for highly-available, scalable systems‹/p›
  ‹p›Markus Prinz - How to improve your code‹/p›
  ‹p›Gerald Bauer - working with Sinatra‹/p›
  ‹p›Kathrin Folkendt - 'Chapter one' (lightning talk on getting started with Rails, and building her first web app)‹/p›
  ‹p›*preliminary program‹/p›
‹/p›
‹p›Vienna   - Austria‹/p›
‹p›Friday, October 11 at 6:00 PM‹/p›
‹p›Attending: 21‹/p›
‹p›Details: www.meetup.com/vienna-rb/events/130346182/‹/p›
EOS

    ## expected final result: same as the intermediate result
    ##  but with the p tags written with < > again
    hyout = <<EOS
<p>♦vienna.rb</p>
<p>
  <p>The cool guys from Platogo will sponsor (y)our drinks. Which is awesome.</p>
  <p>Talks*</p>
  <p>Jakob Sommerhuber - sponsor talk</p>
  <p>Martin Schürrer - Erlang/OTP in production for highly-available, scalable systems</p>
  <p>Markus Prinz - How to improve your code</p>
  <p>Gerald Bauer - working with Sinatra</p>
  <p>Kathrin Folkendt - 'Chapter one' (lightning talk on getting started with Rails, and building her first web app)</p>
  <p>*preliminary program</p>
</p>
<p>Vienna   - Austria</p>
<p>Friday, October 11 at 6:00 PM</p>
<p>Attending: 21</p>
<p>Details: www.meetup.com/vienna-rb/events/130346182/</p>
EOS

    assert_equal( hyout, sanitize( hyin ) )

    assert_equal( hystep1, sanitize( hyin, skip_restore: true ) )
  end
122 | 
123 | end # class TestHypertextHelper
124 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/title_mapper.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | ## see textutils/title.rb
  5 | ##   for existing code
  6 | ##   move over here
  7 | 
  8 | ####
  9 | ## fix: turn it into a class w/ methods
 10 | #
 11 | #e.g  t =TitleMapper.new( records, name )  # e.g. name='team'
 12 | #  t.map!( line )
 13 | #  t.find_key!( line )
 14 | # etc.
 15 | 
 16 | 
 17 | module TextUtils
 18 | 
 19 | class TitleMapper      ## todo/check: rename to NameMapper ? why? why not??
 20 | 
 21 |   include LogUtils::Logging
 22 | 
 23 |   attr_reader :known_titles   ## rename to mapping or mappings or just titles - why? why not?
 24 | 
 25 |   def initialize( records, tag )
 26 |     @known_titles = build_title_table_for( records )   ## build mapping lookup table
 27 | 
 28 |     ## todo: rename tag to attrib or attrib_name - why ?? why not ???
 29 |     @tag = tag   # e.g. tag name use for @@brewery@@ @@team@@ etc.
 30 |   end
 31 | 
 32 | 
 33 |   def map_titles!( line )   ## rename to just map! - why?? why not???
 34 |     @known_titles.each do |rec|
 35 |       key    = rec[0]
 36 |       values = rec[1]
 37 |       map_title_for!( @tag, line, key, values )
 38 |     end
 39 |   end
 40 | 
 41 | 
  # Returns the key of the first @@oo{key}oo@@ marker in line (or nil if none);
  #  the marker gets replaced in-place with the upcased tag e.g. [TEAM]
  def find_key!( line )
    find_key_for!( @tag, line )
  end
 45 | 
 46 |   def find_keys!( line )  # NB: keys (plural!) - will return array
 47 |     counter = 1
 48 |     keys = []
 49 | 
 50 |     key = find_key_for!( "#{@tag}#{counter}", line )
 51 |     while key.present?
 52 |       keys << key
 53 |       counter += 1
 54 |       key = find_key_for!( "#{@tag}#{counter}", line )
 55 |     end
 56 |     keys
 57 |   end
 58 | 
 59 | 
 60 | private
 61 |   def build_title_table_for( records )
 62 | 
 63 |     #### fix/todo:
 64 |     ###  reorder - sort by largest strings etc.
 65 |     ##   do NOT use lookup w/ array per key; use 1:1 one key per lookup
 66 |     ##     -> lets us sort by find largest first
 67 | 
 68 | 
 69 |     ## build known tracks table w/ synonyms e.g.
 70 |     #
 71 |     # [[ 'wolfsbrug', [ 'VfL Wolfsburg' ]],
 72 |     #  [ 'augsburg',  [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
 73 |     #  [ 'stuttgart', [ 'VfB Stuttgart' ]] ]
 74 | 
 75 |     known_titles = []
 76 | 
 77 |     records.each_with_index do |rec,index|
 78 | 
 79 |       title_candidates = []
 80 |       title_candidates << rec.title
 81 | 
 82 |       title_candidates += rec.synonyms.split('|') if rec.synonyms.present?
 83 | 
 84 | 
 85 |       ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
 86 |       #  make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
 87 | 
 88 |       titles = []
 89 |       title_candidates.each do |t|
 90 |         titles << t
 91 |         if t =~ /\(.+\)/
 92 |           extra_title = t.gsub( /\(.+\)/, '' ) # remove/delete subtitles
 93 |           extra_title.strip!   # strip leading n trailing withspaces too!
 94 |           titles << extra_title
 95 |         end
 96 |       end
 97 | 
 98 | 
 99 |       ## NB: sort here by length (largest goes first - best match)
100 |       #  exclude code and key (key should always go last)
101 |       titles = titles.sort { |left,right| right.length <=> left.length }
102 |       
103 |       ## escape for regex plus allow subs for special chars/accents
104 |       titles = titles.map { |title| TextUtils.title_esc_regex( title )  }
105 | 
106 |       ## NB: only include code field - if defined
107 |       titles << rec.code          if rec.respond_to?(:code) && rec.code.present?
108 | 
109 |       known_titles << [ rec.key, titles ]
110 | 
111 |       logger.debug "  #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
112 |     end
113 | 
114 |     known_titles
115 |   end
116 | 
117 | 
118 |   def map_title_for!( tag, line, key, values )
119 | 
120 |     downcase_tag = tag.downcase
121 | 
122 |     values.each do |value|
123 |       ## nb: \b does NOT include space or newline for word boundry (only alphanums e.g. a-z0-9)
124 |       ## (thus add it, allows match for Benfica Lis.  for example - note . at the end)
125 | 
126 |       ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
127 |       regex = /\b#{value}(\b| |\t|$)/   # wrap with world boundry (e.g. match only whole words e.g. not wac in wacker) 
128 |       if line =~ regex
129 |         logger.debug "     match for #{downcase_tag}  >#{key}< >#{value}<"
130 |         # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
131 |         line.sub!( regex, "@@oo#{key}oo@@ " )    # NB: add one space char at end
132 |         return true    # break out after first match (do NOT continue)
133 |       end
134 |     end
135 |     return false
136 |   end
137 | 
138 | 
139 |   def find_key_for!( tag, line )
140 |     regex = /@@oo([^@]+?)oo@@/     # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])
141 | 
142 |     upcase_tag    = tag.upcase
143 |     downcase_tag  = tag.downcase
144 | 
145 |     if line =~ regex
146 |       value = "#{$1}"
147 |       logger.debug "   #{downcase_tag}: >#{value}<"
148 |       
149 |       line.sub!( regex, "[#{upcase_tag}]" )
150 | 
151 |       return $1
152 |     else
153 |       return nil
154 |     end
155 |   end # method find_key_for!
156 | 
157 | 
158 | end # class TitleMapper
159 | end # module TextUtils
160 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/page.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | ##
  4 | ## page template class for book production w/ markdown
  5 | ##   and static site compiler (e.g. jekyll)
  6 | ##
  7 | ## todo: move filters to filter for public reuse!!!
  8 | 
  9 | module TextUtils
 10 | 
 11 | 
 12 | ############
 13 | ## fix:
 14 | ###   add some unit tests!!!!!!!!!!!!!!!!!
 15 | ###
 16 | 
 17 | class Page
 18 | 
 19 |   ### convenience helper; use like:
 20 |   ##   Page.open() do |page|
 21 |   ##      page.write( text )
 22 |   ##      page.write( text )
 23 |   ##   end
 24 | 
 25 |   def self.open( path, mode, opts={} )
 26 |     page = self.new( path, mode, opts )
 27 |     yield( page )
 28 |     page.close
 29 |   end
 30 | 
 31 |   def self.create( path, opts={} )
 32 |     ## todo: check if 'w' is good enough?? do NOT need to add +
 33 |     page = self.new( path, 'w+', opts )
 34 |     yield( page )
 35 |     page.close
 36 |   end
 37 | 
 38 |   def self.update( path, opts={} )
 39 |     ## todo: check if 'a' is good enough?? do NOT need to add +
 40 |     page = self.new( path, 'a+', opts )
 41 |     yield( page )
 42 |     page.close
 43 |   end
 44 | 
 45 | 
 46 |   def initialize( path, mode, opts={} )
 47 |     ## check if folders exists? if not create folder in path
 48 |     FileUtils.mkdir_p( File.dirname(path) )
 49 | 
 50 |     @file = File.new( path, mode )
 51 | 
 52 |     ## add frontmatter if passed in
 53 |     ## todo: assert check if mode = 'w' and NOT 'a' !!!
 54 |     @file.write render_frontmatter( opts[:frontmatter] )  if opts[:frontmatter]
 55 |   end
 56 | 
 57 |   def write( text )
 58 |     @file.write( text )
 59 |   end
 60 | 
 61 |   def close
 62 |     @file.close
 63 |   end
 64 | 
 65 | private
 66 | 
 67 | ###########################
 68 | #  helpers
 69 | #   - make public for reuse !!!!!
 70 | 
 71 | def render_frontmatter( h )
 72 |   buf = ''
 73 |   buf += "---\n"
 74 | 
 75 |   h.each do |key,value|
 76 |     buf += "#{key}: #{value}\n"
 77 |   end
 78 | 
 79 |   buf += "---\n\n"
 80 |   buf
 81 | end
 82 | 
 83 | end # class Page
 84 | 
 85 | 
 86 | class PageTemplate
 87 | 
 88 | 
 89 | ###
 90 | ## todo: what is the best convention for loading file and handling string
 91 | ## for now it its:
 92 | #
 93 | #  PageTemplate.read( 'to/path ' )  or     --- use load ???? instead of read??
 94 | #  PageTemplate.new( 'template content'  )
 95 | 
 96 | 
 97 | def self.read( path )
 98 |   self.new( File.read_utf8( path ) )
 99 | end
100 | 
101 | 
102 | def initialize( tmpl )
103 |   @tmpl = tmpl.dup   # make a copy; just to be sure no one will change text
104 | end
105 | 
106 | def render( ctx )
107 | # note: erb offers the following trim modes:
108 | #  1) <> omit newline for lines starting with <% and ending in %>
109 | #  2)  >  omit newline for lines ending in %>
110 | #  3)  omit blank lines ending in -%>
111 |   ## run filters
112 |   tmpl = remove_html_comments( @tmpl )
113 |   tmpl = remove_blanks( tmpl )
114 | 
115 |   tmpl = django_to_erb( tmpl )  ## allow django/jinja style templates
116 | 
117 |   tmpl = remove_leading_spaces( tmpl )
118 |   tmpl = concat_lines( tmpl )
119 | 
120 |   text = ERB.new( tmpl, nil, '<>' ).result( ctx )
121 | 
122 |   ### text = cleanup_newlines( text )
123 |   text
124 | end
125 | 
126 | #######################
127 | #  filters
128 | #   - use better names and make public for reuse!!!!
129 | 
130 | def django_to_erb( text )
131 |   ## convert django style markers to erb style marker e.g
132 |   #  {% %} becomes <% %>  -- supports multi-line
133 |   #  {{ }} becomes <%= %>  - does NOT support multi-line
134 | 
135 |   ## comments (support multi-line)
136 |   text = text.gsub( /\{#(.+?)#\}/m ) do |_|
137 |    "<%# #{1} %>"
138 |   end
139 | 
140 |   text = text.gsub( /\{%(.+?)%\}/m ) do |_|
141 |     ## note: also replace newlines w/  %>\n<%  to split
142 |     #   multi-line stmts into single-line stmts
143 |     # lets us use
144 |     # {%
145 |     #  %} will become
146 |     # <%  %>
147 |     # <%  %>
148 |     "<% #{$1} %>".gsub( "\n", " %>\n<% " )
149 |   end
150 | 
151 |   # note: for now {{ }} will NOT support multi-line
152 |   text = text.gsub( /\{\{(.+?)\}\}/ ) do |_|
153 |     "<%= #{$1} %>"
154 |   end
155 | 
156 |   text
157 | end
158 | 
159 | def remove_html_comments( text )
160 |   text.gsub( /<!--.+?-->/, '' )
161 | end
162 | 
163 | def remove_leading_spaces( text )
164 |   # remove leading spaces if less than four !!!
165 |   text.gsub( /^[ \t]+(?![ \t])/, '' )    # use negative regex lookahead e.g. (?!)
166 | end
167 | 
168 | def remove_blanks( text )
169 |   # remove lines only with  ..
170 |   text.gsub( /^[ \t]*\.{2}[ \t]*\n/, '' )
171 | end
172 | 
173 | def cleanup_newlines( text )
174 |   # remove all blank lines that go over three
175 |   text.gsub( /\n{4,}/, "\n\n\n" )
176 | end
177 | 
178 | 
179 | def concat_lines( text )
180 |   #  lines ending with  ++  will get newlines get removed
181 |   # e.g.
182 |   # >|   hello1 ++
183 |   # >1   hello2
184 |   #  becomes
185 |   # >|   hello1 hello2
186 |   
187 |   #
188 |   # note: do NOT use \s - will include \n (newline) ??
189 |   
190 |   text.gsub( /[ \t]+\+{2}[ \t]*\n[ \t]*/, ' ' )  # note: replace with single space
191 | end
192 | 
193 | 
194 | end # class PageTemplate
195 | 
196 | end # module TextUtils
197 | 
198 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/reader/hash_reader.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | # fix: move into TextUtils namespace/module!!
  4 | 
  5 | 
  6 | class HashReader
  7 | 
  8 |   include LogUtils::Logging
  9 | 
 10 |   def self.from_zip( zip_file, entry_path )
 11 |     entry = zip_file.find_entry( entry_path )
 12 | 
 13 |     ## todo/fix: add force encoding to utf-8 ??
 14 |     ##  check!!!
 15 |     ##  clean/prepprocess lines
 16 |     ##  e.g. CR/LF (/r/n) to LF (e.g. /n)
 17 |     text = entry.get_input_stream().read()
 18 | 
 19 |     ## NOTE: needs logger ref; only available in instance methods; use global logger for now
 20 |     logger = LogUtils::Logger.root
 21 |     logger.debug "text.encoding.name (before): #{text.encoding.name}"
 22 | #####
 23 | # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
 24 | ## NB:
 25 | # for now "hardcoded" to utf8 - what else can we do?
 26 | # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
 27 |     text = text.force_encoding( Encoding::UTF_8 )
 28 |     logger.debug "text.encoding.name (after): #{text.encoding.name}"     
 29 | 
 30 |     ## todo:
 31 |     # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
 32 |     ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
 33 | 
 34 |     self.from_string( text )
 35 |   end
 36 | 
 37 |   def self.from_file( path )
 38 |     ## nb: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
 39 |     ## - see textutils/utils.rb
 40 |     text = File.read_utf8( path )
 41 |     self.from_string( text )
 42 |   end
 43 | 
 44 |   def self.from_string( text )
 45 |     HashReader.new( text: text )
 46 |   end
 47 | 
 48 |   def initialize( arg )
 49 | 
 50 |     if arg.is_a?( String )  ## old style (deprecated) - pass in filepath as string
 51 |       path = arg
 52 |       logger.info "HashReader.new - deprecated API - use HashReader.from_file() instead"
 53 |       text = File.read_utf8( path )
 54 |     else   ## assume it's a hash
 55 |       opts = arg
 56 |       text = opts[:text]
 57 |     end
 58 | 
 59 |     ### hack for syck yaml parser (e.g.ruby 1.9.2) (cannot handle !!null)
 60 |     ##   change it to !null to get plain nil
 61 |     ##   w/ both syck and psych/libyml
 62 | 
 63 |     text = text.gsub( '!!null', '!null' )
 64 |    
 65 |     ### hacks for yaml
 66 |     
 67 |     ### see yaml gotschas
 68 |     ##  - http://www.perlmonks.org/?node_id=738671
 69 |     ##  - 
 70 | 
 71 |     ## replace all tabs w/ two spaces and issue a warning
 72 |     ## nb: yaml does NOT support tabs see why here -> yaml.org/faq.html
 73 |     
 74 |     text = text.gsub( "\t" ) do |_|
 75 |       logger.warn "hash reader - found tab (\t) replacing w/ two spaces; yaml forbids tabs; see yaml.org/faq.html (path=#{path})"
 76 |       '  '  # replace w/ two spaces
 77 |     end
 78 | 
 79 |     ## quote implicit boolean types on,no,n,y
 80 | 
 81 |     ## nb: escape only if key e.g. no: or "free standing" value on its own line e.g.
 82 |     ##   no: no
 83 | 
 84 |     text = text.gsub( /^([ ]*)(ON|On|on|OFF|Off|off|YES|Yes|yes|NO|No|no|Y|y|N|n)[ ]*:/ ) do |value|
 85 |       logger.warn "hash reader - found implicit bool (#{$1}#{$2}) for key; adding quotes to turn into string; see yaml.org/refcard.html (path=#{path})"
 86 |       # nb: preserve leading spaces for structure - might be significant
 87 |       "#{$1}'#{$2}':"  # add quotes to turn it into a string (not bool e.g. true|false)
 88 |     end
 89 | 
 90 |     ## nb: value must be freestanding (only allow optional eol comment)
 91 |     ##  do not escape if part of string sequence e.g.
 92 |     ##  key: nb,nn,no,se   => nb,nn,'no',se  -- avoid!!
 93 |     #
 94 |     #  check: need we add true|false too???
 95 | 
 96 |     text = text.gsub( /:[ ]+(ON|On|on|OFF|Off|off|YES|Yes|yes|NO|No|no|Y|y|N|n)[ ]*($| #.*$)/ ) do |value|
 97 |       logger.warn "hash reader - found implicit bool (#{$1}) for value; adding quotes to turn into string; see yaml.org/refcard.html (path=#{path})"
 98 |       ": '#{$1}'"  # add quotes to turn it into a string (not bool e.g. true|false)
 99 |     end
100 | 
101 |     
102 |     @hash = YAML.load( text )
103 |   end
104 |   
105 |   ###
106 |   # nb: returns all values as strings
107 |   #
108 |   
109 |   def each
110 |     @hash.each do |key_wild, value_wild|
111 |       # normalize
112 |       # - key n value as string (not symbols, bool? int? array?)
113 |       # - remove leading and trailing whitespace
114 |       key   = key_wild.to_s.strip
115 |       value = value_wild.to_s.strip
116 |       
117 |       logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value}<<"
118 |     
119 |       yield( key, value )
120 |     end
121 |   end # method each
122 |   
123 |   ###
124 |   # todo: what name to use: each_object or each_typed ???
125 |   #   or use new TypedHashReader class or similar??
126 |   
127 |   def each_typed
128 |     @hash.each do |key_wild, value_wild|
129 |       # normalize
130 |       # - key n value as string (not symbols, bool? int? array?)
131 |       # - remove leading and trailing whitespace
132 |       key   = key_wild.to_s.strip
133 |       
134 |       if value_wild.is_a?( String )
135 |         value = value_wild.strip
136 |       else
137 |         value = value_wild
138 |       end
139 |       
140 |       logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value}<<"
141 |     
142 |       yield( key, value )
143 |     end
144 |   end # method each
145 | 
146 | end # class HashReader
147 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/title_mapper2.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | ## see textutils/title.rb
  5 | ##   for existing code
  6 | ##   move over here
  7 | 
  8 | 
  9 | module TextUtils
 10 | 
 11 | class TitleMapper2      ## todo/check: rename to NameMapper ? why? why not??
 12 | 
 13 |   include LogUtils::Logging
 14 | 
 15 |   attr_reader :known_titles   ## rename to mapping or mappings or just titles - why? why not?
 16 | 
 17 |   ##
 18 |   ##  key:      e.g. augsburg 
 19 |   ##  title:    e.g. FC Augsburg
 20 |   ##  length (of title - not pattern):   e.g. 11   -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not? 
 21 |   MappingStruct =  Struct.new( :key, :title, :length, :pattern)   ## todo/check: use (rename to) TitleStruct - why? why not??
 22 | 
 23 | 
 24 |   def initialize( records, tag )
 25 |     @known_titles = build_title_table_for( records )   ## build mapping lookup table
 26 | 
 27 |     ## todo: rename tag to attrib or attrib_name - why ?? why not ???
 28 |     @tag = tag   # e.g. tag name use for @@brewery@@ @@team@@ etc.
 29 |   end
 30 | 
 31 | 
 32 |   def map_titles!( line )   ## rename to just map! - why?? why not???
 33 |     begin
 34 |       found = map_title_for!( @tag, line, @known_titles )
 35 |     end while found
 36 |   end
 37 | 
  # Returns the key of the first @@oo{key}oo@@ marker in line (or nil if none);
  #  the marker gets replaced in-place with the upcased tag e.g. [TEAM]
  def find_key!( line )
    find_key_for!( @tag, line )
  end
 41 | 
 42 |   def find_keys!( line )  # NB: keys (plural!) - will return array
 43 |     counter = 1
 44 |     keys = []
 45 | 
 46 |     key = find_key_for!( "#{@tag}#{counter}", line )
 47 |     while key.present?
 48 |       keys << key
 49 |       counter += 1
 50 |       key = find_key_for!( "#{@tag}#{counter}", line )
 51 |     end
 52 |     keys
 53 |   end
 54 | 
 55 | 
 56 | private
 57 |   def build_title_table_for( records )
 58 | 
 59 |     ## build known tracks table w/ synonyms e.g.
 60 |     #
 61 |     # [[ 'wolfsbrug', 'VfL Wolfsburg'],
 62 |     #  [ 'augsburg',  'FC Augsburg'],
 63 |     #  [ 'augsburg',  'Augi2'],
 64 |     #  [ 'augsburg',  'Augi3' ],
 65 |     #  [ 'stuttgart', 'VfB Stuttgart']]
 66 | 
 67 |     known_titles = []
 68 | 
 69 |     records.each_with_index do |rec,index|
 70 | 
 71 |       title_candidates = []
 72 |       title_candidates << rec.title
 73 | 
 74 |       title_candidates += rec.synonyms.split('|') if rec.synonyms.present?
 75 | 
 76 | 
 77 |       ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
 78 |       #  make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
 79 | 
 80 |       titles = []
 81 |       title_candidates.each do |t|
 82 |         titles << t
 83 |         if t =~ /\(.+\)/
 84 |           extra_title = t.gsub( /\(.+\)/, '' ) # remove/delete subtitles
 85 |           # note: strip leading n trailing withspaces too!
 86 |           #  -- todo: add squish or something if () is inline e.g. leaves two spaces?
 87 |           extra_title.strip!
 88 |           titles << extra_title
 89 |         end
 90 |       end
 91 | 
 92 |       titles.each do |t|
 93 |         m = MappingStruct.new
 94 |         m.key     = rec.key
 95 |         m.title   = t
 96 |         m.length  = t.length
 97 |         ## note: escape for regex plus allow subs for special chars/accents
 98 |         m.pattern = TextUtils.title_esc_regex( t )
 99 |       
100 |         known_titles << m
101 |       end
102 | 
103 |       logger.debug "  #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"      
104 | 
105 |       ## NB: only include code field - if defined
106 |       if rec.respond_to?(:code) && rec.code.present?
107 |         m = MappingStruct.new
108 |         m.key     = rec.key
109 |         m.title   = rec.code
110 |         m.length  = rec.code.length
111 |         m.pattern = rec.code   ## note: use code for now as is (no variants allowed fow now)
112 | 
113 |         known_titles << m      
114 |       end
115 |     end
116 | 
117 |     ## note: sort here by length (largest goes first - best match)
118 |       #  exclude code and key (key should always go last)
119 |     known_titles = known_titles.sort { |left,right| right.length <=> left.length }
120 |     known_titles
121 |   end
122 | 
123 | 
124 |   def map_title_for!( tag, line, mappings )
125 | 
126 |     downcase_tag = tag.downcase
127 | 
128 |     mappings.each do |mapping|
129 |       
130 |       key   = mapping.key
131 |       value = mapping.pattern
132 |       ## nb: \b does NOT include space or newline for word boundry (only alphanums e.g. a-z0-9)
133 |       ## (thus add it, allows match for Benfica Lis.  for example - note . at the end)
134 | 
135 |       ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
136 |       regex = /\b#{value}(\b| |\t|$)/   # wrap with world boundry (e.g. match only whole words e.g. not wac in wacker) 
137 |       if line =~ regex
138 |         logger.debug "     match for #{downcase_tag}  >#{key}< >#{value}<"
139 |         # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
140 |         line.sub!( regex, "@@oo#{key}oo@@ " )    # NB: add one space char at end
141 |         return true    # break out after first match (do NOT continue)
142 |       end
143 |     end
144 |     return false
145 |   end
146 | 
147 | 
148 |   def find_key_for!( tag, line )
149 |     regex = /@@oo([^@]+?)oo@@/     # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])
150 | 
151 |     upcase_tag    = tag.upcase
152 |     downcase_tag  = tag.downcase
153 | 
154 |     if line =~ regex
155 |       value = "#{$1}"
156 |       logger.debug "   #{downcase_tag}: >#{value}<"
157 |       
158 |       line.sub!( regex, "[#{upcase_tag}]" )
159 | 
160 |       return $1
161 |     else
162 |       return nil
163 |     end
164 |   end # method find_key_for!
165 | 
166 | 
167 | end # class TitleMapper2
168 | end # module TextUtils
169 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/title.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | ## todo: rename to TitleFinder or TitleMapper ??
  5 | #  other options  TitleMatcher? 
  6 | #   TitleMapping? TitleMappings?
  7 | #   or rename to KeyMapping?, KeyMapper?, KeyTable? etc.
  8 | 
  9 | 
 10 | ######
 11 | ## todo/check:
 12 | ###   remove - use TitleMapper instead
 13 | ##  deprecated/obsolete - do NOT use will get removed
 14 | 
 15 | 
 16 | module TextUtils
 17 |   module TitleTable
 18 | 
 19 | ####
 20 | ## fix: turn it into a class w/ methods
 21 | #
 22 | #e.g  t =TitleMapper.new( records, name )  # e.g. name='team'
 23 | #  t.map!( line )
 24 | #  t.find_key!( line )
 25 | # etc.
 26 | #
 27 | #  see textutils/title_mapper.rb
 28 | #
 29 | #   deprecate code here!!! - move to new TitleMapper class
 30 | 
 31 | 
 32 | def build_title_table_for( records )
 33 |     LogUtils::Logger.root.info "  build_title_table_for - deprecated API - use TitleMapper.new instead"
 34 | 
 35 |     ## build known tracks table w/ synonyms e.g.
 36 |     #
 37 |     # [[ 'wolfsbrug', [ 'VfL Wolfsburg' ]],
 38 |     #  [ 'augsburg',  [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
 39 |     #  [ 'stuttgart', [ 'VfB Stuttgart' ]] ]
 40 | 
 41 |     known_titles = []
 42 | 
 43 |     records.each_with_index do |rec,index|
 44 | 
 45 |       title_candidates = []
 46 |       title_candidates << rec.title
 47 | 
 48 |       title_candidates += rec.synonyms.split('|') if rec.synonyms.present?
 49 | 
 50 | 
 51 |       ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
 52 |       #  make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
 53 | 
 54 |       titles = []
 55 |       title_candidates.each do |t|
 56 |         titles << t
 57 |         if t =~ /\(.+\)/
 58 |           extra_title = t.gsub( /\(.+\)/, '' ) # remove/delete subtitles
 59 |           extra_title.strip!   # strip leading n trailing withspaces too!
 60 |           titles << extra_title
 61 |         end
 62 |       end
 63 | 
 64 | 
 65 |       ## NB: sort here by length (largest goes first - best match)
 66 |       #  exclude code and key (key should always go last)
 67 |       titles = titles.sort { |left,right| right.length <=> left.length }
 68 |       
 69 |       ## escape for regex plus allow subs for special chars/accents
 70 |       titles = titles.map { |title| TextUtils.title_esc_regex( title )  }
 71 | 
 72 |       ## NB: only include code field - if defined
 73 |       titles << rec.code          if rec.respond_to?(:code) && rec.code.present?
 74 | 
 75 |       known_titles << [ rec.key, titles ]
 76 | 
 77 |       ### fix: use plain logger
 78 |       LogUtils::Logger.root.debug "  #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
 79 |     end
 80 | 
 81 |     known_titles
 82 | end
 83 | 
 84 | 
 85 | 
 86 | def find_key_for!( name, line )
 87 |   LogUtils::Logger.root.info "  find_key_for! #{name} - deprecated API - use TitleMapper.find_key! instead"
 88 | 
 89 |   regex = /@@oo([^@]+?)oo@@/     # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])
 90 | 
 91 |   upcase_name   = name.upcase
 92 |   downcase_name = name.downcase
 93 | 
 94 |   if line =~ regex
 95 |     value = "#{$1}"
 96 |     ### fix: use plain logger
 97 |     LogUtils::Logger.root.debug "   #{downcase_name}: >#{value}<"
 98 |       
 99 |     line.sub!( regex, "[#{upcase_name}]" )
100 | 
101 |     return $1
102 |   else
103 |     return nil
104 |   end
105 | end
106 | 
107 | 
# Deprecated - extracts the keys of ALL markers in line (note the plural).
#
# Calls find_key_for! over and over with a numbered name (name1, name2, ...),
#  thus, the markers get replaced in place with [NAME1], [NAME2], etc.
#
# Returns an array of keys (empty if the line holds no marker).
def find_keys_for!( name, line )
  LogUtils::Logger.root.info "  find_keys_for! #{name} - deprecated API - use TitleMapper.find_keys! instead"

  prefix = name.downcase
  found  = []
  number = 1

  loop do
    key = find_key_for!( "#{prefix}#{number}", line )
    break unless key.present?    # no more markers left

    found << key
    number += 1
  end

  found
end
125 | 
126 | 
# Deprecated - marks the known titles found in line.
#
# title_table - array of [key, patterns] pairs
#               (see build_title_table_for); every pair gets handed
#               to map_title_worker_for! (one replacement per pair at most)
#
# Returns the title_table passed in.
def map_titles_for!( name, line, title_table )
  LogUtils::Logger.root.info "  map_titles_for! #{name} - deprecated API - use TitleMapper.map_titles! instead"

  title_table.each do |pair|
    key, patterns = pair[0], pair[1]
    map_title_worker_for!( name, line, key, patterns )
  end
end
136 | 
137 | 
# Deprecated - replaces the first pattern found in line with
#  a @@oo<key>oo@@ marker.
#
# name   - used for the debug log output only (gets downcased)
# line   - string to scan; changed in place on a match
# key    - key to insert into the marker
# values - patterns to try (in order); interpolated into the regex as is
#
# Returns true on the first match (remaining patterns get skipped),
#  false if no pattern matches.
def map_title_worker_for!( name, line, key, values )

  log_name = name.downcase

  values.each do |pattern|
    ## note: \b only knows alphanumeric word boundaries; the trailing
    ##  alternatives (space, tab, end of line) let patterns that end
    ##  in a dot match too e.g. Benfica Lis.
    ## the leading \b keeps wac from matching inside wacker
    matcher = /\b#{pattern}(\b| |\t|$)/
    next unless line =~ matcher

    LogUtils::Logger.root.debug "     match for #{log_name}  >#{key}< >#{pattern}<"
    # the @@oo..oo@@ wrapper keeps the inserted key from getting matched
    #  by another (shorter) title later on e.g. wacker, wac, etc.
    line.sub!( matcher, "@@oo#{key}oo@@ " )    # note: one space gets appended
    return true    # first match wins - do NOT continue
  end

  false
end
158 | 
159 |   end # module TitleTable
160 | end # module TextUtils
161 | 
162 | 
163 | ## auto-include methods
164 | 
module TextUtils
  # expose the (deprecated) title table helpers as module-level methods
  #  e.g. TextUtils.build_title_table_for, TextUtils.find_key_for! etc.
  extend TextUtils::TitleTable
end
169 | 
170 | 
171 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/helper/title_helper.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | ###
  5 | #
  6 | # fix: move to filter!!!!
  7 | #   follows   fn( content ) pattern!!!
  8 | 
  9 | 
 10 | module TextUtils
 11 |   module TitleHelper
 12 | 
 13 | ####
 14 | # - todo: use new additional sub module ???
 15 | #     e.g. TextUtils::Reader::TagHelper
 16 | #   lets us use "classic" web helpers a la rails
 17 | #   find a good name for sub module -  Reader? Fixtures? Values? Parser? 
 18 | 
 19 |   def strip_part_markers( title )   # use different name e.g. strip_name_markers/strip_name_enclosure etc.??
 20 |      # remove optional part markers
 21 |      # e.g. Bock ‹Damm› becomes =>  Bock Damm
 22 |      #      ‹Estrella› ‹Damm› Inedit becomes =>  Estrella Damm Inedit
 23 | 
 24 |      title.gsub( /[<>‹›]/, '' )
 25 |   end
 26 | 
 27 |   def strip_translations( title )
 28 |       # remove optional english translation in square brackets ([])
 29 |       # e.g. Wien [Vienna]  =>  Wien
 30 | 
 31 |       title.gsub( /\[[^\]]+\]/, '' )
 32 |   end
 33 | 
 34 |   def strip_subtitles( title )
 35 |       # remove optional longer title part in ()
 36 |       # e.g. Las Palmas (de Gran Canaria) => Las Palmas
 37 |       #      Palma (de Mallorca) => Palma
 38 | 
 39 |       title.gsub( /\([^\)]+\)/, '' )
 40 |   end
 41 | 
 42 |   def strip_tags( title )   # todo: use an alias or rename for better name ??
 43 |       # remove optional longer title part in {}
 44 |       #  e.g. Ottakringer {Bio}   => Ottakringer
 45 |       #       Ottakringer {Alkoholfrei} => Ottakringer
 46 |       #
 47 |       # todo: use for autotags? e.g. {Bio} => bio 
 48 |       
 49 |       title.gsub( /\{[^\}]+\}/, '' )
 50 |   end
 51 | 
 52 |   def strip_whitespaces( title )
 53 |       # remove all whitespace and punctuation
 54 |       title.gsub( /[ \t_\-\.!()\[\]'"’\/]/, '' )
 55 |   end
 56 | 
 57 |   def strip_special_chars( title )
 58 |       # remove special chars (e.g. %°&$)
 59 |       # e.g. +Malta
 60 |       #      Minerva 8:60
 61 |       #      $Alianz$ Arena
 62 |       title.gsub( /[%&°+:$]/, '' )
 63 |   end
 64 | 
 65 |   def title_to_key( title )
 66 | 
 67 |       ## NB: used in/moved from readers/values_reader.rb
 68 | 
 69 |       ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
 70 |       key = title.downcase
 71 | 
 72 |       key = strip_part_markers( key )  # e.g. ‹Estrella› ‹Damm› Inedit becomes =>  Estrella Damm Inedit
 73 | 
 74 |       key = strip_translations( key )
 75 | 
 76 |       key = strip_subtitles( key )
 77 | 
 78 |       key = strip_tags( key )
 79 | 
 80 |       key = strip_whitespaces( key )
 81 | 
 82 |       key = strip_special_chars( key )
 83 | 
 84 |       key = TextUtils.asciify( key ).downcase  ## see filter/string_filter
 85 | 
 86 |       key
 87 |   end # method title_to_key
 88 | 
 89 | 
 90 |   def title_esc_regex( title_unescaped )
 91 |       
 92 |       ##  escape regex special chars e.g.
 93 |       #    . to \. and
 94 |       #    ( to \(
 95 |       #    ) to \)
 96 |       #    ? to \? -- zero or one
 97 |       #    * to \* -- zero or more
 98 |       #    + to \+ -- one or more
 99 |       #    $ to \$ -- end of line
100 |       #    ^ to \^ -- start of line etc.
101 |       
102 |       ### add { and } ???
103 |       ### add [ and ] ???
104 |       ### add \ too ???
105 |       ### add | too ???
106 | 
107 |       # e.g. Benfica Lis.
108 |       # e.g. Club Atlético Colón (Santa Fe)
109 |       # e.g. Bauer Anton (????)
110 | 
111 |       ## NB: cannot use Regexp.escape! will escape space '' to '\ '
112 |       ## title = Regexp.escape( title_unescaped )
113 |       title = title_unescaped.gsub( '.', '\.' )
114 |       title = title.gsub( '(', '\(' )
115 |       title = title.gsub( ')', '\)' )
116 |       title = title.gsub( '?', '\?' )
117 |       title = title.gsub( '*', '\*' )
118 |       title = title.gsub( '+', '\+' )
119 |       title = title.gsub( '$', '\$' )
120 |       title = title.gsub( '^', '\^' )
121 | 
122 |       ##  match accented char with or without accents
123 |       ##  add (ü|ue) etc.
124 |       ## also make - optional change to (-| ) e.g. Blau-Weiss == Blau Weiss
125 | 
126 |       ## todo: add some more
127 |       ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references  for more
128 |       ##
129 |       ##  reuse for all readers!
130 |       
131 |       alternatives = [
132 |         ['-', '(-| )'],  ## e.g. Blau-Weiß Linz
133 |         ['æ', '(æ|ae)'],  ## e.g. 
134 |         ['ä', '(ä|ae)'],  ## e.g. 
135 |         ['Ö', '(Ö|Oe)'],  ## e.g. Österreich
136 |         ['ö', '(ö|oe)'],  ## e.g. Mönchengladbach
137 |         ['ß', '(ß|ss)'],  ## e.g. Blau-Weiß Linz
138 |         ['ü', '(ü|ue)'],  ## e.g. 
139 | 
140 |         ['á', '(á|a)'],  ## e.g. Bogotá, Sársfield
141 |         ['ã', '(ã|a)'],  ## e.g  São Paulo
142 |         ['ç', '(ç|c)'],  ## e.g. Fenerbahçe
143 |         ['é', '(é|e)'],  ## e.g. Vélez
144 |         ['ê', '(ê|e)'],  ## e.g. Grêmio
145 |         ['ï', '(ï|i)' ], ## e.g. El Djazaïr
146 |         ['ñ', '(ñ|n)'],  ## e.g. Porteño
147 |         ['ň', '(ň|n)'],  ## e.g. Plzeň
148 |         ['ó', '(ó|o)'],   ## e.g. Colón
149 |         ['ō', '(ō|o)'],  # # e.g. Tōkyō
150 |         ['ș', '(ș|s)'],   ## e.g. Bucarești
151 |         ['ú', '(ú|u)']  ## e.g. Fútbol
152 |       ]
153 | 
154 |       ### fix/todo:  check for  dot+space e.g. . and make dot optional
155 |       ##
156 |       #  e.g. make  dot (.) optional plus allow alternative optional space e.g.
157 |       #   -- for U.S.A. => allow USA or U S A
158 |       #
159 |       ##    e.g. U. de G. or U de G or U.de G. ??
160 |       ##   collect some more (real-world) examples first!!!!!
161 | 
162 |       alternatives.each do |alt|
163 |         title = title.gsub( alt[0], alt[1] )
164 |       end
165 | 
166 |       title
167 |   end
168 | 
169 | 
170 |   end # module TitleHelper
171 | end # module TextUtils
172 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/helper/address_helper.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | module TextUtils
  5 |   module AddressHelper
  6 | 
  # Moves the postal code + city line to the front of a two-line address.
  #
  #  e.g.  Alte Plauener Straße 24 // 95028 Hof  becomes
  #        95028 Hof // Alte Plauener Straße 24
  #
  # for now only checks german (de) 5-digit zip codes and
  #                     austrian (at) 4-digit zip codes
  #
  # old_address - address lines separated by //
  # country_key - 'at' or 'de'; if missing (nil) a deprecation note
  #               gets printed and the address is passed through
  #
  # Returns the reordered address (lines stripped) or the old_address as is
  #  (if not two lines, unknown country or the 2nd line holds no postal code).
  def normalize_addr( old_address, country_key=nil )

    if country_key.nil?
      puts "TextUtils.normalize_addr drepreciated call - country_key now required; please add !!"
      return old_address
    end

    parts = old_address.split( '//' )
    return old_address   unless parts.size == 2   # only two-liners qualify for switching lines

    first, second = parts.map { |part| part.strip }

    postal_code = case country_key
                  when 'at' then /^[0-9]{4}\s+/   # four digits postal code
                  when 'de' then /^[0-9]{5}\s+/   # five digits postal code
                  end

    return old_address   if postal_code.nil? || second !~ postal_code

    "#{second} // #{first}"
  end
 40 | 
 41 | 
 42 |   def find_city_in_addr_without_postal_code( address )
 43 | 
 44 |     ## general rule; not country-specific; no postal code/zip code or state
 45 |     #  - must be like two lines (one line empty) e.g.
 46 |     #  // London   or
 47 |     # London //
 48 |     #  will assume entry is city
 49 |     #  note: city may NOT include numbers, or pipe (|) or comma (,) chars
 50 | 
 51 |     # fix: use blank?
 52 |     return nil if address.nil? || address.empty?    # do NOT process nil or empty address lines; sorry
 53 | 
 54 |     old_lines = address.split( '//' )
 55 | 
 56 |     ###
 57 |     # note:   London //   will get split into arry with size 1 e.g. ['London ']
 58 |     #   support it, that is, add missing empty line
 59 | 
 60 |     # 1) strip lines
 61 |     # 2) remove blank lines
 62 |     lines = []
 63 |     
 64 |     old_lines.each do |line|
 65 |       linec = line.strip
 66 |       next if linec.empty?
 67 |       lines << linec
 68 |     end
 69 | 
 70 |     if lines.size == 1
 71 |       linec = lines[0]
 72 |         #  note: city may NOT include
 73 |         #   numbers  (e.g. assumes zip/postal code etc.) or
 74 |         #   pipe (|) or
 75 |         #   comma (,)
 76 |       if linec =~ /[0-9|,]/
 77 |         return nil
 78 |       end
 79 |         #   more than two uppercase letters e.g. TX NY etc.
 80 |         #  check if city exists wit tow uppercase letters??
 81 |       if linec =~ /[A-Z]{2,}/
 82 |         return nil
 83 |       end
 84 |       return linec   # bingo!!! assume candidate line is a city name
 85 |     end
 86 | 
 87 |     nil  # no generic city match found
 88 |   end
 89 | 
 90 | 
 91 |   def find_city_in_addr_with_postal_code( address, country_key )
 92 | 
 93 |     # fix: use blank?
 94 |     return nil if address.nil? || address.empty?    # do NOT process nil or empty address lines; sorry
 95 | 
 96 |     lines = address.split( '//' )
 97 | 
 98 |     if country_key == 'at' || country_key == 'be'
 99 |       # support for now
100 |       #  - 2018 Antwerpen or 2870 Breendonk-Puurs (be)
101 |       lines.each do |line|
102 |         linec = line.strip
103 |         regex_nnnn = /^[0-9]{4}\s+/ 
104 |         if linec =~ regex_nnnn   # must start w/ four digit postal code ? assume its the city line
105 |           return linec.sub( regex_nnnn, '' )  # cut off leading postal code; assume rest is city
106 |         end
107 |       end
108 |     elsif country_key == 'de'
109 |       lines.each do |line|
110 |         linec = line.strip
111 |         regex_nnnnn = /^[0-9]{5}\s+/
112 |         if linec =~ regex_nnnnn   # must start w/ five digit postal code ? assume its the city line
113 |           return linec.sub( regex_nnnnn, '' )  # cut off leading postal code; assume rest is city
114 |         end
115 |       end
116 |     elsif country_key == 'cz' || country_key == 'sk'
117 |       # support for now
118 |       #  - 284 15  Kutná Hora or  288 25  Nymburk (cz)
119 |       #  - 036 42  Martin     or  974 05  Banská Bystrica (sk)
120 |       lines.each do |line|
121 |         linec = line.strip
122 |         regex_nnn_nn = /^[0-9]{3}\s[0-9]{2}\s+/
123 |         if linec =~ regex_nnn_nn   # must start w/ five digit postal code ? assume its the city line
124 |           return linec.sub( regex_nnn_nn, '' )  # cut off leading postal code; assume rest is city
125 |         end
126 |       end
127 |     elsif country_key == 'us'
128 |       # support for now
129 |       #  - Brooklyn | NY 11249  or Brooklyn, NY 11249
130 |       #  - Brooklyn | NY   or Brooklyn, NY
131 | 
132 |       lines.each do |line|
133 |         linec = line.strip
134 |         regexes_us = [/\s*[|,]\s+[A-Z]{2}\s+[0-9]{5}\s*$/,
135 |                       /\s*[|,]\s+[A-Z]{2}\s*$/]
136 |         
137 |         regexes_us.each do |regex|
138 |           if linec =~ regex
139 |             return linec.sub( regex, '' )  # cut off leading postal code; assume rest is city
140 |           end
141 |         end
142 |       end
143 |     else
144 |       # unsupported country/address schema for now; sorry
145 |     end
146 |     return nil   # sorry nothing found
147 |   end
148 | 
149 | 
150 |   def find_city_in_addr( address, country_key )
151 | 
152 |     # fix: use blank?
153 |     return nil if address.nil? || address.empty?    # do NOT process nil or empty address lines; sorry
154 | 
155 |     ## try geneneric rule first (e.g. w/o postal code/zip code or state), see above
156 |     city = find_city_in_addr_without_postal_code( address )
157 |     return city unless city.nil?
158 |     
159 |     city = find_city_in_addr_with_postal_code( address, country_key )
160 |     return city unless city.nil?
161 | 
162 |     nil # sorry; no city found (using known patterns)
163 |   end
164 | 
165 | 
166 |   end # module AddressHelper
167 | end # module TextUtils
168 | 


--------------------------------------------------------------------------------
/textutils/lib/textutils/helper/hypertext_helper.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | module TextUtils
  4 |   module HypertextHelper
  5 | 
  6 | 
  7 | def strip_tags( ht )
  8 |   ### to be done
  9 |   ## strip markup tags; return plain text; use brute force for now
 10 |   # check at least for presence of required a-z+ tag names
 11 |   #
 12 |   #  note: make sure we cover h1/h2/h3/h4/h5/h6  tag w/ number!!
 13 | 
 14 |   ### ht.gsub( /<[^>]+>/, '' ) - old simple
 15 | 
 16 |   ## todo: add strip comments e.g. <!-- xxxx --> ???
 17 |   ##  or use new strip_comments( ht )
 18 | 
 19 | 
 20 |   ## note: follow offical xml spec
 21 |   ##  - allows for first char:  (Letter | '_' | ':')
 22 |   ##  - allows for followup chars: (Letter | Digit | '_' | ':' | '.' | '-')
 23 | 
 24 |   tag_name_pattern = "[a-z_:][a-z0-9_:.\\-]*"
 25 | 
 26 |   empty_tag_pattern   =  "<#{tag_name_pattern}\\s*/>"
 27 |   opening_tag_pattern =  "<#{tag_name_pattern}(\\s+[^>]*)?>"
 28 |   closing_tag_pattern =  "</#{tag_name_pattern}\\s*>"
 29 | 
 30 |   ht = ht.gsub( /#{empty_tag_pattern}/i, '' )    # remove xml-style empty tags eg. <br /> or <br/>
 31 |   ht = ht.gsub( /#{opening_tag_pattern}/i, '' )  # opening tag <p>
 32 |   ht = ht.gsub( /#{closing_tag_pattern}/i, '' )  # closing tag e.g. </p>
 33 |   ht
 34 | end
 35 | 
 36 | 
 37 | def whitelist( ht, tags, opts={} )
 38 | 
 39 |   # note: assumes properly escaped <> in ht/hypertext
 40 | 
 41 |   ###############################################
 42 |   # step one - save whitelisted tags use ‹tag›
 43 |   tags.each do |tag|
 44 |     # note: we strip all attribues
 45 |     # note: match all tags case insensitive e.g. allow a,A or br,BR,bR etc.
 46 |     #   downcase all tags
 47 | 
 48 |     # convert xml-style empty tags to simple html emtpty tags
 49 |     #  e.g. <br/> or <br /> becomses <br>
 50 |     ht = ht.gsub( /<(#{tag})\s*\/>/i )       { |_| "‹#{$1.downcase}›" }   # eg. <br /> or <br/> becomes ‹br›
 51 | 
 52 |     # make sure we won't swall <br> for <b> for example, thus use \s+ before [^>]
 53 |     ht = ht.gsub( /<(#{tag})(\s+[^>]*)?>/i ) { |_| "‹#{$1.downcase}›" }   # opening tag <p>
 54 |     ht = ht.gsub( /<\/(#{tag})\s*>/i )       { |_| "‹/#{$1.downcase}›" }  # closing tag e.g. </p>
 55 |   end
 56 | 
 57 |   ############################
 58 |   # step two - clean tags
 59 | 
 60 |   #   strip images - special treatment for debugging
 61 |   ht = ht.gsub( /<img[^>]*>/i, '♦' )   # for debugging use black diamond e.g. ♦
 62 |   ht = ht.gsub( /<\/img>/i, '' )   # should not exists
 63 | 
 64 |   # strip all remaining tags
 65 |   #  -- note: will NOT strip comments for now e.g. <!-- -->
 66 |   ht = strip_tags( ht )
 67 | 
 68 |   ## pp ht  # fix: debugging indo - remove
 69 | 
 70 |   ############################################
 71 |   # step three - restore whitelisted tags
 72 | 
 73 |   return ht   if opts[:skip_restore]   # skip step 3 for debugging
 74 | 
 75 |   tags.each do |tag|
 76 | #      ht = ht.gsub( /‹(#{tag})›/, "<\1>" )  # opening tag e.g. <p>
 77 | #      ht = ht.gsub( /‹\/(#{tag})›/, "<\/\1>" )  # closing tag e.g. </p>
 78 |     ht = ht.gsub( /‹(#{tag})›/ )   { |_| "<#{$1}>" }
 79 |     ht = ht.gsub( /‹\/(#{tag})›/ ) { |_| "<\/#{$1}>" }  # closing tag e.g. </p>
 80 |   end
 81 | 
 82 |   ht
 83 | end  # method whitelist
 84 | 
 85 | 
 86 | 
 87 | 
 88 | ##  change to simple_hypertext or
 89 | #     hypertext_simple or
 90 | #     sanitize ???
 91 | 
 92 | def sanitize( ht, opts={} )  # ht -> hypertext
 93 |   # todo: add options for
 94 |   #   keep links, images, lists (?too), code, codeblocks
 95 | 
 96 |   ht = whitelist( ht, [:br, :p, :ul, :ol, :li, :pre, :code, :blockquote, :q, :cite], opts )
 97 | 
 98 | # clean (prettify) literal urls (strip protocoll) 
 99 |   ht = ht.gsub( /(http|https):\/\//, '' )
100 |   ht
101 | end
102 | 
103 | 
# Turns hypertext into plain (or markdown/wiki-style) text - to be done;
#  for now returns the sanitized hypertext only (see sanitize).
def textify( ht, opts={} )   # ht -> hypertext

  text = sanitize( ht, opts )   # step 1 - sanitize html

  ## to be done - planned next steps (drafts; NOT active):
  #
  # strip bold
  #    ht = ht.gsub( /<b[^>]*>/, '**' )  # fix: will also swallow bxxx tags - add b space
  #    ht = ht.gsub( /<\/b>/, '**' )
  #
  # strip em
  #    ht = ht.gsub( /<em[^>]*>/, '__' )
  #    ht = ht.gsub( /<\/em>/, '__' )
  #
  #    ht = ht.gsub( /&nbsp;/, ' ' )
  #
  # try to cleanup whitespaces
  #    # -- keep no more than two spaces
  #    ht = ht.gsub( /[ \t]{3,}/, '  ' )
  #    # -- keep no more than two new lines
  #    ht = ht.gsub( /\n{2,}/m, "\n\n" )
  #    # -- remove all trailing spaces
  #    ht = ht.gsub( /[ \t\n]+$/m, '' )
  #    # -- remove all leading spaces
  #    ht = ht.gsub( /^[ \t\n]+/m, '' )

  text
end
130 | 
131 | 
132 | ##############################
133 | #  rails-style asset, url tag helpers and friends
134 | #
135 | #  todo:  move into different helper module/modules?? why? why not?
136 | 
# Builds an empty tag (no content and no closing tag) e.g. <br>, <img src=''> etc.
#
# tag  - tag name (symbol or string)
# opts - attributes; added as key='value' (single quotes; in insertion order)
#        note: keys and values get inserted as is (no escaping)
#
# Returns the tag markup (string).
def tag( tag, opts={} )
  attribs = opts.map { |key,value| "#{key}='#{value}'" }

  attribs.empty? ? "<#{tag}>" : "<#{tag} #{attribs.join(' ')}>"
end
149 | 
# Builds a content tag (w/ opening and closing tag) e.g. <p>hello</p>
#
# tag     - tag name (symbol or string)
# content - inserted as is between the opening and closing tag
# opts    - attributes; added as key='value' (single quotes; in insertion order)
#           note: keys and values get inserted as is (no escaping)
#
# Returns the tag markup (string).
def content_tag( tag, content, opts={} )
  attribs = opts.map { |key,value| "#{key}='#{value}'" }

  opening = attribs.empty? ? "<#{tag}>" : "<#{tag} #{attribs.join(' ')}>"

  "#{opening}#{content}</#{tag}>"
end
162 | 
163 | 
# Builds a stylesheet link tag e.g. <link rel='stylesheet' type='text/css' href='style.css'>
#
# href - path to the stylesheet; .css gets auto-added if not present
# opts - more attributes; note: will overwrite rel, type and href if present
#
# Returns the tag markup (string).
def stylesheet_link_tag( href, opts={} )
  css_href = href.end_with?( '.css' ) ? href : "#{href}.css"

  ### fix/todo: use reverse merge e.g. overwrite only if not present
  defaults = { rel: 'stylesheet', type: 'text/css', href: css_href }
  tag( :link, defaults.merge( opts ) )
end
172 | 
# Builds an image tag e.g. <img src='logo.png'>
#
# src  - image source
# opts - more attributes; note: will overwrite src if present
#
# Returns the tag markup (string).
def image_tag( src, opts={} )
  ### fix/todo: use reverse merge e.g. overwrite only if not present
  tag( :img, { src: src }.merge( opts ) )
end
178 | 
# Builds a link (anchor) tag e.g. <a href='/'>home</a>
#
# content - link text (or markup); inserted as is
# href    - link target
# opts    - more attributes; note: will overwrite href if present
#
# Returns the tag markup (string).
def link_to( content, href, opts={} )
  ### fix/todo: use reverse merge e.g. overwrite only if not present
  content_tag( :a, content, { href: href }.merge( opts ) )
end
184 | 
185 | 
186 |   end # module HypertextHelper
187 | end # module TextUtils
188 | 


--------------------------------------------------------------------------------