├── .clang-format
├── .gitattributes
├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── Makefile
├── README.md
├── balance
    ├── Makefile
    ├── balance.cc
    ├── balance.lext
    └── build.sh
├── carmel
    ├── .project
    ├── CMakeLists.txt
    ├── ChangeLog
    ├── Doxyfile
    ├── LICENSE
    ├── Makefile
    ├── NOTES
    ├── README
    ├── ToDo
    ├── carmel
    ├── carmel-tutorial
    │   ├── carmel-training.pdf
    │   ├── cat.fsa
    │   ├── cat.fsa.trained
    │   ├── cat.fsa.trained.noe
    │   ├── cipher.data
    │   ├── cipher.data.noe
    │   ├── cipher.fst
    │   ├── cipher.fst.trained
    │   ├── cipher.gold
    │   ├── cipher.wfsa
    │   ├── cipher.wfsa.noe
    │   ├── cipher.wfsa.trained
    │   ├── cluster.data
    │   ├── cluster.data.noe
    │   ├── cluster.fsa
    │   ├── commands
    │   ├── commands.trace
    │   ├── delete.fst
    │   ├── delete.fst.trained
    │   ├── deltrans.data
    │   ├── deltrans.data.compact
    │   ├── epron-jpron.data
    │   ├── epron-jpron.fst
    │   ├── spellout.fst
    │   ├── spellout.fst.trained
    │   ├── tagging.data
    │   ├── tagging.data.noe
    │   ├── tagging.fsa
    │   ├── tagging.fsa.trained
    │   ├── tagging.fsa.trained.noe
    │   ├── tagging.fst
    │   ├── tagging.fst.trained
    │   ├── tagging.key
    │   ├── trans.fst
    │   └── trans.fst.trained
    ├── carmel-tutorial2.pdf
    ├── debug.sh
    ├── doc
    │   ├── FORMATS
    │   ├── carmel-tutorial.html
    │   ├── carmel-tutorial.ps
    │   └── carmel-tutorial_files
    │   │   ├── filelist.xml
    │   │   ├── image001.gif
    │   │   ├── image002.gif
    │   │   ├── image003.gif
    │   │   ├── image004.gif
    │   │   ├── image005.gif
    │   │   ├── image006.gif
    │   │   ├── image007.gif
    │   │   ├── image008.gif
    │   │   ├── image009.gif
    │   │   ├── image010.gif
    │   │   └── image011.gif
    ├── make-dictionary.pl
    ├── sample
    │   ├── chain.1
    │   ├── chain.2
    │   ├── chain.corpus
    │   ├── chain.mid
    │   ├── comments
    │   ├── decipher
    │   │   ├── README
    │   │   ├── cipher
    │   │   ├── cipher2
    │   │   ├── cipherbad
    │   │   ├── correct
    │   │   ├── correct2
    │   │   ├── errors.sh
    │   │   ├── fem.sh
    │   │   ├── plain.bi.wfsa
    │   │   ├── plain.tri.wfsa
    │   │   ├── subst.wfst
    │   │   └── to-fem.sh
    │   ├── do.graphviz.srilm.sh
    │   ├── egraph
    │   ├── emptyfsa
    │   ├── fsa1
    │   ├── fsa10
    │   ├── fsa11
    │   ├── fsa2
    │   ├── fsa3
    │   ├── fsa4
    │   ├── fsa5
    │   ├── fsa6
    │   ├── fsa7
    │   ├── fsa8
    │   ├── fsa9
    │   ├── fst1
    │   ├── fst2
    │   ├── kevin_g
    │   ├── negative.cost.kbest
    │   ├── nested
    │   ├── space
    │   │   ├── letters.nosp
    │   │   ├── subst2.wfst
    │   │   ├── subst3.wfst
    │   │   └── words.wfsa
    │   ├── tag
    │   │   ├── README
    │   │   ├── channel.fst
    │   │   ├── cipher
    │   │   ├── correct
    │   │   └── source.fsa
    │   ├── test.sh
    │   ├── tiny.sri
    │   ├── tmp
    │   │   ├── a
    │   │   ├── a.2
    │   │   ├── a.t
    │   │   ├── a1
    │   │   ├── a2
    │   │   ├── a3
    │   │   ├── a3.t
    │   │   ├── aa
    │   │   ├── aaa
    │   │   ├── b1.wfsa
    │   │   ├── fsa1
    │   │   ├── fsa2
    │   │   ├── fsa3
    │   │   ├── fsa4
    │   │   ├── fsa4.1
    │   │   ├── fsa5
    │   │   ├── fsa6
    │   │   ├── fsa7
    │   │   ├── fst1
    │   │   ├── fst1.1
    │   │   ├── fst1.2
    │   │   ├── fst1.3
    │   │   ├── fst2
    │   │   ├── t
    │   │   ├── t.1
    │   │   ├── t1
    │   │   ├── t2
    │   │   ├── wfsa1
    │   │   ├── wfsa2
    │   │   ├── wfst1
    │   │   ├── wfst2
    │   │   └── wfst3
    │   ├── tree-cascade
    │   │   ├── README
    │   │   ├── hidden.fsa
    │   │   ├── observed0.data
    │   │   ├── observed0.fst
    │   │   ├── observed1.data
    │   │   ├── observed1.fst
    │   │   ├── observed2.data
    │   │   ├── observed2.fst
    │   │   ├── s
    │   │   │   ├── hidden.fsa
    │   │   │   ├── observed0.data
    │   │   │   ├── observed0.fst
    │   │   │   ├── observed1.data
    │   │   │   ├── observed1.fst
    │   │   │   ├── observed2.data
    │   │   │   └── observed2.fst
    │   │   └── train.sh
    │   ├── wfsa.perplexity
    │   ├── wfsa1
    │   ├── wfsa2
    │   ├── wfsa3
    │   ├── wfsa4
    │   ├── wfst1
    │   ├── wfst2
    │   ├── wfst2.preprune
    │   ├── wfst3
    │   ├── wfst3c
    │   └── wfstlog
    ├── src
    │   ├── Makefile
    │   ├── WARNING
    │   ├── cached_derivs.h
    │   ├── carmel.cc
    │   ├── cascade.h
    │   ├── compose.cc
    │   ├── compose.h
    │   ├── config.hpp
    │   ├── derivations.h
    │   ├── fst.cc
    │   ├── fst.h
    │   ├── gibbs.cc
    │   ├── gibbs.txt
    │   ├── models.h
    │   ├── sri2fsa.pl
    │   ├── state.h
    │   ├── tests
    │   │   ├── Makefile
    │   │   └── Tweight.cc
    │   ├── train.cc
    │   ├── train.h
    │   └── wfstio.cc
    ├── test
    │   ├── N.cascade.train.gen.sh
    │   ├── angela.knight.kbest.wfst
    │   ├── asciikana-katakana.transducer
    │   ├── bad.-a.1
    │   ├── bad.-a.2
    │   ├── compose-test.sh
    │   ├── determinize.usr.dict.sh
    │   ├── empty
    │   ├── epron-jpron.1.transducer
    │   ├── fsa7
    │   ├── j-test-jap
    │   ├── jpron-asciikana.transducer
    │   ├── jpron.transducer
    │   ├── kbest.small.cycle
    │   ├── phillip.kbest
    │   ├── prune.test
    │   ├── runtests.sh
    │   ├── span.spell.corpus
    │   ├── span.spell.wfst
    │   ├── test.asciikana
    │   ├── test.compose.-a.sh
    │   ├── test.epron
    │   ├── test.final
    │   ├── test.jpron2
    │   ├── test.kana
    │   ├── test.katakana
    │   ├── test.word
    │   ├── train.a
    │   ├── train.a.u
    │   ├── train.a.w
    │   ├── train.cascade.gen.sh
    │   ├── train.self.gen.sh
    │   ├── traintest.sh
    │   ├── vowel-separator.transducer
    │   ├── wfst2
    │   ├── word-epron.names.55000wds.transducer
    │   ├── word.names.50000wds.transducer
    │   ├── y.data
    │   ├── y1.new
    │   ├── y2.new
    │   └── y4.new
    └── training-code.txt
├── catn0.cc
├── cipher
    ├── baseline.2.pl
    ├── carmel-quote-words
    ├── class-features
    ├── class-ngrams
    ├── class-word-fst
    ├── class-word-wfst
    ├── count-ngrams
    ├── decipher
    ├── decipher-classes
    ├── do-classes
    ├── encipher
    ├── epsilon-string-pairs
    ├── eval-classes
    ├── filter_docid
    ├── full-class-channel
    ├── soft-classes
    ├── split-words
    ├── sri2fsa.pl
    ├── summary-classes
    ├── text-to-classes
    ├── unigram-freq-bands
    ├── word-accuracy
    └── word-freq
├── clm
    ├── NOTES
    ├── clm-jan-09.pdf
    ├── e-parse-yield.pl
    ├── extract.clm.sh
    ├── phrasal-clm-events
    ├── rule_context.txt
    ├── shen08.pdf
    ├── stripEF.pl
    └── uniq_srilm.pl
├── forest-em
    ├── .gitignore
    ├── CMakeLists.txt
    ├── Makefile
    ├── README
    ├── forest-em-button.sh
    ├── forest-em-params.cpp
    ├── forest-em-params.hpp
    ├── forest-em.README
    ├── forest-em.cpp
    ├── forest-em.hpp
    ├── forest.hpp
    ├── forestviz.cpp
    └── sample
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── best_forest
    │   ├── best_norm
    │   ├── best_weights
    │   ├── byid_rules
    │   ├── derivs
    │       ├── first10.deriv
    │       ├── first10.norm
    │       ├── first10.rules
    │       ├── first100.deriv
    │       ├── first100.norm
    │       ├── first100.rules
    │       ├── first1000.deriv
    │       ├── first1000.norm
    │       ├── first1000.rules
    │       ├── first10000.deriv
    │       ├── first10000.norm
    │       └── first10000.rules
    │   ├── forest
    │   ├── forests
    │   ├── forests.gz
    │   ├── ints
    │   ├── norm
    │   ├── norm_and_forests
    │   ├── raw_weight_array
    │   ├── rule_list
    │   ├── testderivs.sh
    │   └── tree.gz
├── gextract
    ├── 10k.a
    ├── 10k.e-parse
    ├── 10k.f
    ├── 10k.info
    ├── astronauts.a
    ├── astronauts.e-parse
    ├── astronauts.f
    ├── castronauts.a
    ├── castronauts.a-gold
    ├── castronauts.e-parse
    ├── castronauts.f
    ├── check.e-parse.py
    ├── dendro.py
    ├── do.mono.sh
    ├── dumpx.py
    ├── e-parse.format.txt
    ├── etree.py
    ├── gextract.py
    ├── gflags.py
    ├── graehl.py
    ├── optfunc.py
    ├── optfunc
    │   ├── __init__.py
    │   └── optfunc.py
    ├── radu2ptb.pl
    ├── reviz.sh
    ├── subset-training.py
    ├── training.a
    ├── training.e-parse
    ├── training.f
    └── tree.py
├── graehl
    ├── graehl.mk
    └── shared
    │   ├── .gdbinit
    │   ├── .gitignore
    │   ├── 2hash.h
    │   ├── 2heap.h
    │   ├── ChangeLog
    │   ├── FixedBuffer.h
    │   ├── Lx_norm.hpp
    │   ├── Makefile
    │   ├── README
    │   ├── SGT.c
    │   ├── SGT.counts.txt
    │   ├── SGT.hpp
    │   ├── __gmsl
    │   ├── _template.hpp
    │   ├── abs_int.hpp
    │   ├── accumulate.hpp
    │   ├── adjustableheap.hpp
    │   ├── adl_print.hpp
    │   ├── adl_to_string.hpp
    │   ├── align.hpp
    │   ├── aligned_allocator.hpp
    │   ├── aligned_dynamic_array.hpp
    │   ├── alloc_new_delete.hpp
    │   ├── alloc_stack.hpp
    │   ├── any_all.hpp
    │   ├── any_callable.hpp
    │   ├── append.hpp
    │   ├── arc.h
    │   ├── array.hpp
    │   ├── array_stream.hpp
    │   ├── assertlvl.hpp
    │   ├── assign_traits.hpp
    │   ├── assoc_container.hpp
    │   ├── atoi_fast.hpp
    │   ├── auto_report.hpp
    │   ├── backtrace.hpp
    │   ├── band_matrix.hpp
    │   ├── barrier.hpp
    │   ├── base64.hpp
    │   ├── batched_append.hpp
    │   ├── best_tree_options.hpp
    │   ├── bit_arithmetic.hpp
    │   ├── bitarray.h
    │   ├── bitset.hpp
    │   ├── blocks.c
    │   ├── blocks.h
    │   ├── breakpoint.hpp
    │   ├── byref.hpp
    │   ├── changelog.hpp
    │   ├── char_is.hpp
    │   ├── char_map.hpp
    │   ├── char_predicate.hpp
    │   ├── char_transform.hpp
    │   ├── charbuf.hpp
    │   ├── checkpoint_istream.hpp
    │   ├── cmdline_main.hpp
    │   ├── command_line.hpp
    │   ├── commandline.cpp.template
    │   ├── config.h
    │   ├── configurable.hpp
    │   ├── configure.hpp
    │   ├── configure_by_prototype.hpp
    │   ├── configure_hadoop_pipes.hpp
    │   ├── configure_init.hpp
    │   ├── configure_is.hpp
    │   ├── configure_named_bits.hpp
    │   ├── configure_noop.hpp
    │   ├── configure_policy.hpp
    │   ├── configure_program_options.hpp
    │   ├── configure_traits.hpp
    │   ├── configure_validate.hpp
    │   ├── container.hpp
    │   ├── containers.hpp
    │   ├── cpp11.hpp
    │   ├── d_ary_heap.hpp
    │   ├── dbg_level.hpp
    │   ├── debuggable.hpp
    │   ├── debugger.mk
    │   ├── debugprint.hpp
    │   ├── default_pool_alloc.hpp
    │   ├── default_print_on.hpp
    │   ├── defaulted.hpp
    │   ├── delta_sum.hpp
    │   ├── delta_sum_remember.hpp
    │   ├── digamma.hpp
    │   ├── doubling_primes.hpp
    │   ├── dual_mempool.hpp
    │   ├── dummy.hpp
    │   ├── dynamic_array.hpp
    │   ├── dynamic_hash_cache.hpp
    │   ├── dynamic_sized.hpp
    │   ├── em.hpp
    │   ├── epsilon.hpp
    │   ├── escape3.hpp
    │   ├── exact_cast.hpp
    │   ├── example-cpp-with-boost-options.cpp
    │   ├── example.Makefile
    │   ├── example_value.hpp
    │   ├── farmhash.hpp
    │   ├── fast_lexical_cast.hpp
    │   ├── fileargs.cpp
    │   ├── fileargs.hpp
    │   ├── fileheader.hpp
    │   ├── filelines.hpp
    │   ├── filter_file_stream.hpp
    │   ├── find_string.hpp
    │   ├── fixed_array.hpp
    │   ├── fixed_pool.h
    │   ├── flag.hpp
    │   ├── force_link.hpp
    │   ├── format.hpp
    │   ├── from_strings.hpp
    │   ├── ftoa.hpp
    │   ├── ftoa_append.hpp
    │   ├── ftoa_ieee.hpp
    │   ├── ftos.hpp
    │   ├── funcs.hpp
    │   ├── function.hpp
    │   ├── function_macro.hpp
    │   ├── function_output_iterator.hpp
    │   ├── gen-base_construct.ipp
    │   ├── genio.h
    │   ├── gibbs.hpp
    │   ├── gibbs_opts.hpp
    │   ├── glibc_memcpy.hpp
    │   ├── glog.hpp
    │   ├── gmsl
    │   ├── good_alloc_size.hpp
    │   ├── graph.cc
    │   ├── graph.h
    │   ├── graph.hpp
    │   ├── graphviz.hpp
    │   ├── gzstream.cpp
    │   ├── gzstream.h
    │   ├── gzstream.hpp
    │   ├── has_print.hpp
    │   ├── hash.hpp
    │   ├── hash_cache.hpp
    │   ├── hash_city.hpp
    │   ├── hash_functions.hpp
    │   ├── hash_jenkins.hpp
    │   ├── hash_murmur.hpp
    │   ├── hashbench.cpp
    │   ├── hashed_value.hpp
    │   ├── hashtable_fwd.hpp
    │   ├── have_64_bits.hpp
    │   ├── hex_int.hpp
    │   ├── hypergraph.hpp
    │   ├── identity.hpp
    │   ├── ifdbg.hpp
    │   ├── ilinenostream.hpp
    │   ├── indent_level.hpp
    │   ├── indexed.hpp
    │   ├── indexgraph.hpp
    │   ├── indices_after.hpp
    │   ├── indirect.hpp
    │   ├── info_debug.hpp
    │   ├── inline.hpp
    │   ├── input_error.hpp
    │   ├── insert_to.hpp
    │   ├── int_hash_map.hpp
    │   ├── int_types.hpp
    │   ├── interruption_point.hpp
    │   ├── intorpointer.hpp
    │   ├── intrusive_refcount.hpp
    │   ├── io.hpp
    │   ├── is_container.hpp
    │   ├── is_null.hpp
    │   ├── itoa.hpp
    │   ├── karma_tostr.hpp
    │   ├── kbest-test.cc
    │   ├── kbest.cc
    │   ├── kbest.h
    │   ├── key_to_blob.hpp
    │   ├── large_streambuf.hpp
    │   ├── lazier_forest.hpp
    │   ├── lazy_forest_kbest.hpp
    │   ├── lazy_forest_kbest_test.hpp
    │   ├── lc_ascii.hpp
    │   ├── leaf_configurable.hpp
    │   ├── leb128.hpp
    │   ├── lerp.hpp
    │   ├── likely.hpp
    │   ├── list.h
    │   ├── lock_policy.hpp
    │   ├── log_intsize.hpp
    │   ├── lz4.c
    │   ├── lz4.h
    │   ├── lz4.hpp
    │   ├── lz4stream.hpp
    │   ├── main.hpp
    │   ├── main.msvc.hpp
    │   ├── map_from_set.hpp
    │   ├── math_constants.hpp
    │   ├── maybe_update_bound.hpp
    │   ├── mdb_from_db.1
    │   ├── mdb_from_db.c
    │   ├── mean_field_normalize.hpp
    │   ├── mean_field_scale.hpp
    │   ├── memleak.hpp
    │   ├── memmap.hpp
    │   ├── memoindex.hpp
    │   ├── memory_archive.hpp
    │   ├── memory_stats.hpp
    │   ├── memory_stream.hpp
    │   ├── monotonic_time.hpp
    │   ├── must_eof.hpp
    │   ├── myassert.h
    │   ├── named_enum.hpp
    │   ├── named_main.hpp
    │   ├── nan.hpp
    │   ├── nary_tree.hpp
    │   ├── new_shared.hpp
    │   ├── nibble_array.hpp
    │   ├── no_locking.hpp
    │   ├── nondet_random.cpp
    │   ├── noreturn.hpp
    │   ├── normalize.hpp
    │   ├── normalize_range.hpp
    │   ├── null_deleter.hpp
    │   ├── null_ostream.hpp
    │   ├── null_output_iterator.hpp
    │   ├── null_terminated.hpp
    │   ├── optional_pair.hpp
    │   ├── order_preserving.hpp
    │   ├── os.hpp
    │   ├── os_memory.hpp
    │   ├── outedges.hpp
    │   ├── packedalloc.hpp
    │   ├── pairlist.hpp
    │   ├── parse_float.hpp
    │   ├── path_traits.hpp
    │   ├── percent.hpp
    │   ├── periodic.hpp
    │   ├── pod.hpp
    │   ├── podcpy.hpp
    │   ├── pointer_int.hpp
    │   ├── pointer_traits.hpp
    │   ├── pointeroffset.hpp
    │   ├── pool_construct.ipp
    │   ├── pool_traits.hpp
    │   ├── power_of_10.hpp
    │   ├── predicate_compose.hpp
    │   ├── prefix_option.hpp
    │   ├── print_read.hpp
    │   ├── print_width.hpp
    │   ├── printlines.hpp
    │   ├── priority_queue.hpp
    │   ├── proc_linux.hpp
    │   ├── program_options.hpp
    │   ├── program_options_config_example.txt
    │   ├── program_options_path.hpp
    │   ├── property.hpp
    │   ├── property_factory.hpp
    │   ├── push_backer.hpp
    │   ├── quote.hpp
    │   ├── random.hpp
    │   ├── random.ipp
    │   ├── randomreader.hpp
    │   ├── read_stream.hpp
    │   ├── reconstruct.hpp
    │   ├── reduce.hpp
    │   ├── replace_digits.hpp
    │   ├── reserved_memory.hpp
    │   ├── safe_bool.hpp
    │   ├── safe_db.hpp
    │   ├── sample
    │       ├── sample.graph
    │       ├── sample.lattice
    │       ├── sample.lattice.carmel
    │       └── simple.cycle.graph
    │   ├── segments.hpp
    │   ├── semiring.hpp
    │   ├── serialize_batch.hpp
    │   ├── serialize_config.hpp
    │   ├── set_difference.hpp
    │   ├── shared_ptr.hpp
    │   ├── shell.hpp
    │   ├── shell_escape.hpp
    │   ├── show.hpp
    │   ├── simple_serialize.hpp
    │   ├── siphash.hpp
    │   ├── size_mega.hpp
    │   ├── slist.h
    │   ├── small_vector.hpp
    │   ├── snprintf.hpp
    │   ├── sparse_vector.hpp
    │   ├── split.hpp
    │   ├── split_noquote.hpp
    │   ├── stable_vector.hpp
    │   ├── stackalloc.hpp
    │   ├── stacktrace.hpp
    │   ├── static_fgets_buf.h
    │   ├── static_itoa.h
    │   ├── statistics.hpp
    │   ├── stopwatch.hpp
    │   ├── stream_util.hpp
    │   ├── stream_whitespace.hpp
    │   ├── strhash.cc
    │   ├── strhash.h
    │   ├── stride.hpp
    │   ├── string.hpp
    │   ├── string_buffer.hpp
    │   ├── string_builder.hpp
    │   ├── string_match.hpp
    │   ├── string_to.hpp
    │   ├── string_tr.hpp
    │   ├── stringable.hpp
    │   ├── stringkey.cc
    │   ├── stringkey.h
    │   ├── strstrsep.c
    │   ├── strstrsep.h
    │   ├── swap_pod.hpp
    │   ├── swapbatch.hpp
    │   ├── symbol.hpp
    │   ├── tails_up_hypergraph.hpp
    │   ├── teestream.hpp
    │   ├── test.hpp
    │   ├── test
    │       ├── LazyKbestTrees_test.cpp
    │       ├── Makefile
    │       ├── backtrace.cpp
    │       ├── epsilon.cpp
    │       ├── make.sh
    │       ├── make_kbest.sh
    │       ├── slist.cpp
    │       ├── tree.cpp
    │       └── weight_underflow.cpp
    │   ├── text-to-cc.cpp
    │   ├── the_null_ostream.hpp
    │   ├── thread_group.hpp
    │   ├── threadlocal.hpp
    │   ├── time_report.hpp
    │   ├── time_series.hpp
    │   ├── time_space_report.hpp
    │   ├── to_from_buf.hpp
    │   ├── tree.hpp
    │   ├── treetrie.hpp
    │   ├── triangular_array.hpp
    │   ├── type_string.hpp
    │   ├── type_traits.hpp
    │   ├── umod.hpp
    │   ├── unimplemented.hpp
    │   ├── unlimit_memlock.hpp
    │   ├── unordered.hpp
    │   ├── unthreaded_ptr.hpp
    │   ├── validate.hpp
    │   ├── value_str.hpp
    │   ├── verbose_exception.hpp
    │   ├── warn.hpp
    │   ├── warning_compiler.h
    │   ├── warning_pop.h
    │   ├── warning_push.h
    │   ├── weight.cc
    │   ├── weight.h
    │   ├── word_spacer.hpp
    │   ├── words.h
    │   └── zip_builder.hpp
├── sblm
    ├── 10.counted
    ├── 10.eng-parse
    ├── 10.for-norm
    ├── Makefile
    ├── README
    ├── TODO
    ├── add-pcfg-feature
    ├── cat-pcfg-for-divide
    ├── count.py
    ├── dumpx.py
    ├── etree-stats.py
    ├── etree.py
    ├── example.py
    ├── fast-lhs-sums-map
    ├── graehl.py
    ├── had-pcfg-probs
    ├── lhs-sums-map
    ├── nbest-sblm.py
    ├── nbest.py
    ├── ngram.py
    ├── optfunc.py
    ├── pcfg-backoff
    ├── pcfg-map
    ├── pcfg-map-precomb
    ├── pcfg.py
    ├── precombine.py
    ├── rules
    ├── sample
    │   ├── dev.e-parse
    │   ├── test.e-parse
    │   └── training.e-parse
    ├── sbmtrule.py
    ├── test.sh
    ├── test.txt
    ├── tree.py
    └── xrs-pcfg-events.cpp
├── utf8.h
├── utf8
    ├── checked.h
    ├── core.h
    └── unchecked.h
└── util
    ├── .aspell.en.pws
    ├── .emacs
    ├── .gdbinit
    ├── .gitconfig
    ├── .gitignore
    ├── .octaverc
    ├── .svn.authorsfile
    ├── 1count.cc
    ├── C-small.cc
    ├── add_paths.sh
    ├── addlicense.sh
    ├── aliases.sh
    ├── alignment-links.py
    ├── bash.txt
    ├── bashlib.sh
    ├── bl3.sh
    ├── bloblib.sh
    ├── c++space
    ├── camelcase.pl
    ├── ccache-wrapper.sh
    ├── charvocab.py
    ├── check-condor
    ├── cj
    ├── close-ns-inplace.pl
    ├── codejam-example.cc
    ├── codejam.hh
    ├── color.xetex
    ├── config.fish
    ├── datespan.py
    ├── dependencies.sh
    ├── dictdiff
    ├── diffnbest.pl
    ├── dotprod.py
    ├── dropcaches.c
    ├── dumpx.py
    ├── edit
    ├── emacs.reg
    ├── etree.py
    ├── extract-field-fast.pl
    ├── extract-field.pl
    ├── featstats.py
    ├── findscripts.sh
    ├── fix-include-guard-inplace.pl
    ├── fixunrpn_
    ├── float-round.pl
    ├── forall.sub
    ├── format-doxygen-c-comment
    ├── gcc.sh
    ├── giraffe
    ├── giraffe.0.3
    ├── giraffe.split
    ├── gist
    ├── git-completion.bash
    ├── gitalias.sh
    ├── gitcredit
    ├── gnuplot.auto.inc
    ├── graehl.py
    ├── growth
    ├── hexnorm.pl
    ├── hippie-expand.emacs.txt
    ├── identity.py
    ├── indent-c-comment
    ├── inpy
    ├── insert_attributes_opl.pl
    ├── iomr-hadoop
    ├── joinleft
    ├── lc1count.cc
    ├── libgraehl.pl
    ├── license.txt
    ├── localgcc.sh
    ├── localhistory.sh
    ├── log_fn.py
    ├── mflist.pl
    ├── misc.sh
    ├── monitor.py
    ├── nbest.py
    ├── newwin.sh
    ├── nfeats
    ├── no-trailing-space-inplace.pl
    ├── optfunc.py
    ├── osx-setup.sh
    ├── pandoc.constantia.css
    ├── pandoc.css
    ├── parharmonize.cc
    ├── pcfg.py
    ├── pragma_once.py
    ├── printers.py
    ├── pychecks.sh
    ├── qsh
    ├── random-c-array.py
    ├── randomwords.py
    ├── ref-updated
    ├── reject_chars.py
    ├── relpath
    ├── relpathp
    ├── remove_namespace.py
    ├── sample
        └── alignment-links.tsv
    ├── shortenpar.pl
    ├── space-brace-inplace.pl
    ├── split.lua
    ├── splitutf8.pl
    ├── start-hadoop
    ├── stats.py
    ├── subst.pl
    ├── subst.pypy.sh
    ├── summarize_num.pl
    ├── svndiff.sh
    ├── template.py
    ├── test.grf
    ├── textToC.py
    ├── time.sh
    ├── udump
    ├── unionfind.hh
    ├── unrpn_
    ├── valgrind.supp
    ├── viz-tree-string-pair.pl
    ├── why-empty.pl
    ├── windows-vista-fonts.sh
    ├── xetex.template
    ├── xetex.template2
    └── yuminstall.txt


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Let git detect text files and normalize their line endings.
2 | * text=auto
3 | # Solution files are checked out with CRLF. The leading '*' is required:
4 | # a bare '.sln' pattern only matches a file whose whole name is '.sln'.
5 | *.sln text eol=crlf
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | bin
 2 | .DS_Store
 3 | deps
 4 | obj
 5 | *~
 6 | *.pyc
 7 | lc1count
 8 | .history
 9 | trash.*
10 | carmel/test/logs
11 | carmel/test/span.spell.trained2
12 | openfst
13 | util/reveal.js
14 | latest.log
15 | balance/balance
16 | *.lex.cc
17 | *.lex.hh
18 | util/aliases.sh
19 | util/.bashrc
20 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Top-level build: sets shared compiler and Boost options, defines the
 2 | # our_boost_libs() helper, then adds every directory listed in `subdirs`.
 3 | 
 4 | # cmake_minimum_required must be the first command so policies are in effect
 5 | # for everything that follows; project() comes right after it.
 6 | cmake_minimum_required(VERSION 3.15)
 7 | project(all)
 8 | 
 9 | set(USE_BOOST_ROOT "/local/graehl/c/xmt-externals/FC12/libraries/boost-1.69.0" CACHE STRING
10 |   "If this exists, it's used instead of system path when finding boost libs")
11 | option(USE_STATIC "prefer static linking except for libc" ON)
12 | 
13 | # -fvisibility=hidden is a compiler option, not a preprocessor definition.
14 | add_compile_options(-fvisibility=hidden)
15 | 
16 | if (USE_STATIC)
17 |   set(ZLIB_USE_STATIC_LIBS ON)
18 |   set(ZSTD_USE_STATIC_LIBS ON)
19 |   set(Boost_USE_STATIC_LIBS ON)
20 | endif()
21 | set(Boost_require_VERSION 1.53) # earlier is probably fine
22 | set(CMAKE_CXX_STANDARD 17)
23 | include_directories("${PROJECT_SOURCE_DIR}")
24 | list(APPEND subdirs
25 |   carmel
26 |   forest-em
27 |   )
28 | 
29 | # our_boost_libs(<component>...)
30 | # Finds the named Boost components (REQUIRED) and appends the matching
31 | # imported targets, Boost::<component>, to OUR_BOOST_LIBS. Being a macro, it
32 | # sets that variable in the scope of the caller.
33 | # If the USE_BOOST_ROOT path exists, only that Boost installation is searched.
34 | macro(our_boost_libs)
35 |   if (EXISTS "${USE_BOOST_ROOT}")
36 |     set(Boost_NO_SYSTEM_PATHS 1)
37 |     set(BOOST_ROOT "${USE_BOOST_ROOT}")
38 |     set(BOOST_INCLUDEDIR "${USE_BOOST_ROOT}/include")
39 |   endif()
40 |   find_package(Boost "${Boost_require_VERSION}" COMPONENTS ${ARGV} REQUIRED)
41 |   # find_package(Boost) reports header locations in Boost_INCLUDE_DIRS;
42 |   # the previously used BOOST_INCLUDE_DIR is never set, so it expanded to nothing.
43 |   include_directories(${Boost_INCLUDE_DIRS})
44 |   foreach(BLIB ${ARGV})
45 |     list(APPEND OUR_BOOST_LIBS "Boost::${BLIB}")
46 |   endforeach()
47 |   message("boost libs: ${OUR_BOOST_LIBS}")
48 | endmacro()
49 | 
50 | foreach(subdir ${subdirs})
51 |   add_subdirectory(${subdir})
52 | endforeach()
53 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | (see carmel/LICENSE for terms covering the carmel/ subproject)
 2 | 
 3 |    Copyright 2011 Jonathan Graehl - http://graehl.org/
 4 | 
 5 |    Licensed under the Apache License, Version 2.0 (the "License");
 6 |    you may not use this file except in compliance with the License.
 7 |    You may obtain a copy of the License at
 8 | 
 9 |        http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 |    Unless required by applicable law or agreed to in writing, software
12 |    distributed under the License is distributed on an "AS IS" BASIS,
13 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |    See the License for the specific language governing permissions and
15 |    limitations under the License.
16 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Default target: build the carmel subdirectory with 4 parallel jobs.
2 | # `all` never creates a file of that name, so it is declared phony.
3 | .PHONY: all
4 | all:
5 | 	$(MAKE) -C carmel -j 4
6 | 


--------------------------------------------------------------------------------
/balance/Makefile:
--------------------------------------------------------------------------------
1 | STD = -std=c++11
2 | STDLIB = -stdlib=libc++
3 | CXXFLAGS = $(STD) $(STDLIB)
4 | balance: balance.lex.cc balance.cc
5 | 	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -o $@ $^
6 | %.lex.cc %.lex.hh: %.lext
7 | 	flex --header-file=$*.lex.hh --outfile=$*.lex.cc $<
8 | 


--------------------------------------------------------------------------------
/balance/balance.cc:
--------------------------------------------------------------------------------
 1 | /** \file
 2 | 
 3 |     perhaps a more practical way to solve unbalanced brace (C++ namespace) confusion:
 4 | 
 5 |     use mcpp (https://github.com/h8liu/mcpp) to expand your .cpp
 6 | 
 7 |     then add { } at the beginning and end of the file. then emacs syntax-table
 8 |     sexpr navigation from eof back will show you the dangling open (excess closes tend to
 9 |     error out in compile more intuitively).
10 | 
11 |     C11 lexer from http://www.quut.com/c/ANSI-C-grammar-l-2011.html
12 | */
13 | 
14 | #include "balance.lex.hh"
15 | #include <iostream>
16 | #include <fstream>
17 | #include <cstdlib>
18 | 
19 | using namespace std;
20 | 
21 | /// Write \a msg and a newline to stderr, then terminate via abort().
22 | void err(char const* msg) {
23 |   cerr << msg;
24 |   cerr << '\n';
25 |   abort();
26 | }
27 | 
28 | /// Lex \a in and echo every token on its own line to stdout, followed by one
29 | /// blank line. \a name is used only for the progress line sent to stderr.
30 | void run(istream& in, char const* name) {
31 |   cerr << name << " ...\n";
32 |   yyFlexLexer lexer(&in, &cerr);
33 |   for (;;) {
34 |     // stop when the stream has gone bad or yylex() returns 0
35 |     if (!in || !lexer.yylex()) break;
36 |     cout.write(lexer.YYText(), lexer.YYLeng());
37 |     cout << "\n";
38 |   }
39 |   cout << '\n';
40 | }
41 | 
42 | /// Open the file \a name and lex it; if it cannot be opened, report the name
43 | /// through err() (which does not return).
44 | void run(char const* name) {
45 |   ifstream file(name);
46 |   if (file)
47 |     run(file, name);
48 |   else
49 |     err(name);
50 | }
51 | 
52 | /// With no arguments lex stdin; otherwise lex each named file in order.
53 | int main(int argc, char* argv[]) {
54 |   if (argc < 2) {
55 |     run(cin, "[STDIN]");
56 |     return 0;
57 |   }
58 |   for (char** arg = argv + 1; arg != argv + argc; ++arg) run(*arg);
59 |   return 0;
60 | }
61 | 


--------------------------------------------------------------------------------
/balance/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Run make with gcc/g++ selected through the environment and CFLAGS cleared.
3 | # Any arguments (targets, VAR=value overrides) are passed on to make.
4 | LD=gcc CPP=gcc CXX=g++ CC=gcc CFLAGS= make "$@"
5 | 


--------------------------------------------------------------------------------
/carmel/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Builds the carmel executable and links it against Boost random and timer.
2 | # our_boost_libs() is not defined in this file; it is expected to come from
3 | # the enclosing build and to fill OUR_BOOST_LIBS.
4 | project(carmel)
5 | our_boost_libs(random timer)
6 | add_executable(carmel src/carmel.cc src/fst.cc src/train.cc src/gibbs.cc)
7 | # PRIVATE: an executable has no dependents to propagate link requirements to.
8 | target_link_libraries(carmel PRIVATE ${OUR_BOOST_LIBS})
9 | 


--------------------------------------------------------------------------------
/carmel/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/LICENSE


--------------------------------------------------------------------------------
/carmel/NOTES:
--------------------------------------------------------------------------------
1 | 
2 | bayestag / scala version regression for -blocked
3 | 
4 | 


--------------------------------------------------------------------------------
/carmel/ToDo:
--------------------------------------------------------------------------------
 1 | features:
 2 | training with conditional normalization - option to leave alone or assign uniform weights to arcs with zero counts for some input leaving some state (but often people like zeroing out unused arcs for pruning?)
 3 | attach and preserve arbitrary labels to arcs (states already have their arbitrary names)
 4 | 'wildcard', 'except-state' and 'except-global' label for arcs - for input or output not seen leaving the state, or at all
 5 | external input/output dictionary files?  binary format?
 6 | option to iteratively sum paths with e-cycles for -S and -t.  use matrix math (e-paths of length 0,1,2,3... = 1+A^1+A^2+ ...)
 7 | 
 8 | code:
 9 | List::count_length for input sequences - use iterators instead?
10 | unnecessary copying of path lists due to lameness of output iterator / insert_iterator
11 | STL allocator - get rid of CUSTOMNEW mess (is it faster?)
12 | command-line regression tests
13 | unit tests
14 | STL hash
15 | one filename per public class: Class.h Class.cc
16 | 
17 | bugs:
18 | 


--------------------------------------------------------------------------------
/carmel/carmel:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | if [ -z "$ARCH" ] ; then
 3 | u=`uname`
 4 | ARCH=cygwin
 5 | if [ $u = Linux ] ; then
 6 |  ARCH=linux
 7 | fi
 8 | if [ $u = SunOS ] ; then
 9 |  ARCH=solaris
10 | fi
11 | if [ $u = Darwin ] ; then
12 |  ARCH=macosx
13 | fi
14 | fi
15 | realprog=$0
16 | d=`dirname $realprog`
17 | if [ -L $realprog ] ; then
18 |  if [ -x "`which readlink`" ] ; then
19 |     realprog=`readlink $0`
20 |     if [ ${realprog:0:1} = / ] ; then #absolute path
21 |      d=`dirname $realprog`
22 |     else
23 |      d=$d/`dirname $realprog`
24 |     fi     
25 |  fi
26 | fi
27 | 
28 | if [ $ARCH = Darwin ]; then
29 |  exec $d/$ARCH/carmel $*
30 | else
31 |  exec $d/$ARCH/carmel.static $*
32 | fi
33 | 
34 | 


--------------------------------------------------------------------------------
/carmel/carmel-tutorial/carmel-training.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/carmel-tutorial/carmel-training.pdf


--------------------------------------------------------------------------------
/carmel/carmel-tutorial/cat.fsa:
--------------------------------------------------------------------------------
 1 | 0
 2 | (0 (0 *e* "space"))
 3 | (0 (c1 *e* "c1"))
 4 | (0 (c2 *e* "c2"))
 5 | (0 (c3 *e* "c3"))
 6 | (c1 (0 *e* "space"))
 7 | (c1 (c1 *e* "c1"))
 8 | (c1 (c2 *e* "c2"))
 9 | (c1 (c3 *e* "c3"))
10 | (c2 (0 *e* "space"))
11 | (c2 (c1 *e* "c1"))
12 | (c2 (c2 *e* "c2"))
13 | (c2 (c3 *e* "c3"))
14 | (c3 (0 *e* "space"))
15 | (c3 (c1 *e* "c1"))
16 | (c3 (c2 *e* "c2"))
17 | (c3 (c3 *e* "c3"))
18 | 


--------------------------------------------------------------------------------
/carmel/carmel-tutorial/cat.fsa.trained:
--------------------------------------------------------------------------------
 1 | 0
 2 | (0 (0 *e* "space" 0.0896584819643305))
 3 | (0 (1 *e* "c1" 0.636327421275486))
 4 | (0 (2 *e* "c2" 0.274013807990881))
 5 | (0 (3 *e* "c3" 2.88769303122129e-07))
 6 | (1 (0 *e* "space" 4.06374797674586e-05))
 7 | (1 (1 *e* "c1" 0.124246995161629))
 8 | (1 (2 *e* "c2" 0.874856889754987))
 9 | (1 (3 *e* "c3" 0.00085547760361627))
10 | (2 (0 *e* "space" 0.153523266340634))
11 | (2 (1 *e* "c1" 0.116884883887446))
12 | (2 (2 *e* "c2" 0.125343236298317))
13 | (2 (3 *e* "c3" 0.604248613473604))
14 | (3 (0 *e* "space" 0.535737245619616))
15 | (3 (1 *e* "c1" 0.0455422297669097))
16 | (3 (2 *e* "c2" 0.0491608536591124))
17 | (3 (3 *e* "c3" 0.369559670954362))
18 | 


--------------------------------------------------------------------------------
/carmel/carmel-tutorial/cat.fsa.trained.noe:
--------------------------------------------------------------------------------
 1 | 0
 2 | (0 (0 "space" "space" 0.0896584819643305))
 3 | (0 (1 "c1" "c1" 0.636327421275486))
 4 | (0 (2 "c2" "c2" 0.274013807990881))
 5 | (0 (3 "c3" "c3" 2.88769303122129e-07))
 6 | (1 (0 "space" "space" 4.06374797674586e-05))
 7 | (1 (1 "c1" "c1" 0.124246995161629))
 8 | (1 (2 "c2" "c2" 0.874856889754987))
 9 | (1 (3 "c3" "c3" 0.00085547760361627))
10 | (2 (0 "space" "space" 0.153523266340634))
11 | (2 (1 "c1" "c1" 0.116884883887446))
12 | (2 (2 "c2" "c2" 0.125343236298317))
13 | (2 (3 "c3" "c3" 0.604248613473604))
14 | (3 (0 "space" "space" 0.535737245619616))
15 | (3 (1 "c1" "c1" 0.0455422297669097))
16 | (3 (2 "c2" "c2" 0.0491608536591124))
17 | (3 (3 "c3" "c3" 0.369559670954362))
18 | 


--------------------------------------------------------------------------------
/carmel/carmel-tutorial/delete.fst:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "a" "a"))
3 | (0 (0 "a" *e*))
4 | (0 (0 "b" "b"))
5 | (0 (0 "b" *e*))
6 | (0 (0 "c" "c"))
7 | (0 (0 "c" *e*))
8 | 


--------------------------------------------------------------------------------
/carmel/carmel-tutorial/delete.fst.trained:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "a") (0 "a" *e* 1.36958138882441e-18) (0 "b" 2.88391710752266e-18) (0 "b" *e*) (0 "c") (0 "c" *e* 7.23771649369207e-20))
3 | 


--------------------------------------------------------------------------------
/carmel/carmel-tutorial/deltrans.data:
--------------------------------------------------------------------------------
1 | "a" "b" "c"
2 | "b" "a"
3 | "a" "b" "c"
4 | "a" "c"
5 | "a" "c"
6 | "b" "c"
7 | "a" "c"
8 | "b" "c"
9 | 


--------------------------------------------------------------------------------
/carmel/carmel-tutorial/deltrans.data.compact:
--------------------------------------------------------------------------------
 1 | 1
 2 | "a" "b" "c"
 3 | "b" "a"
 4 | 1
 5 | "a" "b" "c"
 6 | "a" "c"
 7 | 2
 8 | "a" "c"
 9 | "b" "c"
10 | 


--------------------------------------------------------------------------------
/carmel/carmel-tutorial/epron-jpron.data:
--------------------------------------------------------------------------------
1 | "L" "IY" "N"
2 | "R" "I" "N"
3 | "R" "AE" "N"
4 | "R" "A" "N"
5 | "F" "AE" "N"
6 | "H" "A" "N"
7 | "L" "AY" "N"
8 | "R" "A" "I" "N"
9 | 


--------------------------------------------------------------------------------
/carmel/carmel-tutorial/trans.fst:
--------------------------------------------------------------------------------
 1 | 0
 2 | (0 (0 "a" "a"))
 3 | (0 (0 "a" "b"))
 4 | (0 (0 "a" "c"))
 5 | (0 (0 "b" "a"))
 6 | (0 (0 "b" "b"))
 7 | (0 (0 "b" "c"))
 8 | (0 (0 "c" "a"))
 9 | (0 (0 "c" "b"))
10 | (0 (0 "c" "c"))
11 | 


--------------------------------------------------------------------------------
/carmel/carmel-tutorial/trans.fst.trained:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "a" 0.25) (0 "a" "b" 0.75) (0 "b" "a" 0.999726408340245) (0 "b" 0.000273591658524948) (0 "b" "c" 1.22743061972586e-12) (0 "c" "a" 0.25) (0 "c" 0.75))
3 | 


--------------------------------------------------------------------------------
/carmel/carmel-tutorial2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/carmel-tutorial2.pdf


--------------------------------------------------------------------------------
/carmel/debug.sh:
--------------------------------------------------------------------------------
1 | gdb --args bin/$ARCH/carmel.debug -k 2 test/kbest.small.cycle  # start gdb on bin/$ARCH/carmel.debug with the arguments "-k 2 test/kbest.small.cycle"; ARCH must be set by the caller
2 | 


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/filelist.xml:
--------------------------------------------------------------------------------
 1 | <xml xmlns:o="urn:schemas-microsoft-com:office:office">
 2 |  <o:MainFile HRef="../carmel-tutorial.htm"/>
 3 |  <o:File HRef="image001.gif"/>
 4 |  <o:File HRef="image002.gif"/>
 5 |  <o:File HRef="image003.gif"/>
 6 |  <o:File HRef="image004.gif"/>
 7 |  <o:File HRef="image005.gif"/>
 8 |  <o:File HRef="image006.gif"/>
 9 |  <o:File HRef="image007.gif"/>
10 |  <o:File HRef="image008.gif"/>
11 |  <o:File HRef="image009.gif"/>
12 |  <o:File HRef="image010.gif"/>
13 |  <o:File HRef="image011.gif"/>
14 |  <o:File HRef="filelist.xml"/>
15 | </xml>


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/image001.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/doc/carmel-tutorial_files/image001.gif


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/image002.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/doc/carmel-tutorial_files/image002.gif


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/image003.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/doc/carmel-tutorial_files/image003.gif


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/image004.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/doc/carmel-tutorial_files/image004.gif


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/image005.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/doc/carmel-tutorial_files/image005.gif


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/image006.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/doc/carmel-tutorial_files/image006.gif


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/image007.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/doc/carmel-tutorial_files/image007.gif


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/image008.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/doc/carmel-tutorial_files/image008.gif


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/image009.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/doc/carmel-tutorial_files/image009.gif


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/image010.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/doc/carmel-tutorial_files/image010.gif


--------------------------------------------------------------------------------
/carmel/doc/carmel-tutorial_files/image011.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/doc/carmel-tutorial_files/image011.gif


--------------------------------------------------------------------------------
/carmel/make-dictionary.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | use warnings;
 4 | use Getopt::Long;
 5 | 
 6 | # Reads lines from STDIN or from files named on the command line and prints an
 7 | # FSA that recognizes each line character by character (newline excluded).
 8 | #   --random    give every line a random weight in (0,1]
 9 | #   --weighted  take each line's weight from a number found on that line
10 | 
11 | my $end="END";   # state the last character of every line leads to; printed as the first output line
12 | my $start=0;     # state every line's path starts from
13 | my $s=1;         # next unused intermediate state number
14 | my $random=0;
15 | my $weighted=0;
16 | 
17 | GetOptions("random!"=>\$random,"weighted!"=>\$weighted)
18 |     || die "usage: $0 [--random] [--weighted] [file ...]\n";
19 | print "$end\n";  # emitted only after the options parse, so a usage error leaves no partial FSA
20 | 
21 | sub quote_char {
22 |     # Wrap the given character in double quotes, writing a literal '"' as \".
23 |     my $c=shift;
24 |     return '"'.($c eq '"' ? '\"' : $c).'"';
25 | }
26 | # A number: optional sign, then digits with an optional fraction or a bare fraction like .5, then an optional exponent (also valid without a decimal point, e.g. 1e-5).
27 | my $num_match=qr/(?:[+\-]|\b|(?=[.]))(?:[0123456789]+(?:[.][0123456789]*)?|[.][0123456789]+)(?:[eE][+\-]?[0123456789]+)?/;
28 | # Main loop: each non-empty input line becomes one path of arcs from $start to $end, one arc per character.
29 | while(<>) {
30 |     my $weight=1;
31 |     if ($weighted) {
32 |         # cut the first number (and the whitespace after it) out of the line; it becomes the weight
33 |         s/((?:e\^|10\^)?$num_match(?:ln|log)?)\s+// || die "no weight found for line $_ with --weighted";
34 |         $weight=$1;
35 |     }
36 |     $weight=1-rand(1) if $random;   # --random replaces any weight read above
37 |     chomp;
38 |     my @chars=split //,$_;
39 |     my $from=$start;
40 |     for my $i (0..$#chars) {
41 |         # intermediate states get fresh numbers; the last character's arc goes to $end
42 |         my $to=($i==$#chars) ? $end : $s++;
43 |         my $arc="($from $to ".quote_char($chars[$i]);
44 |         $arc.=" $weight" if $i==0 && $weight ne '1';   # the weight is written on the first arc only
45 |         print $arc,")\n";
46 |         $from=$to;
47 |     }
48 | }


--------------------------------------------------------------------------------
/carmel/sample/chain.1:
--------------------------------------------------------------------------------
1 | 2
2 | (0 (2 0.1) (1 *e* b 0.6) (0 *e* a 0.3) (4 *e* a .1))
3 | (1 (2 *e* a 0.3) (0 *e* b 0.7))
4 | (2)
5 | (4 (2 *e* c))
6 | 


--------------------------------------------------------------------------------
/carmel/sample/chain.2:
--------------------------------------------------------------------------------
1 | 1
2 | (0 (1 *e* 1) (0 a c .6) (0 a d .4) (0 b d .2) (0 b e .8))
3 | (1)
4 | 


--------------------------------------------------------------------------------
/carmel/sample/chain.corpus:
--------------------------------------------------------------------------------
1 | 
2 | d e c
3 | 
4 | d d
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/carmel/sample/chain.mid:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 a b .1) (0 b a .2) (0 a a .9) (0 b b .8))
3 | 


--------------------------------------------------------------------------------
/carmel/sample/comments:
--------------------------------------------------------------------------------
 1 | S
 2 | #aasdf
 3 | #A
 4 | (S (S "PRO
 5 | #" "they" -20log))
 6 | (S (S "AUX" "can" -20ln))
 7 | (S (S "VERB" "can" 200log))
 8 | (S (S "NOUN" "fish" 0.0001))
 9 | (S (S "VERB" "fish" 0.0001))
10 | 


--------------------------------------------------------------------------------
/carmel/sample/decipher/README:
--------------------------------------------------------------------------------
1 | Unsupervised decipherment of a one-to-one letter substitution cipher (carmel itself is not told that the mapping is one-to-one, however).
2 | 


--------------------------------------------------------------------------------
/carmel/sample/decipher/correct:
--------------------------------------------------------------------------------
 1 | 
 2 | _ D E C I P H E R M E N T _ I S _ 
 3 | 
 4 | _ T H E _ A N A L Y S I S _ O F _ D O C U M E N T S _ W R I T T E N _ I N _ 
 5 | 
 6 | _ A N C I E N T _ L A N G U A G E S _ W H E R E _ T H E _ L A N G U A G E _ I S _ U N K N O W N _ O R _ 
 7 | 
 8 | _ K N O W L E D G E _ O F _ T H E _ L A N G U A G E _ H A S _ B E E N _ L O S T _ I T _ I S _ C L O S E L Y _ 
 9 | 
10 | _ R E L A T E D _ T O _ C R Y P T A N A L Y S I S _ T H E _ D I F F E R E N C E _ B E I N G _ T H A T _ T H E _ 
11 | 
12 | _ O R I G I N A L _ D O C U M E N T _ W A S _ N O T _ D E L I B E R A T E L Y _ W R I T T E N _ T O _ B E _ 
13 | 
14 | _ D I F F I C U L T _ T O _ D E C I P H E R _ T H E _ T E R M _ H A S _ A L S O _ B E E N _ U S E D _ T O _ 
15 | 
16 | _ D E S C R I B E _ T H E _ A N A L Y S I S _ O F _ T H E _ G E N E T I C _ C O D E _ S E E _ T H E _ H U M A N _ 
17 | 
18 | _ G E N O M E _ P R O J E C T _ F O R _ M O R E _ O N _ T H I S _ S O M E _ P E O P L E _ H A V E _ A L S O _ U S E D _ 
19 | 
20 | _ T H E _ W O R D _ M E T A P H O R I C A L L Y _ T O _ M E A N _ S O M E T H I N G _ L I K E _ U N D E R S T A N D I N G _ 
21 | 


--------------------------------------------------------------------------------
/carmel/sample/decipher/errors.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | function eval-res {  # eval-res FILE1 FILE2: print how many token positions differ between the two files
 4 |  cat $1 | tr ' ' '\012' | awk 'NF > 0' | tr -d '"' >z1  # one token per line, empty lines dropped, double quotes deleted; z1/z2 are left in the current directory
 5 |  cat $2 | tr ' ' '\012' | awk 'NF > 0' | tr -d '"' >z2
 6 |  echo `paste -d ' ' z1 z2 | awk '$1 != $2' | wc -l`  # pair the tokens by position and count the pairs that differ
 7 |  #echo `diff -y z1 z2 | egrep '(\||<|>)' | wc -l` '('`diff z1 z2 | grep '^<' | wc -l`')'
 8 |  #grep '^<' | wc -l
 9 | }
10 | echo ${suf:=.trained} ${csuf:=2} ${carmel:=carmel} ${chanbase=subst.wfst}  # fill in defaults and echo the settings; := also replaces empty values, the plain = on chanbase only applies when it is unset
11 | echo ${src:=plain.bi.wfsa} ${chan:=$chanbase$suf} ${cipher:=cipher$csuf} ${correct:=correct$csuf} ${log:=errors.log}
12 | # With $weights set: run carmel with --load-fem-param on $src and $chanbase, then score $chanbase.$suf instead (presumably the file --write-loaded=$suf produces; confirm against carmel's option help).
13 | if [ "$weights" ] ; then
14 |     suf=`basename $weights`
15 |     set -x
16 |     $carmel -H --load-fem-param=$weights $src $chanbase --no-compose --write-loaded=$suf
17 |     chan=$chanbase.$suf
18 |     set +x
19 | fi
20 | # Derived inputs for decoding: $chan.cubed (from "-= 3.0"; presumably the channel weights raised to the 3rd power) and $src.id (from --project-right --project-identity-fsa).
21 | $carmel -HJ -= 3.0 $chan > $chan.cubed
22 | $carmel --project-right --project-identity-fsa $src > $src.id
23 | function errors_chan
24 | {  # errors_chan CHANNEL LABEL: decode $cipher using $src.id and CHANNEL, then print the error count against $correct, tagged with LABEL
25 | $carmel -qbsriQIWEk 1 $src.id $1 < $cipher > $chan.decode  # the decode always goes to $chan.decode, whichever channel $1 names
26 | echo "errors $2 = " `eval-res $correct $chan.decode `
27 | }
28 | (
29 | echo 'length of text = ' `tr -d '"_' < $correct | wc -w`  # word count of $correct after deleting double quotes and underscores
30 | errors_chan $chan "    ";
31 | errors_chan $chan.cubed "cubed"
32 | ) 2>&1 | tee $log  # stdout and stderr of the report go both to the terminal and to $log
33 | 


--------------------------------------------------------------------------------
/carmel/sample/decipher/fem.sh:
--------------------------------------------------------------------------------
 1 | set -e
 2 | echo ${suf:=fem} ${ITER:=500} ${restarts:=0}  # fill in defaults for unset or empty settings and echo them
 3 | i4=$((ITER/4))  # a quarter of ITER; passed as -i to the EM run and as --burnin to the --crp run
 4 | [ "$EM" ] || CRP=1  # when EM is not requested, force the CRP run
 5 | if [ "$EM" ] ; then
 6 | $fem -f forest -H -n norm -I param -e 0 -o $suf.em -i $i4 -r $restarts  # NOTE(review): $fem gets no default in this script; it must come from the environment
 7 | weights=$suf.em ./errors.sh  # score the EM output with errors.sh
 8 | fi
 9 | if [ "$CRP" ] ; then
10 | if [ "$DA" ] ; then
11 |  sda=".crp.da=.$DA"  # output-name suffix that records the DA value
12 |  argda="--high-temp=2 --low-temp=$DA"  # extra options for the run below, with DA as the low temperature
13 | fi
14 | $fem -f forest -H -n norm -I param -e 0 -o $suf$sda $argda --crp=$ITER --burnin=$i4 --alpha=alpha  # $sda and $argda are only assigned here when DA is set
15 | weights=$suf$sda ./errors.sh  # score the CRP output with errors.sh
16 | fi
17 | 


--------------------------------------------------------------------------------
/carmel/sample/decipher/to-fem.sh:
--------------------------------------------------------------------------------
1 | set -x  # trace the command as it runs
2 | $carmel --train-cascade -aHJmM -1 cipher2 plain.bi.wfsa subst.wfst --priors=1e5,1e-2 --fem-norm=norm --fem-forest=forest --fem-param=param --normby=NC --fem-alpha=alpha  # norm, forest, param and alpha are the same names fem.sh passes to $fem (presumably written here; confirm); $carmel gets no default in this script
3 | 


--------------------------------------------------------------------------------
/carmel/sample/do.graphviz.srilm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | B=${B:-../bin/cage/carmel.debug}  # carmel binary to run; set B in the environment to override this path
3 | ../src/sri2fsa.pl < tiny.sri > tiny.fsa;$B tiny.fsa -YZB > tiny.dot;dot -O -Tpdf tiny.dot;dot -O -Gdpi=150 -Tpng tiny.dot  # tiny.sri -> tiny.fsa -> tiny.dot, then dot renders tiny.dot as PDF and as PNG at 150 dpi
4 | 


--------------------------------------------------------------------------------
/carmel/sample/egraph:
--------------------------------------------------------------------------------
1 | 4
2 | (1 (2 *e* 2) (3 *e* .5) (4 *e* 8))
3 | (2 (3 *e* 1) (4 *e* 3))
4 | (3 (4 *e* 1))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/emptyfsa:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/sample/emptyfsa


--------------------------------------------------------------------------------
/carmel/sample/fsa1:
--------------------------------------------------------------------------------
1 | 3
2 | (0 (1 "he"))
3 | (1 (2 "saw"))
4 | (2 (3 "me"))
5 | (1 (4 "ran"))
6 | (4 (3 "home"))
7 | (0 (5 "she"))
8 | (5 (3 "talked"))
9 | 


--------------------------------------------------------------------------------
/carmel/sample/fsa10:
--------------------------------------------------------------------------------
1 | 1
2 | (0 1)
3 | 


--------------------------------------------------------------------------------
/carmel/sample/fsa11:
--------------------------------------------------------------------------------
1 | 1
2 | (0 (0 a) (0 .5) (0) (1) (1 b))
3 | 


--------------------------------------------------------------------------------
/carmel/sample/fsa2:
--------------------------------------------------------------------------------
1 | F
2 | (S (A "he"))
3 | (A (B "ran"))
4 | (B (F "home"))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/fsa3:
--------------------------------------------------------------------------------
1 | 3
2 | (0 (1 "he"))
3 | (1 (2 "saw"))
4 | (2 (3 "me"))
5 | (0 (5 "she"))
6 | (5 (2 "studied"))
7 | 


--------------------------------------------------------------------------------
/carmel/sample/fsa4:
--------------------------------------------------------------------------------
1 | F
2 | (S (A "the"))
3 | (A (A "big"))
4 | (A (F "dog"))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/fsa5:
--------------------------------------------------------------------------------
1 | F
2 | (S (A "the"))
3 | (S (A *e*))
4 | (A (B "big"))
5 | (B (C "big"))
6 | (C (F "dog"))
7 | 


--------------------------------------------------------------------------------
/carmel/sample/fsa6:
--------------------------------------------------------------------------------
1 | F
2 | (S (A "big"))
3 | (A (B "big"))
4 | (B (C "dog"))
5 | (C (F "big"))
6 | 


--------------------------------------------------------------------------------
/carmel/sample/fsa7:
--------------------------------------------------------------------------------
1 | 3
2 | (0 (1 "they"))
3 | (1 (2 "can"))
4 | (2 (3 "fish"))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/fsa8:
--------------------------------------------------------------------------------
1 | 3
2 | (0 1 "they")
3 | (0 2 "please")
4 | (1 2 "can")
5 | (2 3 "fish")
6 | 


--------------------------------------------------------------------------------
/carmel/sample/fsa9:
--------------------------------------------------------------------------------
1 | 1
2 | (0 (1))
3 | 


--------------------------------------------------------------------------------
/carmel/sample/fst1:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "big" "small"))
3 | (0 (0 "dog" "dog"))
4 | 


--------------------------------------------------------------------------------
/carmel/sample/fst2:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "K" "c"))
3 | (0 (0 "AE" "a"))
4 | (0 (1 "SH" "s"))
5 | (1 (0 *e* "h"))
6 | 


--------------------------------------------------------------------------------
/carmel/sample/kevin_g:
--------------------------------------------------------------------------------
1 | 2
2 | (0 (1 "C" -150ln))
3 | (0 (1 "V" -50log))
4 | (1 (0 *e* 0.9))
5 | (1 (2 *e* 0.1))
6 | (2)
7 | 


--------------------------------------------------------------------------------
/carmel/sample/nested:
--------------------------------------------------------------------------------
1 | S
2 | #aasdf
3 | #A
4 | (S (S "PRO
5 | #" "they" -20log))
6 | (S (S ("AUX" "can" -20ln) ("VERB" "can" 200log)))
7 | (S (S "NOUN" "fish" 0.0001))
8 | (S (S "VERB" "fish" 0.0001))
9 | 


--------------------------------------------------------------------------------
/carmel/sample/tag/README:
--------------------------------------------------------------------------------
1 | source is a fully connected tag bigram
2 | channel is a dictionary allowing a subset of part of speech tags for each word
3 | cipher is words (decipher into hidden tags)
4 | 


--------------------------------------------------------------------------------
/carmel/sample/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e  # stop at the first file carmel fails on
3 | for f in *fs*; do echo $f  # every entry in the current directory whose name contains "fs"; the name is echoed before the run
4 | ../bin/$HOST/carmel $f -k 10 >/dev/null; done  # standard output is discarded, so only carmel's exit status and stderr are visible; HOST must be set by the caller
5 | 


--------------------------------------------------------------------------------
/carmel/sample/tiny.sri:
--------------------------------------------------------------------------------
 1 | 
 2 | \data\
 3 | ngram 1=5
 4 | ngram 2=6
 5 | ngram 3=2
 6 | 
 7 | \1-grams:
 8 | -3      a     -.1
 9 | -2      b     -.2
10 | -4	c
11 | -99      <s>     -1.5
12 | -1       </s>
13 | 
14 | \2-grams:
15 | -.5      b </s> 
16 | -1       b b    -2
17 | -3       a b
18 | -5       a a
19 | -2       b a -8
20 | -.3        <s> a  -1
21 | 
22 | \3-grams:
23 | -.1 <s> a a
24 | -.2   b b a
25 | 
26 | \end\
27 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/a:
--------------------------------------------------------------------------------
1 | S
2 | (S (1 "she"    1.0))
3 | (S (1 "he"  1.0  ))
4 | (1 (2 "can"  0.99))
5 | (1 (2 "can"  0.01))
6 | (2 (S "swing"  0.7))
7 | (2 (S "dance"  0.7))
8 | (2 (S "swing"  0.3))
9 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/a.2:
--------------------------------------------------------------------------------
1 | F
2 | (S (A "the"))
3 | (A (A "big"))
4 | (A (F "dog"))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/a.t:
--------------------------------------------------------------------------------
1 | S
2 | (S (1 "she" "SHE"   1.0))
3 | (S (1 "he"  "HE" 1.0  ))
4 | (1 (2 "can"  "C" 0.99))
5 | (1 (2 "can"  "C" 0.01))
6 | (2 (S "swing"  "SSS" 0.7))
7 | (2 (S "dance"  "DDD" 0.7))
8 | (2 (S "swing"  "BBB" 0.3))
9 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/a1:
--------------------------------------------------------------------------------
 1 | 3
 2 | (0 (1 *e*))
 3 | (0 (1 "he"))
 4 | (1 (2 "saw" 0.8))
 5 | (2 (3 "me"))
 6 | (1 (4 "ran" 0.2))
 7 | (4 (3 "home" 1.0))
 8 | (0 (5 "she"))
 9 | (5 (3 "talked" 1.0))
10 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/a2:
--------------------------------------------------------------------------------
 1 | 3
 2 | (0 (1 "he"   0.4))
 3 | (1 (2 "see"   0.8))
 4 | (2 (3 "me"   1.0))
 5 | (1 (4 "ran"   0.2))
 6 | (4 (3 "school"   1.0))
 7 | (0 (5 "she"   0.6))
 8 | (5 (3 "talked"   1.0))
 9 | (0 (6 "he"   0.5))
10 | (6 (0 "dance" 0.9))
11 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/a3:
--------------------------------------------------------------------------------
 1 | S
 2 | (S (1 "he" 1.0))
 3 | (S (1 "she" 1.0))
 4 | (1 (2 "could" 0.99))
 5 | (1 (2 "can" 0.19))
 6 | (1 (2 "could" 0.01))
 7 | (2 (S "dance" 0.7))
 8 | (2 (S "swing" 0.7))
 9 | (2 (S "dance" 0.3))
10 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/a3.t:
--------------------------------------------------------------------------------
 1 | S
 2 | (S (1 "he" "HE" 1.0))
 3 | (S (1 "she" *e* 1.0))
 4 | (1 (2 "could" "SSS" 0.99))
 5 | (1 (2 "can" "C" 0.19))
 6 | (1 (2 "could" "C" 0.01))
 7 | (2 (S "dance" "C" 0.7))
 8 | (2 (S "swing" "C" 0.7))
 9 | (2 (S "dance" "C" 0.3))
10 | (1 (3 "ran" "RAN" 0.3))
11 | (3 (S "home" "HOME" 0.3))
12 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/aa:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/sample/tmp/aa


--------------------------------------------------------------------------------
/carmel/sample/tmp/aaa:
--------------------------------------------------------------------------------
1 | 3
2 | (0 (1 "he"))
3 | (1 (2 "ran"))
4 | (2 (3 "home"))
5 | (3)
6 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/b1.wfsa:
--------------------------------------------------------------------------------
 1 | 3
 2 | (0 (1 "he"   0.4))
 3 | (1 (2 "see"   0.8))
 4 | (2 (3 "me"   1.0))
 5 | (1 (4 "ran"   0.2))
 6 | (4 (3 "school"   1.0))
 7 | (0 (5 "she"   0.6))
 8 | (5 (3 "talked"   1.0))
 9 | (0 (6 "he"   0.5))
10 | (6 (0 "dance" 0.9))
11 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fsa1:
--------------------------------------------------------------------------------
1 | 3
2 | (0 (1 "he"))
3 | (1 (2 "saw"))
4 | (2 (3 "me"))
5 | (1 (4 "ran"))
6 | (4 (3 "home"))
7 | (0 (5 "she"))
8 | (5 (3 "talked"))
9 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fsa2:
--------------------------------------------------------------------------------
1 | F
2 | (S (A "he"))
3 | (A (B "ran"))
4 | (B (F "home"))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fsa3:
--------------------------------------------------------------------------------
1 | 3
2 | (0 (1 "he"))
3 | (1 (2 "saw"))
4 | (2 (3 "me"))
5 | (0 (5 "she"))
6 | (5 (2 "studied"))
7 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fsa4:
--------------------------------------------------------------------------------
1 | F
2 | (S (A "the"))
3 | (A (A "big"))
4 | (A (F "dog"))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fsa4.1:
--------------------------------------------------------------------------------
1 | F
2 | (S (A "the"))
3 | (A (A "big"))
4 | (A (F "dog"))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fsa5:
--------------------------------------------------------------------------------
1 | F
2 | (S (A "the"))
3 | (S (A *e*))
4 | (A (B "big"))
5 | (B (C "big"))
6 | (C (F "dog"))
7 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fsa6:
--------------------------------------------------------------------------------
1 | F
2 | (S (A "big"))
3 | (A (B "big"))
4 | (B (C "dog"))
5 | (C (F "big"))
6 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fsa7:
--------------------------------------------------------------------------------
1 | 3
2 | (0 (1 "they"))
3 | (1 (2 "can"))
4 | (2 (3 "fish"))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fst1:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "big" "small"))
3 | (0 (0 "dog" "dog"))
4 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fst1.1:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "the" "a"))
3 | (0 (0 "big" "small"))
4 | (0 (0 "dog" "dog"))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fst1.2:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "a" "A"))
3 | (0 (0 "small" "SMALL"))
4 | (0 (0 "dog" "DOG"))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fst1.3:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "the" "*"))
3 | (0 (0 "big" "**"))
4 | (0 (0 "dog" "***"))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/fst2:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "K" "c"))
3 | (0 (0 "AE" "a"))
4 | (0 (1 "SH" "s"))
5 | (1 (0 *e* "h"))
6 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/t:
--------------------------------------------------------------------------------
1 | S
2 | (S (1 "she" "PRO" 1.0))
3 | (1 (2 "can" "AUX" 0.99))
4 | (1 (2 "can" "VERB" 0.01))
5 | (2 (S "swing" "NOUN" 0.7))
6 | (2 (S "swing" "VERB" 0.3))
7 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/t.1:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "small" "SMALL"))
3 | (0 (0 "dog" "DOG"))
4 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/t1:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "HE" "he" 0.4))
3 | (0 (0 "SAW" "saw" 0.8))
4 | (0 (0 "ME" "me" 1.0))
5 | 
6 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/t2:
--------------------------------------------------------------------------------
 1 | S
 2 | (S (1 "he" "PRO" 1.0))
 3 | (S (1 "she" "PRO" 1.0))
 4 | (1 (2 "could" "AUX" 0.99))
 5 | (1 (2 "can" "AUX" 0.59))
 6 | (1 (2 "could" "VERB" 0.01))
 7 | (2 (S "dance" "NOUN" 0.7))
 8 | (2 (S "swing" "NOUN" 0.7))
 9 | (2 (S "dance" "VERB" 0.3))
10 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/wfsa1:
--------------------------------------------------------------------------------
1 | 3
2 | (0 (1 "he" 0.4))
3 | (1 (2 "saw" 0.8))
4 | (2 (3 "me" 1.0))
5 | (1 (4 "ran" 0.2))
6 | (4 (3 "home" 1.0))
7 | (0 (5 "she" 0.6))
8 | (5 (3 "talked" 1.0))
9 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/wfsa2:
--------------------------------------------------------------------------------
 1 | FINAL
 2 | (START (PRO "PRO" 0.6))
 3 | (START (NOUN "NOUN" 0.3))
 4 | (START (AUX "AUX" 0.05))
 5 | (START (VERB "VERB" 0.05))
 6 | (PRO (AUX "AUX" 0.4))
 7 | (PRO (VERB "VERB" 0.6))
 8 | (NOUN (NOUN "NOUN" 0.7))
 9 | (NOUN (VERB "VERB" 0.3))
10 | (AUX (VERB "VERB" 1.0))
11 | (VERB (NOUN "NOUN" 1.0))
12 | (VERB (FINAL *e* 1.0))
13 | (AUX (FINAL *e* 1.0))
14 | (NOUN (FINAL *e* 1.0))
15 | (PRO (FINAL *e* 1.0))
16 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/wfst1:
--------------------------------------------------------------------------------
1 | S
2 | (S (S "they" "PRO" 1.0))
3 | (S (S "she" "PRO" 1.0))
4 | (S (S "can" "AUX" 0.99))
5 | (S (S "may" "AUX" 0.99))
6 | (S (S "can" "VERB" 0.01))
7 | (S (S "dance" "NOUN" 0.7))
8 | (S (S "fish" "VERB" 0.3))
9 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/wfst2:
--------------------------------------------------------------------------------
1 | S
2 | (S (S "PRO" "they" 0.07))
3 | (S (S "AUX" "can" 0.21))
4 | (S (S "VERB" "can" 0.00001))
5 | (S (S "NOUN" "fish" 0.0001))
6 | (S (S "VERB" "fish" 0.0001))
7 | 


--------------------------------------------------------------------------------
/carmel/sample/tmp/wfst3:
--------------------------------------------------------------------------------
1 | S
2 | (S (S "she" "PRO" 1.0))
3 | (S (S "could" "AUX" 0.99))
4 | (S (S "can" "VERB" 0.01))
5 | (S (S "ran" "NOUN" 0.7))
6 | (S (S "fish" "VERB" 0.3))
7 | (S (S "but" "VERB" 0.3))
8 | (S (S "dance" "VERB" 0.3))
9 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/README:
--------------------------------------------------------------------------------
 1 | The graphical "tree cascade" model you described can be learned by carmel.
 2 | 
 3 | If x is a hidden string (POS tags + sentence boundaries for your corpus) with an untrained source model p(x), and there are 1 or more models p_i(z_i|x) with observed {z_i}, and the parameters of models p and p_0 are to be learned while the other p_i are known, then carmel can learn the best (MAP) model for p(x|{z_i}) = k*p(x)*prod_i{p_i(z_i|x)}, where k is constant since the {z_i} are all known.
 4 | 
 5 | A script and some small models/data are attached.
 6 | 
 7 | From what I heard, you want to incorporate some (vague) expectation as to e.g. what portion of the tags in the whole corpus are NN etc.  Actually using an observation of e.g. 40000 NN in a large corpus will result in a huge p(x|z_NN) model, because the FSA would need at least 40000 states.  A more exponential model would be more efficient.  You can definitely just explicitly encode a p(x) multiplicative prior - just place it in the cascade and don't normalize it (--normby=...N...) or lock the arcs with "-N 0".  I also wonder whether an additive prior might be good if you just want to bias the initialization a little (I presume to help with the identification problem with evaluating unsupervised tags/parses).
 8 | 
 9 | To simultaneously train more than one of the conditional models would probably require modifying carmel or exporting to forest-em (the program I mentioned that handles derivation forests and more explicitly encodes the identity and normalization of parameters, rather than relying on carmel's odd "tied parameter group" facility).
10 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/hidden.fsa:
--------------------------------------------------------------------------------
 1 | FINAL
 2 | (0 (Z *e* Z ))
 3 | (0 (X *e* X ))
 4 | (0 (Y *e* Y ))
 5 |   (0 (FINAL ))
 6 | (Y (Y *e* Y ))
 7 | (Y (X *e* X ))
 8 | (Y (Z *e* Z ))
 9 | (Y (FINAL  ))
10 | (X (Y *e* Y ))
11 | (X (X *e* X ))
12 | (X (Z *e* Z ))
13 | (X (FINAL  ))
14 | (Z (Y *e* Y ))
15 | (Z (X *e* X ))
16 | (Z (Z *e* Z ))
17 | (Z (FINAL  ))
18 | 
19 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/observed0.data:
--------------------------------------------------------------------------------
1 | a a b c a b c b b a
2 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/observed0.fst:
--------------------------------------------------------------------------------
 1 | 0
 2 | (0 (0 X a ))
 3 | (0 (0 Y a ))
 4 | (0 (0 Z a ))
 5 | (0 (0 X b ))
 6 | (0 (0 Y b ))
 7 | (0 (0 Z b ))
 8 | (0 (0 X c ))
 9 | (0 (0 Y c ))
10 | (0 (0 Z c ))
11 | 
12 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/observed1.data:
--------------------------------------------------------------------------------
1 | Z Z Z
2 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/observed1.fst:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 X *e* ))
3 | (0 (0 Y *e* ))
4 | (0 (0 Z ))
5 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/observed2.data:
--------------------------------------------------------------------------------
1 | Y
2 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/observed2.fst:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 X *e* ))
3 | (0 (0 Y Y ))
4 | (0 (0 Z *e* ))
5 | 
6 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/s/hidden.fsa:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 *e* Z ))
3 | (0 (0 *e* X ))
4 | (0 (0 *e* Y ))
5 | 
6 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/s/observed0.data:
--------------------------------------------------------------------------------
1 | 
2 | a a b c a b c b b a
3 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/s/observed0.fst:
--------------------------------------------------------------------------------
 1 | 0
 2 | (0 (0 X a ))
 3 | (0 (0 Y a ))
 4 | (0 (0 Z a ))
 5 | (0 (0 X b ))
 6 | (0 (0 Y b ))
 7 | (0 (0 Z b ))
 8 | (0 (0 X c ))
 9 | (0 (0 Y c ))
10 | (0 (0 Z c ))
11 | 
12 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/s/observed1.data:
--------------------------------------------------------------------------------
1 | Z
2 | Z
3 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/s/observed1.fst:
--------------------------------------------------------------------------------
1 | F
2 | (0 (0 X *e* ))
3 | (0 (0 Y *e* ))
4 | (0 (0 Z Z ))
5 | (0 (F) )
6 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/s/observed2.data:
--------------------------------------------------------------------------------
1 | 
2 | Y
3 | 


--------------------------------------------------------------------------------
/carmel/sample/tree-cascade/s/observed2.fst:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 X *e* ))
3 | (0 (0 Y Y ))
4 | (0 (0 Z *e* ))
5 | 
6 | 


--------------------------------------------------------------------------------
/carmel/sample/wfsa.perplexity:
--------------------------------------------------------------------------------
1 | 2
2 | (0 (1 .5) (2 .5))
3 | 


--------------------------------------------------------------------------------
/carmel/sample/wfsa1:
--------------------------------------------------------------------------------
1 | 3
2 | (0 (1 "he" 0.4))
3 | (1 (2 "saw" 0.8))
4 | (2 (3 "me" 1.0))
5 | (1 (4 "ran" 0.2))
6 | (4 (3 "home" 1.0))
7 | (0 (5 "she" 0.6))
8 | (5 (3 "talked" 1.0))
9 | 


--------------------------------------------------------------------------------
/carmel/sample/wfsa2:
--------------------------------------------------------------------------------
 1 | FINAL
 2 | (START (PRO "PRO" 0.6))
 3 | (START (NOUN "NOUN" 0.3))
 4 | (START (AUX "AUX" 0.05))
 5 | (START (VERB "VERB" 0.05))
 6 | (PRO (AUX "AUX" 0.4))
 7 | (PRO (VERB "VERB" 0.6))
 8 | (NOUN (NOUN "NOUN" 0.7))
 9 | (NOUN (VERB "VERB" 0.3))
10 | (AUX (VERB "VERB" 1.0))
11 | (VERB (NOUN "NOUN" 1.0))
12 | (VERB (FINAL *e* 1.0))
13 | (AUX (FINAL *e* 1.0))
14 | (NOUN (FINAL *e* 1.0))
15 | (PRO (FINAL *e* 1.0))
16 | 


--------------------------------------------------------------------------------
/carmel/sample/wfsa3:
--------------------------------------------------------------------------------
1 | 1
2 | (0 (1 a .3) (1 e^-5) (1 () (b .5))) 
3 | 


--------------------------------------------------------------------------------
/carmel/sample/wfsa4:
--------------------------------------------------------------------------------
1 | 1
2 | (0 (0 a -1log) (0 .5) (0) (1) (1 b))
3 | (1 1 .5)
4 | 


--------------------------------------------------------------------------------
/carmel/sample/wfst1:
--------------------------------------------------------------------------------
1 | S
2 | (S (S "they" "PRO" 1.0))
3 | (S (S "can" "AUX" 0.99))
4 | (S (S "can" "VERB" 0.01))
5 | (S (S "fish" "NOUN" 0.7))
6 | (S (S "fish" "VERB" 0.3))
7 | 


--------------------------------------------------------------------------------
/carmel/sample/wfst2:
--------------------------------------------------------------------------------
1 | S
2 | (S (N "they" "PRO" 1.0))
3 | (N (Q "they" "PRO" 1.0))
4 | (S (S "they" "PRO" 1.0))
5 | (S (S "can" "AUX" 0.99))
6 | (S (S "can" "VERB" 0.01))
7 | (S (S "fish" "NOUN" 0.7))
8 | (S (S "fish" "VERB" 0.3))
9 | 


--------------------------------------------------------------------------------
/carmel/sample/wfst2.preprune:
--------------------------------------------------------------------------------
1 | S
2 | (S (N "they" "PRO" 1.0))
3 | (N (Q "they" "PRO" 1.0))
4 | (S (S "they" "PRO" 1.0))
5 | (S (S "can" "AUX" 0.99))
6 | (S (S "can" "VERB" 0.01))
7 | (S (S "fish" "NOUN" 0.7))
8 | (S (S "fish" "VERB" 0.3))
9 | 


--------------------------------------------------------------------------------
/carmel/sample/wfst3:
--------------------------------------------------------------------------------
1 | 2
2 | (0 (2 0.1) (1 *e* b 0.6) (0 *e* a 0.3) (4 *e* a .1))
3 | (1 (2 *e* a 0.3) (0 *e* b 0.7))
4 | (2)
5 | (4 (2 *e* c))
6 | 


--------------------------------------------------------------------------------
/carmel/sample/wfst3c:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 a c .6) (0 a d .4) (0 b d .2) (0 b e .8))
3 | 


--------------------------------------------------------------------------------
/carmel/sample/wfstlog:
--------------------------------------------------------------------------------
1 | S
2 | (S (S "PRO" "they" -20log))
3 | (S (S "AUX" "can" -20ln))
4 | (S (S "VERB" "can" 200log))
5 | (S (S "NOUN" "fish" 0.0001))
6 | (S (S "VERB" "fish" 0.0001))
7 | 


--------------------------------------------------------------------------------
/carmel/src/Makefile:
--------------------------------------------------------------------------------
1 | default: # delegate to the Makefile one directory up
2 | 	cd .. && $(MAKE)
3 | 


--------------------------------------------------------------------------------
/carmel/src/WARNING:
--------------------------------------------------------------------------------
 1 | Carmel is the first significant C++ I wrote.  It's pre-STL.  I wouldn't write it this way today, but it's not worth a rewrite.  Here are some things to watch out for:
 2 | 
 3 | I wrote a node-based hashtable which means it's safe to directly contain a singly linked list by value (because nodes are never copied in normal hashtable operation).  This means you'd have horrible performance on grows if you switched to open hashing.  The gnu hashtable is also node-based.
 4 | 
 5 | Arcs' groupId field is reused as an index for other purposes (e.g. tracking lists of original cascade arcs in a composition, associating arcs with Gibbs parameter ids).
 6 | 
 7 | Command line option processing is gross.  I'd use boost program_options today.  --long options aren't spelling checked / closed-class.
 8 | 
 9 | Singly linked lists were used for arcs and other things, so as to be relatively memory-concise and mutable.  A growing array (e.g. std::vector) might give better cache locality and performance, and would certainly use less space if compacted after e.g. reading lists from file (the size won't change after that).
10 | 
11 | 


--------------------------------------------------------------------------------
/carmel/src/models.h:
--------------------------------------------------------------------------------
 1 | #ifndef MODELS_H 
 2 | #define MODELS_H 1
 3 | #include <vector>
 4 | #include <string>
 5 | 
 6 | char *ModelsDef[] = {
 7 |   "0 (0 (0 \"A\" \"A\" 0.75)  (0 \"AA\" \"A\" 0.25) (0 \"B\" \"B\" 0.67) (0 \"BB\" \"B\" 0.33))",
 8 |   "0 (0 (0 \"A\" \"a\") (0 \"B\" \"b\"))"
 9 | };
10 | // ModelsDef above holds one model text per string; append further entries there as needed.
11 | // NOTE(review): binding string literals to plain char* is not valid in C++11 and later;
12 | // const char* would be cleaner, but the element type is kept in case other code depends on it.
13 | std::vector<std::string> Models;  // filled by initModels(); std:: is spelled out so the header needs no using-directive
14 | void initModels()
15 | {
16 |   // Appends every ModelsDef entry to Models, in order (a second call appends them again).
17 |   const int n_models = sizeof(ModelsDef) / sizeof(ModelsDef[0]);
18 |   for (int i = 0; i < n_models; ++i)
19 |     Models.push_back(ModelsDef[i]);
20 | }
21 | 
22 | 
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/carmel/src/tests/Makefile:
--------------------------------------------------------------------------------
1 | #include ../../Makefile
2 | all: # compiles Tweight.cc together with ../weight.cc; no -o is given, so g++ picks its default output name
3 | 	g++ -ffast-math -ggdb Tweight.cc ../weight.cc
4 | 


--------------------------------------------------------------------------------
/carmel/src/tests/Tweight.cc:
--------------------------------------------------------------------------------
 1 | #include "../weight.h"
 2 | #include "../list.h"
 3 | #include <algorithm>
 4 | using namespace std;
 5 | // Manual test driver: reads pairs of Weight values from stdin, prints each pair with its product, quotient, sum and difference under three output settings, then prints every value read in list order and again after l.reverse().
 6 | int main()
 7 | {
 8 |   List<Weight> l;
 9 |   Weight a,b;
10 |   insert_iterator<List<Weight> > o(l,l.begin());
11 |   for (;;) {
12 |     // Read one pair per iteration; leave the loop at the first failed extraction (e.g. end of input).
13 |     cin >> a >> b;
14 |     if (cin) {
15 |       *o++ = a;
16 |       *o++ = b;
17 |       Weight::out_ln(cout);Weight::out_always_real(cout);  // output-format switches declared in weight.h (semantics not visible here)
18 |       cout << "a=" << a << " b=" << b << " a*b=" << a*b << " a/b=" << a/b << " a+b=" << a+b << " a-b=" << a-b << endl;
19 |       Weight::out_always_log(cout);
20 |       cout << "a=" << a << " b=" << b << " a*b=" << a*b << " a/b=" << a/b << " a+b=" << a+b << " a-b=" << a-b << endl;
21 |       Weight::out_variable(cout);
22 |       cout << "a=" << a << " b=" << b << " a*b=" << a*b << " a/b=" << a/b << " a+b=" << a+b << " a-b=" << a-b << endl;
23 |     } else
24 |       break;
25 |   }
26 |   cout << "\n";
27 |   for (List<Weight>::iterator i=l.begin();i!=l.end();++i)
28 |     cout << *i << " ";
29 |   cout << "\n";
30 |   cout << "\n";
31 |   l.reverse();
32 |   for (List<Weight>::const_iterator i=l.const_begin(),end=l.const_end();i!=end;++i)
33 |     cout << *i << " ";
34 |   cout << "\n";
35 |   return 0;
36 | }
37 | 


--------------------------------------------------------------------------------
/carmel/test/N.cascade.train.gen.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | carmel=${carmel:-carmel}  # carmel executable; override through the environment
 4 | N=${N:-100}  # value handed to "carmel -g" below
 5 | M=${M:-5}  # value handed to "carmel -M" below
 6 | function safefilename {  # prints its arguments with every run of non-word characters replaced by "."
 7 |    echo "$@" | perl -pe 's/\W+/./g'
 8 | }
 9 | comp=comp.`safefilename $*`  # file name built from all command-line arguments
10 | corp=.corpus.$comp.$N
11 | $carmel "$@" > $comp  # run carmel on all arguments at once (presumably composing them -- confirm) and save the result
12 | $carmel -g $N $comp > $corp  # presumably generates $N corpus entries from $comp -- confirm -g semantics
13 | uchain=  # accumulates the list of $f.u files
14 | for f in $*; do  # derive a $f.u file from each argument
15 |  $carmel -n --constant-weight=1 $f > $f.u
16 |  uchain+=" $f.u"
17 | done
18 | $carmel -S $corp $uchain >/dev/null  # -S runs on the corpus with the .u chain, then with $comp; stdout is discarded
19 | $carmel -S $corp $comp >/dev/null
20 | $carmel -M $M --train-cascade $ARGS $corp $uchain  # $ARGS is left unquoted so extra options split into words
21 | for f in $*; do  # show each original next to $f.u.trained (presumably written by the training run above)
22 |  echo original:
23 |  $carmel $f
24 |  echo trained:
25 |  $carmel $f.u.trained
26 | done
27 | 


--------------------------------------------------------------------------------
/carmel/test/angela.knight.kbest.wfst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/test/angela.knight.kbest.wfst


--------------------------------------------------------------------------------
/carmel/test/asciikana-katakana.transducer:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/test/asciikana-katakana.transducer


--------------------------------------------------------------------------------
/carmel/test/bad.-a.1:
--------------------------------------------------------------------------------
1 | 2
2 | (0 (2 "A" "b" .2) (2 "B" "b" 0.5) (1 "C" *e* 0.5))
3 | (1 (2 "D" "b" 0.5) (0 "E" "a" 0.5))
4 | (2)
5 | 


--------------------------------------------------------------------------------
/carmel/test/bad.-a.2:
--------------------------------------------------------------------------------
1 | F
2 | (S (S "a" "X" 0.5) (S "b" "Y" 0.3) (F *e* "Z" .2))
3 | (F)
4 | 


--------------------------------------------------------------------------------
/carmel/test/compose-test.sh:
--------------------------------------------------------------------------------
1 | $B -rsim jpron.transducer vowel-separator.transducer         jpron-asciikana.transducer asciikana-katakana.transducer   < test.katakana  # $B is not set here; the caller must provide it (presumably the carmel binary, as in runtests.sh)
2 | 


--------------------------------------------------------------------------------
/carmel/test/determinize.usr.dict.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | input=${input:-/usr/share/dict/words}  # word list to use; override through the environment
 4 | if [ ! "$skipdict" ] ; then  # set skipdict to reuse the dict.*.fsa files from an earlier run
 5 | ../make-dictionary.pl -r $input > dict.words.char.fsa.random
 6 | carmel -jn dict.words.char.fsa.random > dict.words.char.fsa
 7 | cp dict.words.char.fsa looped.dict.words.char.fsa
 8 | echo '(END 0 " ")' >> looped.dict.words.char.fsa  # the "looped" copy gets one extra arc line from END to 0 on " "
 9 | fi
10 | carmel --minimize --minimize-determinize $* dict.words.char.fsa -F det.dict.fsa  # script arguments are passed through as extra carmel options
11 | carmel --minimize --minimize-determinize $* looped.dict.words.char.fsa -F det.looped.dict.fsa
12 | carmel -kO 20 det.dict.fsa  # same -kO 20 query on each determinized and original automaton, for comparison
13 | carmel -kO 20 dict.words.char.fsa
14 | carmel -kO 20 det.looped.dict.fsa
15 | carmel -kO 20 looped.dict.words.char.fsa
16 | 


--------------------------------------------------------------------------------
/carmel/test/empty:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/test/empty


--------------------------------------------------------------------------------
/carmel/test/fsa7:
--------------------------------------------------------------------------------
1 | 3
2 | (0 (1 "they"))
3 | (1 (2 "can"))
4 | (2 (3 "fish"))
5 | 


--------------------------------------------------------------------------------
/carmel/test/jpron.transducer:
--------------------------------------------------------------------------------
 1 | START
 2 | (START (START ("PAUSE") ("A") ("E") ("I") ("O") ("U") ("N"))
 3 |        (C1 ("N") ("NN") ("K") ("KK") ("S") ("SS") ("SH") ("SSH") ("T") 
 4 | 	   ("TT") ("D" 0.05) ("DD" 0.05) ("TS") ("TTS") ("M") ("MM") ("R") ("RR") 
 5 | 	   ("G") ("GG") ("Z") ("ZZ") ("J") ("JJ") ("F" 0.05) ("FF" 0.05)
 6 | 	   ("CH") ("TCH") ("B") ("BB") ("P") ("PP") ("H") ("HH") ("V" 0.05))
 7 |        (C2 ("NN") ("K") ("KK") ("S") ("SS") ("SH") ("SSH") ("T") 
 8 | 	   ("TT") ("TS") ("TTS") ("M") ("MM") ("R") ("RR") 
 9 | 	   ("G") ("GG") ("Z") ("ZZ") ("J") ("JJ") ("F" 0.05) ("FF" 0.05)
10 | 	   ("CH") ("TCH") ("B") ("BB") ("P") ("PP") ("W") ("Y") ("H") ("HH") ("V" 0.05))
11 |        (C3 ("K") ("KK") ("S") ("SS"))
12 |        (D  ("D") ("DD")))
13 | (C1 (C2 ("Y")))
14 | (C2 (START ("A") ("E") ("I") ("O") ("U")))
15 | (C3 (C2 ("W" 0.05)))
16 | (D  (START ("A") ("E") ("I" 0.05) ("O") ("U" 0.05)))
17 | 


--------------------------------------------------------------------------------
/carmel/test/kbest.small.cycle:
--------------------------------------------------------------------------------
1 | 2
2 | (0 (0 *e* b .033333) (2 0.3) (1 0.333333) (0 *e* a 0.333333))
3 | (1 (2 *e* b 0.5) (0 0.5))
4 | (2 (0 .1) (1 .9))
5 | 


--------------------------------------------------------------------------------
/carmel/test/prune.test:
--------------------------------------------------------------------------------
1 | 01
2 | (00 (10 .1))
3 | (10 (11 .1) (20 .1) (01 .1))
4 | (20 (21 .1) (11 .1))
5 | (11 (01 .1) (10 .1))
6 | (21 (11 .1) (20 .1))
7 | (01 (00 .1))
8 | 


--------------------------------------------------------------------------------
/carmel/test/runtests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | cd "$(dirname "$0")" || exit 1  # run from the script's directory; abort rather than test and log somewhere else
 3 | B=${1:-../bin/macosx/carmel}  # binary under test: first argument, else this default path
 4 | which "$B"
 5 | mkdir -p logs
 6 | log=logs/tests.$(basename "$B").$(date +%C%y%m%d_%H:%M)  # one log per binary name and minute
 7 | (echo "$B";ls -l "$B";uname -a;hostname; time . traintest.sh;time "$B" -IEQ -k 1000 angela.knight.kbest.wfst;time . j-test-jap ) 2>&1  | tee "$log"  # sub-scripts are sourced so they see $B
 8 | ln -sf "$log" latest.log
 9 | echo
10 | echo "$(pwd)/latest.log"
11 | 


--------------------------------------------------------------------------------
/carmel/test/test.asciikana:
--------------------------------------------------------------------------------
1 | "a" "n" "ji" "ra" "na" "i" "to"
2 | 


--------------------------------------------------------------------------------
/carmel/test/test.compose.-a.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | . ~/isd/hints/aliases.sh  # NOTE(review): author-specific environment file -- confirm it is still needed
 3 | B=${B:-carmel}  # carmel executable; override through the environment
 4 | N=${N:-10}  # value handed to -k below
 5 | a=${1:?"args: xdcr1 xdcr2 (for composition)"}  # fail with the usage text when an argument is missing
 6 | b=${2:?"args: xdcr1 xdcr2 (for composition)"}  # was ${2:-...}, which used the usage text itself as the file name
 7 | set -x
 8 | set -e
 9 | $B -N 100000 $a > $a.g  # .g copies of both inputs, made with different -N values
10 | $B -N 200000 $b > $b.g
11 | $B -m $a $b > $a.comp.$b  # composition of the originals
12 | $B -am $a.g $b.g > $a.comp.-a.$b  # composition of the .g copies with -a added
13 | $B -@k $N $a.comp.$b > $a.composed.best
14 | $B -k $N $a.comp.$b > $a.composed.paths
15 | $B -S $a.composed.best $a.comp.$b  # run -S with the same .best file against both compositions
16 | $B -S $a.composed.best $a.comp.-a.$b
17 | 


--------------------------------------------------------------------------------
/carmel/test/test.epron:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/test/test.epron


--------------------------------------------------------------------------------
/carmel/test/test.final:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/test/test.final


--------------------------------------------------------------------------------
/carmel/test/test.jpron2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/test/test.jpron2


--------------------------------------------------------------------------------
/carmel/test/test.kana:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/test/test.kana


--------------------------------------------------------------------------------
/carmel/test/test.katakana:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/test/test.katakana


--------------------------------------------------------------------------------
/carmel/test/test.word:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/test/test.word


--------------------------------------------------------------------------------
/carmel/test/train.a:
--------------------------------------------------------------------------------
1 | fin
2 | (start (start *e* a) (1 *e* b) (fin) (fin *e* d 0) (fin *e* c))
3 | (1 (start *e* b) (fin *e* a))
4 | (fin)
5 | 


--------------------------------------------------------------------------------
/carmel/test/train.a.u:
--------------------------------------------------------------------------------
1 | 2
2 | (0 (2 0.333333) (1 *e* b 0.333333) (0 *e* a 0.333333))
3 | (1 (2 *e* a 0.5) (0 *e* b 0.5))
4 | (2)
5 | 


--------------------------------------------------------------------------------
/carmel/test/train.a.w:
--------------------------------------------------------------------------------
1 | 2
2 | (0 (2 0.1) (1 *e* b 0.6) (0 *e* a 0.3))
3 | (1 (2 *e* a 0.3) (0 *e* b 0.7))
4 | (2)
5 | 


--------------------------------------------------------------------------------
/carmel/test/train.cascade.gen.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | carmel=${carmel:-carmel}  # carmel executable; override through the environment
 4 | fst1=${1:?arg 1: a wfst}  # both positional arguments are required
 5 | fst2=${2:?arg 2: a wfst to compose w/ fst 1}
 6 | shift  # drop the two file arguments so $* holds only extra training options
 7 | shift
 8 | N=${N:-100}  # value handed to "carmel -g" below
 9 | M=${M:-5}  # value handed to "carmel -M" below
10 | comp=$fst1.comp.$fst2
11 | corp=.corpus.$comp.$N
12 | $carmel $fst1 $fst2 > $comp  # run carmel on both inputs (presumably composing them -- confirm) and save the result
13 | $carmel -g $N $comp > $corp  # presumably generates $N corpus entries from $comp -- confirm -g semantics
14 | $carmel -n --constant-weight=1 $fst1 > $fst1.u  # .u variants of both inputs
15 | $carmel -n --constant-weight=1 $fst2 > $fst2.u
16 | $carmel -S $corp $fst1.u $fst2.u >/dev/null  # -S runs on the corpus with the .u pair, then with $comp; stdout is discarded
17 | $carmel -S $corp $comp >/dev/null
18 | $carmel -M $M --train-cascade $* $corp $fst1.u $fst2.u  # remaining script arguments are passed as extra options
19 | echo trained:
20 | $carmel $fst1.u.trained  # presumably written by the --train-cascade run above
21 | $carmel $fst2.u.trained
22 | echo original:
23 | $carmel $fst1
24 | $carmel $fst2
25 | 
26 | 


--------------------------------------------------------------------------------
/carmel/test/train.self.gen.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | carmel=${carmel:-carmel}  # carmel executable; override through the environment
 4 | fst=${1:?arg 1: a wfst e.g. train.a.w}  # required input file
 5 | shift  # the remaining arguments become extra options for the training run
 6 | N=${N:-100}  # value handed to "carmel -g" below
 7 | $carmel -g $N $fst > corpus.$fst.$N  # presumably generates $N corpus entries from $fst -- confirm -g semantics
 8 | $carmel --constant-weight=1 $fst > $fst.u  # .u variant of the input
 9 | $carmel -S corpus.$fst.$N $fst.u >/dev/null  # stdout is discarded
10 | $carmel -F $fst.trained.self.gen -t $* corpus.$fst.$N $fst.u  # -F names the output file $fst.trained.self.gen
11 | #echo original:
12 | #$carmel $fst
13 | 
14 | 


--------------------------------------------------------------------------------
/carmel/test/traintest.sh:
--------------------------------------------------------------------------------
1 | cmd="$B -o 1.1 -M 4 -F span.spell.trained2 -t span.spell.corpus span.spell.wfst"  # $B must be set by the caller (runtests.sh sources this file)
2 | #$B -o 1.1 -M 3 -F span.spell.trained2 -t span.spell.corpus span.spell.wfst
3 | echo $cmd  # the command line is echoed before and after it runs
4 | $cmd
5 | echo $cmd
6 | 


--------------------------------------------------------------------------------
/carmel/test/vowel-separator.transducer:
--------------------------------------------------------------------------------
 1 | 0
 2 | (0 (a "A" "AA") (i "I" "II") (o "O" "OO") (u "U" "UU") (e "E" "EE") 
 3 |    (0 "A" "A" 0.99) (0 "B" "B") (0 "BB" "BB") (0 "CH" "CH") (0 "D" "D") (0 "DD" "DD")
 4 |    (0 "E" "E" 0.99) (0 "G" "G") (0 "GG" "GG") (0 "H" "H") (0 "HH" "HH") (0 "I" "I" 0.99)
 5 |    (0 "J" "J") (0 "JJ" "JJ") (0 "K" "K") (0 "KK" "KK") (0 "M" "M") (0 "MM" "MM")
 6 |    (0 "N" "N") (0 "NN" "NN") (0 "O" "O" 0.99) (0 "P" "P") (0 "PAUSE" "PAUSE")
 7 |    (0 "PP" "PP") (0 "R" "R") (0 "RR" "RR") (0 "S" "S") (0 "SH" "SH") (0 "SS" "SS")
 8 |    (0 "SSH" "SSH") (0 "T" "T") (0 "TCH" "TCH") (0 "TS" "TS") (0 "TT" "TT")
 9 |    (0 "TTS" "TTS") (0 "U" "U" 0.99) (0 "V" "V") (0 "W" "W") (0 "Y" "Y")
10 |    (0 "F" "F") (0 "FF" "FF") (0 "Z" "Z") (0 "ZZ" "ZZ"))
11 | (a (0 "A" *e*)) 
12 | (i (0 "I" *e*)) 
13 | (e (0 "E" *e*)) 
14 | (o (0 "O" *e*)) 
15 | (u (0 "U" *e*))
16 | 


--------------------------------------------------------------------------------
/carmel/test/wfst2:
--------------------------------------------------------------------------------
1 | S
2 | (S (S "PRO" "they" 0.07))
3 | (S (S "AUX" "can" 0.21))
4 | (S (S "VERB" "can" 0.00001))
5 | (S (S "NOUN" "fish" 0.0001))
6 | (S (S "VERB" "fish" 0.0001))
7 | 


--------------------------------------------------------------------------------
/carmel/test/word-epron.names.55000wds.transducer:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/carmel/test/word-epron.names.55000wds.transducer


--------------------------------------------------------------------------------
/carmel/test/y.data:
--------------------------------------------------------------------------------
1 | 
2 | "a" "a" "a" "b" "a" "a" "b" "a" "a" "b"
3 | 


--------------------------------------------------------------------------------
/carmel/test/y1.new:
--------------------------------------------------------------------------------
1 | 2
2 | (1 (2 *e* "C" 0.2!) (1 *e* "V" 0.8!))
3 | (2 (2 *e* "C" 0.2!) (1 *e* "V" 0.8!))
4 | 


--------------------------------------------------------------------------------
/carmel/test/y2.new:
--------------------------------------------------------------------------------
1 | 0
2 | (0 (0 "C" "b" 0.5!100) (0 "C" "a" 0.5!101) (0 "V" "b" 0.5!102) (0 "V" "a" 0.5!103))
3 | 


--------------------------------------------------------------------------------
/carmel/test/y4.new:
--------------------------------------------------------------------------------
 1 | 4
 2 | (0 (1 *e* *e* 0.2!))
 3 | (0 (2 *e* *e* 0.8!))
 4 | (2 (0 *e* "a" 0.5!103))
 5 | (2 (0 *e* "b" 0.5!102))
 6 | (1 (4 *e* "a" 0.5!101))
 7 | (1 (4 *e* "b" 0.5!100))
 8 | (4 (3 *e* *e* 0.2!))
 9 | (4 (5 *e* *e* 0.8!))
10 | (3 (0 *e* "a" 0.5!103))
11 | (3 (0 *e* "b" 0.5!102))
12 | (5 (4 *e* "a" 0.5!101))
13 | (5 (4 *e* "b" 0.5!100)) 
14 | 


--------------------------------------------------------------------------------
/cipher/baseline.2.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use strict;
 3 | 
 4 | # Baseline scorer.  Line i of test.freq is paired with line i of train.freq;
 5 | # on each line the first whitespace-separated field is used as a count and
 6 | # the second as a word.  A pair is a hit when the two words are equal.  Hit
 7 | # lines are echoed to STDERR and two accuracies are printed to STDOUT: per
 8 | # unique test word, and per running word (hits weighted by the test count).
 9 | 
10 | open my $test_fh,  '<', 'test.freq'  or die "cannot open test.freq: $!";
11 | open my $train_fh, '<', 'train.freq' or die "cannot open train.freq: $!";
12 | 
13 | my $N=0;          # sum of the test counts seen
14 | my $right=0;      # sum of the test counts on hit lines
15 | my $Ndict=0;      # number of test lines scored
16 | my $rightdict=0;  # number of hit lines
17 | 
18 | while (my $test_line = <$test_fh>) {
19 |     my $train_line = <$train_fh>;
20 |     last unless defined $train_line;    # train.freq ran out: stop scoring
21 |     my ($test_n, $test_w)  = split ' ', $test_line;
22 |     my (undef,   $train_w) = split ' ', $train_line;
23 |     $Ndict++;
24 |     $N += $test_n;
25 |     if ($test_w eq $train_w) {
26 |         print STDERR $test_line;
27 |         $right += $test_n;
28 |         $rightdict++;
29 |     }
30 | }
31 | 
32 | # With no scored lines (or all-zero counts) the divisions below used to die
33 | # with "Illegal division by zero"; report an accuracy of 0 instead.
34 | my $acc_dict = $Ndict ? $rightdict/$Ndict : 0;
35 | my $acc_text = $N     ? $right/$N         : 0;
36 | 
37 | print "per-word ($rightdict correct out of $Ndict unique test words) accuracy: ",$acc_dict,"\n";
38 | print "per-running-text (out of $N running test words) accuracy: ",$acc_text,"\n";
39 | 


--------------------------------------------------------------------------------
/cipher/carmel-quote-words:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use strict;
 3 | 
 4 | # Quote one token for carmel: backslash-escape embedded double quotes, then wrap the token in "...".
 5 | sub escape_for_carmel
 6 | {
 7 |     my ($s)=@_;
 8 |     $s =~ s/"/\\"/g;   # fix: the old replacement \$1 inserted the literal text $1 instead of \"
 9 |     return qq{"$s"};   # NOTE(review): backslashes in the token are not escaped -- confirm whether carmel needs that
10 | }
11 | 
12 | while (my $line = <>) {    # every input line, from the files named on the command line or from STDIN
13 |     $line =~ s/(\S+)/escape_for_carmel($1)/ge;    # quote each whitespace-delimited token, keeping the spacing
14 |     print $line;
15 | }
16 | 
17 | 


--------------------------------------------------------------------------------
/cipher/class-features:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # needs carmel binaries in PATH or in carmel env var
 3 | # All settings below can be overridden through the environment.
 4 | d=`dirname $0`  # helper scripts are run from this script's own directory
 5 | nclass=${nclass:-4}
 6 | class="class$nclass"  # working directory name, e.g. class4
 7 | maxorder=${maxorder:-2}
 8 | lmsuf=${maxorder}gram  # suffix such as 2gram
 9 | test=${test:-test}
10 | train=${train:-train}
11 | chan=$class/class-channel.$train.$test
12 | tchan=${tchan:-$chan.$lmsuf}
13 | 
14 | if [ "$FLOOR" ] ; then  # FLOOR is only reported here; it is not used again in this script
15 |  echo class FLOOR=$FLOOR
16 | fi
17 | 
18 | fbase=$class/feats.$lmsuf
19 | ftrain=$fbase.train
20 | ftest=$fbase.test
21 | # Pipeline: unigram-freq-bands writes the two .uni files; soft-classes then turns each into the final feature file.
22 | set -x
23 | $d/unigram-freq-bands $train $ftrain.uni < $test > $ftest.uni && \
24 | $d/soft-classes $class/$train < $ftrain.uni > $ftrain && \
25 | $d/soft-classes $class/$test $tchan < $ftest.uni > $ftest
26 | rm $ftrain.uni $ftest.uni  # the intermediate .uni files are removed whether or not the pipeline succeeded
27 | 


--------------------------------------------------------------------------------
/cipher/class-ngrams:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs ngram-class once per text, writing the results under class$nclass/.
 3 | nclass=${nclass:-4}  # number of classes; override through the environment
 4 | class="class$nclass"  # output directory name, e.g. class4
 5 | mkdir -p $class
 6 | test=${test:-test}
 7 | train=${train:-train}
 8 | texts="$test $train"
 9 | #texts=${texts:-train test test.cipher}
10 | echo making lm classes for \"$texts\"
11 | for t in $texts ; do  # each text gets $class/$t (classes) and $class/$t.counts (class counts)
12 |  ngram-class -numclasses $nclass -text $t -class-counts $class/$t.counts -classes $class/$t
13 | done
14 | 


--------------------------------------------------------------------------------
/cipher/class-word-fst:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use strict;
 3 | 
 4 | # Reads "class prob word" lines (files named on the command line, or stdin)
 5 | # and prints them in carmel syntax: a line "S", then "(S", one arc
 6 | # ' (S class "word" prob)' per input line, and a closing ")".
 7 | #
 8 | # env: INVERT - if true, the class and the quoted word swap places on each arc
 9 | #      FLOOR  - if set, lines whose prob is below this value are dropped
10 | 
11 | my $S="S";
12 | 
13 | print "$S\n";
14 | print "($S\n";
15 | 
16 | my $invert=$ENV{INVERT}; # fst takes word->class if set.
17 | my $floor=$ENV{FLOOR}; # drop lines w/ p<floor
18 | 
19 | # use for all single words as tokens in FSA: wraps the word in "..." and
20 | # backslash-escapes embedded double quotes
21 | sub escape_for_carmel
22 | {
23 |     my ($s)=@_;
24 |     # the old replacement text \$1 produced the literal characters '$1'
25 |     $s =~ s/"/\\"/g;
26 |     return qq{"$s"};
27 | }
28 | 
29 | while(<>) {
30 |     my ($class,$p,$w)=split;
31 |     next unless defined $w; # blank or short line: there is no arc to print
32 |     next if defined $floor && $p < $floor;
33 |     $w=escape_for_carmel($w);
34 |     ($class,$w)=($w,$class) if $invert;
35 |     print " ($S $class $w $p)\n";
36 | }
37 | 
38 | print ")\n";
39 | 


--------------------------------------------------------------------------------
/cipher/class-word-wfst:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use strict;
 3 | 
 4 | # Reads "class prob word" lines (files named on the command line, or stdin)
 5 | # and prints them in carmel syntax: a line "S", then "(S", one arc
 6 | # ' (S class "word" prob)' per input line, and a closing ")".
 7 | #
 8 | # env: INVERT - if true, the class and the quoted word swap places on each arc
 9 | #      FLOOR  - if set, lines whose prob is below this value are dropped
10 | 
11 | my $S="S";
12 | 
13 | print "$S\n";
14 | print "($S\n";
15 | 
16 | my $invert=$ENV{INVERT}; # fst takes word->class if set.
17 | my $floor=$ENV{FLOOR}; # drop lines w/ p<floor
18 | 
19 | # use for all single words as tokens in FSA: wraps the word in "..." and
20 | # backslash-escapes embedded double quotes
21 | sub escape_for_carmel
22 | {
23 |     my ($s)=@_;
24 |     # the old replacement text \$1 produced the literal characters '$1'
25 |     $s =~ s/"/\\"/g;
26 |     return qq{"$s"};
27 | }
28 | 
29 | while(<>) {
30 |     my ($class,$p,$w)=split;
31 |     next unless defined $w; # blank or short line: there is no arc to print
32 |     next if defined $floor && $p < $floor;
33 |     $w=escape_for_carmel($w);
34 |     ($class,$w)=($w,$class) if $invert;
35 |     print " ($S $class $w $p)\n";
36 | }
37 | 
38 | print ")\n";
39 | 


--------------------------------------------------------------------------------
/cipher/count-ngrams:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # For each text: runs ngram-count to write the counts ($out/<text>.counts and
 3 | # one $out/<text>.countsN file per order) and the language model, sorts each
 4 | # per-order count file by its field N+1 (descending, numeric), and pipes the
 5 | # language model through sri2fsa.pl and carmel into <lm>.fst.
 6 | # env (all optional): maxorder (3), texts ("train test test.cipher"), out (lms)
 7 | d=$(dirname "$0")
 8 | maxorder=${maxorder:-3}
 9 | texts=${texts:-train test test.cipher}
10 | out=${out:-lms}
11 | mkdir -p "$out"
12 | echo making lms and counts for texts \"$texts\" up to ngram order \"$maxorder\"
13 | for t in $texts ; do
14 |   # an array, so that each file name stays one argument
15 |   writearg=(-write "$out/$t.counts")
16 |   for N in $(seq 1 "$maxorder") ; do
17 |     writearg+=("-write$N" "$out/$t.counts$N")
18 |   done
19 |   trainlm=$out/$t.${maxorder}gram
20 |   echo counting for $t: ngram-count -order $maxorder -unk -sort -text $t "${writearg[*]}" -lm $trainlm
21 |   ngram-count -order "$maxorder" -unk -sort -text "$t" "${writearg[@]}" -lm "$trainlm"
22 |   for N in $(seq 1 "$maxorder") ; do
23 |     sort -rnk $((N+1)) "$out/$t.counts$N" > "$out/$t.sortcounts$N"
24 |   done
25 | 
26 |   NOQUOTE= CHECK_SUFFIX= EOS= "$d/sri2fsa.pl" "$trainlm" | carmel -Ns 0 > "$trainlm.fst"
27 | 
28 | done
29 | 


--------------------------------------------------------------------------------
/cipher/decipher:
--------------------------------------------------------------------------------
1 | encipher


--------------------------------------------------------------------------------
/cipher/decipher-classes:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Trains a class channel with carmel: both texts are mapped to class sequences,
 3 | # an n-gram LM over the train classes is turned into an FSA, and the uniform
 4 | # channel from full-class-channel is trained against the test class text.
 5 | # The trained channel ends up in $class/class-channel.$train.$test.${maxorder}gram.
 6 | # TODO: train on unclassed ciphertext w/ soft clusters in line?
 7 | # needs carmel binaries in PATH
 8 | # env (all optional): maxorder (2), test (test), train (train), nclass (4),
 9 | #   trainopt (extra carmel options; left unquoted on purpose so it word-splits)
10 | d=$(dirname "$0")
11 | maxorder=${maxorder:-2}
12 | N=$maxorder
13 | lmsuf=${N}gram
14 | test=${test:-test}
15 | train=${train:-train}
16 | texts="$test $train"
17 | nclass=${nclass:-4}
18 | class="class$nclass"
19 | mkdir -p "$class"
20 | 
21 | # output names; assigned before the first message so that it can name $tchan
22 | chan=$class/class-channel.$train.$test
23 | tchan=$chan.$lmsuf
24 | uchan=$chan.untrained
25 | 
26 | echo producing trained carmel channel $tchan from train $train and test $test
27 | 
28 | set -x
29 | for t in $texts ; do
30 |   c=$class/$t
31 |   "$d/text-to-classes" "$c" "$t" > "$c.classtext"
32 | done
33 | 
34 | ct=$class/$train
35 | trainlm=$ct.$lmsuf
36 | classfsa=$trainlm.fsa
37 | classfst=$trainlm.fst
38 | ngram-count -order "$N" -sort -text "$ct.classtext" -lm "$trainlm"
39 | NOQUOTE=1 CHECK_SUFFIX= EOS= "$d/sri2fsa.pl" "$trainlm" > "$classfst"
40 | carmel -N 0 --project-right "$classfst" > "$classfsa"
41 | 
42 | ctest=$class/$test
43 | "$d/full-class-channel" "$ct" "$ctest" > "$uchan"
44 | QUOTE= "$d/epsilon-string-pairs" "$ctest.classtext" | carmel $trainopt -sta --train-cascade "$classfsa" "$uchan"
45 | mv "$uchan.trained" "$tchan"
46 | echo trained class channel in $tchan
47 | 
48 | 
49 | #cw=$test.class.word
50 | #INVERT= class-word-fst  $ctest > $cw
51 | #QUOTE=1 $d/epsilon-string-pairs $test | carmel $trainopt -sta --train-cascade $classfsa $uchan $cw
52 | #mv $uchan.trained $tchan
53 | #tcw=class-cipherword.$lmsuf
54 | #mv $cw $tcw
55 | 
56 | 
57 | #echo adjusted cipher class-word soft clusters in $tcw
58 | #echo fixed cipher class-word trained class channel in $tchan.fixed.class


--------------------------------------------------------------------------------
/cipher/do-classes:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs the four class scripts that sit next to this one, in order:
 3 | # class-ngrams, decipher-classes, eval-classes, class-features.
 4 | # needs carmel binaries in PATH
 5 | # env: nclass (default 4) names the directory class$nclass created here
 6 | # NOTE(review): every step runs even if an earlier one failed - confirm that is intended
 7 | d=$(dirname "$0")
 8 | nclass=${nclass:-4}
 9 | class="class$nclass"
10 | mkdir -p "$class"
11 | "$d/class-ngrams"
12 | "$d/decipher-classes"
13 | "$d/eval-classes"
14 | "$d/class-features"


--------------------------------------------------------------------------------
/cipher/encipher:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Toy substitution cipher (not crypto secure): mirrors the printable non-space
 4 | # ascii characters, so '!' <-> '~', '"' <-> '}', and so on.  Running it a
 5 | # second time gives the plaintext back.  Reads the files named on the command
 6 | # line, or stdin.
 7 | 
 8 | use strict;
 9 | 
10 | my $space = 32;  # ascii space: it and everything below it is left alone
11 | my $del   = 127; # DEL: it and everything above it is left alone
12 | 
13 | # replacement character for each of the 256 single-byte characters
14 | my %swap = map {
15 |     my $code = ($_ > $space && $_ < $del) ? $del - ($_ - $space) : $_;
16 |     (chr($_) => chr($code));
17 | } 0..255;
18 | 
19 | while (my $line = <>) {
20 |     $line =~ s#(.)#$swap{$1}#g;
21 |     print $line;
22 | }
23 | 


--------------------------------------------------------------------------------
/cipher/epsilon-string-pairs:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use strict;
 3 | 
 4 | # Copies the input lines to stdout with an empty line in front of each one.
 5 | # env: QUOTE - if true, every whitespace-delimited token is put in double quotes
 6 | 
 7 | my $quote_tokens = $ENV{QUOTE};
 8 | 
 9 | while (my $line = <>) {
10 |     $line =~ s/(\S+)/"$1"/g if $quote_tokens;
11 |     print "\n", $line;
12 | }
13 | 


--------------------------------------------------------------------------------
/cipher/filter_docid:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Line filter: the leading non-space field of each line (and the whitespace
 3 | # after it) is replaced by a single space, every " %name" marker matched by
 4 | # the second substitution is deleted, and at the end stderr gets one
 5 | # "removed COUNT NAME" line per distinct name.
 6 | use strict;
 7 | use warnings;
 8 | 
 9 | my %spec; # name after '%' -> number of times it was removed
10 | 
11 | while(<>) {
12 |     s/^\S+\s*/ /;
13 |     s/ \%(\S*)\b/++$spec{$1};''/ge;
14 |     print;
15 | }
16 | 
17 | for (sort keys %spec) {
18 |     print STDERR "removed $spec{$_} $_\n";
19 | }
20 | 


--------------------------------------------------------------------------------
/cipher/full-class-channel:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use strict;
 3 | 
 4 | # usage: full-class-channel CLASSES1 CLASSES2
 5 | # The class names are the first whitespace-delimited field of each line of the
 6 | # two files.  Prints, in carmel syntax, one arc " (S c1 c2 p)" for every class
 7 | # c1 of CLASSES1 paired with every class c2 of CLASSES2, where p is
 8 | # 1/(number of distinct classes in CLASSES2).
 9 | 
10 | my %c1;
11 | my %c2;
12 | 
13 | 
14 | # get_classes(\%count, $file): adds 1 to $count{class} for every non-blank
15 | # line of $file; dies if the file cannot be opened.
16 | sub get_classes {
17 |     my ($h,$f)=@_;
18 |     open my $in,"<",$f or die "$0: cannot read $f: $!\n";
19 |     while (<$in>) {
20 |         my ($class)=split ' ',$_,2;
21 |         next unless defined $class; # blank line: no class name on it
22 |         ++$h->{$class};
23 |     }
24 |     close $in;
25 | }
26 | 
27 | die "usage: $0 classes1 classes2\n" unless @ARGV>=2;
28 | get_classes(\%c1,shift);
29 | get_classes(\%c2,shift);
30 | 
31 | my @c2=sort keys %c2;
32 | my $n2=scalar @c2;
33 | # without this check the division below dies with "Illegal division by zero"
34 | die "$0: no classes found in the second file\n" unless $n2;
35 | my $pcond=1./$n2;
36 | 
37 | my $S="S";
38 | 
39 | print "$S\n";
40 | print "($S\n";
41 | 
42 | for my $k1 (sort keys %c1) {
43 |     for (@c2) {
44 |         print " ($S $k1 $_ $pcond)\n";
45 |     }
46 | }
47 | 
48 | print ")\n";
49 | 


--------------------------------------------------------------------------------
/cipher/split-words:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl -n
2 | # Prints every whitespace-delimited word of the input on a line of its own.
3 | # split in void context has not filled @_ since perl 5.12, so loop over its
4 | # return value instead of over @_.
5 | print "$_\n" for split ' ';
6 | 


--------------------------------------------------------------------------------
/cipher/sri2fsa.pl:
--------------------------------------------------------------------------------
1 | ../carmel/src/sri2fsa.pl


--------------------------------------------------------------------------------
/cipher/summary-classes:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # For each directory given as an argument: prints a separator, the directory
3 | # name, and the last two lines of each of its
4 | # class-channel.train.test.*gram.accuracy.top-* files, minus the lines that
5 | # contain "Conditional".
6 | for f in "$@"; do
7 |   echo
8 |   echo ====================
9 |   echo "$f"
10 |   tail -n 2 "$f"/class-channel.train.test.*gram.accuracy.top-* | grep -v Conditional
11 | done


--------------------------------------------------------------------------------
/cipher/text-to-classes:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use strict;
 3 | 
 4 | # usage: text-to-classes CLASSFILE [TEXT...]
 5 | # CLASSFILE holds "class prob word" lines.  Every whitespace-delimited word of
 6 | # TEXT (or stdin) is replaced by its class.  A word without a class gets the
 7 | # class of the UNKTO word; if that has no class either, the script dies.
 8 | # env: UNKTO - word whose class stands in for unlisted words (default "the")
 9 | 
10 | my %c; # word -> class
11 | 
12 | my $UNKTO=$ENV{UNKTO};
13 | 
14 | my $classfile=shift;
15 | die "usage: $0 classfile [text...]\n" unless defined $classfile;
16 | open my $in,"<",$classfile or die "$0: cannot read $classfile: $!\n";
17 | while(<$in>) {
18 |     my ($class,$p,$w)=split;
19 |     next unless defined $w; # blank or short line: no word on it
20 |     $c{$w}=$class;
21 | }
22 | close $in;
23 | 
24 | $UNKTO="the" unless defined $UNKTO;
25 | 
26 | my $unkto=$c{$UNKTO};
27 | print STDERR "text-to-classes assigning unknown words to the class for '${UNKTO}' ($unkto).\n" if $unkto;
28 | # only for the error message below, so that it never interpolates undef
29 | my $unkto_shown=defined $unkto ? $unkto : '';
30 | 
31 | while(<>) {
32 |     s/(\S+)/$c{$1} || $unkto || die "class missing for word $1 (UNKTO=$UNKTO unkto_class=$unkto_shown)"/ge;
33 |     print;
34 | }
35 | 


--------------------------------------------------------------------------------
/cipher/word-freq:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | d=`dirname $0`
3 | 
4 | if [ "$top" ] ; then
5 | $d/split-words $* | sort | uniq -c | sort -rn | head -n $top
6 | else
7 | $d/split-words $* | sort | uniq -c | sort -rn 
8 | fi
9 | 


--------------------------------------------------------------------------------
/clm/clm-jan-09.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/clm/clm-jan-09.pdf


--------------------------------------------------------------------------------
/clm/e-parse-yield.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | #input: one per line ghkm-format trees ... (NN dog) (-LRB- () (-RRB- ))
 3 | #output: one per line yield ... dog ( )
 4 | #env: DEBUG - if true, each matched "(tag word)" pair is also echoed to stderr
 5 | my $DEBUG=$ENV{DEBUG};
 6 | while (my $tree=<>) {
 7 |     my $nleaves=0;
 8 |     # a leaf is "(tag word)" followed by a space or the end of the line
 9 |     while ($tree =~ /\(([^() ]+) ([^ ]+)\)( |$)/g) {
10 |         my ($tag,$word)=($1,$2);
11 |         print STDERR "($tag $word) " if $DEBUG;
12 |         print ' ' if $nleaves++; # separator before every word but the first
13 |         print $word;
14 |     }
15 |     print STDERR "\n" if $DEBUG;
16 |     print "\n";
17 | }
18 | 


--------------------------------------------------------------------------------
/clm/shen08.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/clm/shen08.pdf


--------------------------------------------------------------------------------
/clm/uniq_srilm.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # input: srilm
 3 | # optional env: order (checks just that order).  nodup: skip dup check.  print: print events (without prob/bo)
 4 | # output: die on duplicates ngram events (no output, no exit code = no duplicates)
 5 | 
 6 | use strict;
 7 | use warnings;
 8 | my $order=$ENV{order};
 9 | my $print=$ENV{print};
10 | my $dup=!$ENV{nodup};
11 | 
12 | my %seen; # events seen since the last \end\ line
13 | my $N=0;  # order of the section being read; 0 outside of any \N-grams: section
14 | while(<>) {
15 |     if (/^\\(\d+)-grams:\s*$/) {
16 |         $N=$1;
17 |         print STDERR "starting $N-grams...\n";
18 |         next;
19 |     }
20 |     if (/^\\end\\$/) {
21 |         $N=0;
22 |         %seen=();
23 |         next;
24 |     }
25 |     # nothing to check outside a section, in an order that was not asked for, or on a blank line
26 |     next if $N==0 || ($order&&$order!=$N) || /^\s*$/;
27 |     my @w=split;
28 |     my $event=join(' ',@w[1..$N]); # field 0 is skipped; fields 1..N form the event
29 |     if ($dup) {
30 |         die "DUPLICATE ($ARGV): $event :\n$_ " if exists $seen{$event};
31 |         $seen{$event}=1;
32 |     }
33 |     print $event,"\n" if $print;
34 | }
35 | 


--------------------------------------------------------------------------------
/forest-em/.gitignore:
--------------------------------------------------------------------------------
1 | forest-em.README.hpp
2 | 


--------------------------------------------------------------------------------
/forest-em/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Builds forest-em and forestviz.  forest-em.README is converted by the
 2 | # text-to-cc helper into the generated header forest-em.README.hpp, which
 3 | # forest-em.cpp is made to depend on.
 4 | project(forest-em)
 5 | our_boost_libs(program_options serialization system filesystem
 6 |   random chrono timer iostreams
 7 |   unit_test_framework
 8 |   )
 9 | find_package(zstd QUIET)
10 | message("zstd found?: ${zstd_FOUND}")
11 | set(zstdlib)
12 | if (zstd_FOUND)
13 |   # at least with mac+brew boost, iostreams appears to pull in zstd, lzma, z, bz2 libs; -lzstd wasn't found
14 |   link_directories("/usr/local/lib")
15 |   list(APPEND zstdlib zstd::libzstd_static)
16 | endif()
17 | add_executable(text-to-cc ../graehl/shared/text-to-cc.cpp)
18 | include_directories(${PROJECT_SOURCE_DIR})
19 | set(README_IN forest-em.README)
20 | set(README_GEN_H ${PROJECT_SOURCE_DIR}/forest-em.README.hpp)
21 | # add_custom_command has no INPUT keyword: the word and the file name after it
22 | # were parsed as two more OUTPUT files.  DEPENDS is what makes the header get
23 | # regenerated when the README or the converter changes.
24 | add_custom_command(OUTPUT ${README_GEN_H}
25 |   COMMAND text-to-cc usage_str < ${PROJECT_SOURCE_DIR}/${README_IN} > ${README_GEN_H}
26 |   DEPENDS ${PROJECT_SOURCE_DIR}/${README_IN} text-to-cc
27 |   VERBATIM
28 |   )
29 | add_executable(forest-em forest-em.cpp)
30 | set_property(SOURCE forest-em.cpp APPEND PROPERTY OBJECT_DEPENDS ${README_GEN_H})
31 | set_property(TARGET forest-em APPEND PROPERTY OBJECT_DEPENDS ${README_GEN_H})
32 | add_executable(forestviz forestviz.cpp)
33 | 
34 | target_link_libraries(forest-em Boost::timer Boost::random Boost::iostreams Boost::program_options ${zstdlib})
35 | target_link_libraries(forestviz Boost::random Boost::iostreams Boost::program_options ${zstdlib})
36 | 


--------------------------------------------------------------------------------
/forest-em/README:
--------------------------------------------------------------------------------
1 |    make INSTALL_PREFIX=/usr/local install -j 4
2 |    forest-em --help
3 | 
4 | Note: you can also read forest-em.README before compiling.
5 | 


--------------------------------------------------------------------------------
/forest-em/forest-em.cpp:
--------------------------------------------------------------------------------
 1 | // forest-em executable: the main function only forwards to forest_em_param.main().
 2 | #define GRAEHL__SINGLE_MAIN
 3 | #ifdef DEBUG
 4 | //#define TEST_ADD_ONE_LIMIT
 5 | #endif
 6 | #include "forest-em-params.hpp"
 7 | //#define SINGLE_PRECISION
 8 | //#define HINT_SWAPBATCH_BASE
 9 | #include <graehl/shared/config.h>
10 | #include <memory>  // auto_ptr
11 | #include <graehl/shared/main.hpp>
12 | 
13 | #ifndef GRAEHL_TEST
14 | 
15 | using namespace boost;
16 | using namespace std;
17 | using namespace boost::program_options;
18 | using namespace graehl;
19 | 
20 | //#define FOREST_EM_VERSION_STR(type,size) "sizeof(" #type ")=" FOREST_EM_STRINGIZE(size)
21 | //#define FOREST_EM_VERSION_SIZE(name,type) FOREST_EM_VERSION_STR(name,sizeof(type))
22 | //#define FOREST_EM_SIZE_COUNT sizeof(forest::count_t)
23 | //#define FOREST_EM_VERSION_STRING FOREST_EM_VERSION "-" FOREST_EM_VERSION_STR(count,FOREST_EM_SIZE_COUNT)
24 | //FOREST_EM_VERSION_SIZE(prob,forest::prob_t)
25 | 
26 | MAIN_BEGIN
27 | {
28 |   DBP_INC_VERBOSE;
29 | #ifdef DEBUG
30 |   // debug builds send the DBP log to stderr
31 |   DBP::set_logstream(&cerr);
32 | #endif
33 |   //DBP_OFF;
34 | 
35 |   return forest_em_param.main(argc, argv);
36 | }
37 | MAIN_END
38 | 
39 | #endif  // GRAEHL_TEST
40 | 


--------------------------------------------------------------------------------
/forest-em/sample/.gitignore:
--------------------------------------------------------------------------------
1 | forests.dot
2 | forests.b
3 | 


--------------------------------------------------------------------------------
/forest-em/sample/Makefile:
--------------------------------------------------------------------------------
1 | # Builds in the parent directory.  $(MAKE) rather than a literal "make", so
2 | # that flags such as -j and -n are handed on to the sub-make.
3 | .PHONY: default
4 | default:
5 | 	$(MAKE) -C ..
6 | 


--------------------------------------------------------------------------------
/forest-em/sample/best_forest:
--------------------------------------------------------------------------------
1 | (1 
2 |  (OR (OR
3 |    (1 (OR  #1(2 (OR 1 3)) (OR 3 4) (OR (1 (2 3)) 4)) (OR 1 2 (3 2) (2 (OR #1 4)) (1 #1 #1)))
4 |    (4 4 (OR 1 1 2) (OR 4 4) (OR 1 4) 4)
5 |    5
6 | )
7 | ))
8 | 


--------------------------------------------------------------------------------
/forest-em/sample/best_norm:
--------------------------------------------------------------------------------
1 | ((1 2 3 4 5))
2 | 


--------------------------------------------------------------------------------
/forest-em/sample/best_weights:
--------------------------------------------------------------------------------
1 | e^-4
2 | e^-2
3 | e^-3
4 | e^-6
5 | e^-100
6 | 


--------------------------------------------------------------------------------
/forest-em/sample/byid_rules:
--------------------------------------------------------------------------------
 1 | rule3 a id=3
 2 | rule4 a id=4
 3 | rule1 a id=1
 4 | rule2 a id=2
 5 | rule5 a id=5
 6 | rule9 a id=9
 7 | rule10 a id=10
 8 | rule6 a id=6
 9 | rule7 a id=7
10 | rule8 a id=8
11 | rule11 a id=11
12 | rule12 a id=12
13 | rule13 a id=13
14 | rule14 a id=14
15 | rule15 a id=15
16 | rule16 a id=16
17 | 


--------------------------------------------------------------------------------
/forest-em/sample/derivs/first10.norm:
--------------------------------------------------------------------------------
1 | ((85 139) (72) (52) (35) (31) (44) (28) (118) (45) (188) (36 166) (168) (34) (187) (185) (16 65) (53) (146) (200) (198 79) (29) (92) (46) (62) (147) (107) (12) (184) (207) (43 156 25 22) (138) (152) (154) (83) (95) (143) (100) (150 76) (61) (78) (50) (159) (177) (179) (80) (54) (123) (201) (178) (206 102 174 74) (18) (158) (153) (167 191) (119) (182) (77) (14) (30) (99) (64) (157 208) (113) (58) (106) (109 40 94 104 145 144) (171 204 41) (49) (11) (86) (128 170) (10) (5) (195) (169) (149) (141) (59) (88 38 162) (90) (133) (56) (24) (122) (210) (148) (60 165) (211 175) (155) (26) (9) (68 135) (91) (172) (120) (110) (23 181) (131) (81 1) (112) (97) (2) (183 3) (75) (129) (116) (98) (189) (161) (124) (67) (136) (57) (202) (140) (203) (194) (121) (48) (173) (70 103) (125) (164) (55) (82) (51) (37) (193) (33) (4) (66) (197) (142) (96) (27) (126) (73) (87) (176) (163) (209) (130) (7) (20) (132) (13) (93) (17 205) (101) (19) (32) (6) (180) (115) (134) (71 111) (137) (47) (186 39) (42) (199) (8) (108) (160 192) (117) (69) (105) (127) (84) (151 15 114 190) (21) (196) (89) (63))
2 | 


--------------------------------------------------------------------------------
/forest-em/sample/derivs/first10.rules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/forest-em/sample/derivs/first10.rules


--------------------------------------------------------------------------------
/forest-em/sample/derivs/first100.rules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/forest-em/sample/derivs/first100.rules


--------------------------------------------------------------------------------
/forest-em/sample/derivs/first1000.rules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/forest-em/sample/derivs/first1000.rules


--------------------------------------------------------------------------------
/forest-em/sample/derivs/first10000.rules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/forest-em/sample/derivs/first10000.rules


--------------------------------------------------------------------------------
/forest-em/sample/forest:
--------------------------------------------------------------------------------
1 | (OR 
2 |  #1(1 #2(2) 3 #2) (4 #4(5) #2) (6 #2 #4) (7 8)
3 |  (9 #5(OR (10 (11 12)) #6(13 14)) (15 #1 #2) (16 #6))
4 | )
5 | 


--------------------------------------------------------------------------------
/forest-em/sample/forests:
--------------------------------------------------------------------------------
1 | (OR 
2 |  #1(1 #2(2) 3 #2) (4 #4(5) #2) (6 #2 #4) (7 8)
3 |  (9 #5(OR (10 (11 12)) #6(13 14)) (15 #1 #2) (16 #6))
4 | )
5 | (1 4)
6 | (OR (1 4) (1 3))
7 | (OR (1 4 4) (2 3 4) (2 4 3) (1 5))
8 | (OR #1(1 #2(OR 2 3) #2) (4 #4(OR #2 5) #2) (6 #2 #4) (7 8))
9 | 


--------------------------------------------------------------------------------
/forest-em/sample/forests.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/forest-em/sample/forests.gz


--------------------------------------------------------------------------------
/forest-em/sample/ints:
--------------------------------------------------------------------------------
 1 | 1 2 3
 2 | 1
 3 | 1 1 1
 4 | 2 2
 5 | 5
 6 | 
 7 | 9
 8 | 7
 9 | 12
10 | 2 2
11 | 5
12 | 
13 | 9
14 | 7
15 | 12
16 | 1 20
17 | 17
18 | 16
19 | 16
20 | 9 9 9
21 | 9 9 9
22 | 9 9 9
23 | 9 9 9
24 | 9 9 9
25 | 11 11 11
26 | 11 11 11
27 | 11 11 11
28 | 11 11 11
29 | 11 11 11
30 | 11 11 11
31 | 14 14 14 
32 | 14 14 14 
33 | 14 14 14 
34 | 15 15 15 15
35 | 15 15 15 15
36 | 15 15 15 15
37 | 15 15 15 15
38 | 5 5 5 5 5 5 5
39 | 5 5 5 5 5 5 5
40 | 5 5 5 5 5 5 5
41 | 5 5 5 5 5 5 5
42 | 5 5 5 5 5 5 5
43 | 5 5 5 5 5 5 5
44 | 19 20 21 22
45 | 19 20 21 22
46 | 19 20 21 22
47 | 19 20 21 22
48 | 19 20 21 22
49 | 19 20 21 22
50 | 19 20 21 22
51 | 19 20 21 22
52 | 5 10 10 10 10 10 10 10 10 10 10 10 10 10
53 | 
54 | 5 10 10 10 10 10 10 10 10 10 10 10 10 10
55 | 
56 | 5 13 13 13 13 13 13 13 13 13 13 13 
57 | 
58 | 5 14 14 14 14 14 14 14 14 14 14 14 
59 | 
60 | 5 14 14 13 13 13 13 13 13 13 13 13 
61 | 


--------------------------------------------------------------------------------
/forest-em/sample/norm:
--------------------------------------------------------------------------------
1 | ((1 2 7 ) (3 4 5 6))
2 | 


--------------------------------------------------------------------------------
/forest-em/sample/norm_and_forests:
--------------------------------------------------------------------------------
1 | ((1 2 7 ) (3 4 5 6))
2 | (1 4)
3 | (OR (1 4) (1 3))
4 | (OR (1 4 4) (2 3 4) (2 4 3) (1 5))
5 | (OR #1(1 #2(4) #2) (2 #4(3) #2) (2 #2 #4) (1 5))
6 | 
7 | 


--------------------------------------------------------------------------------
/forest-em/sample/raw_weight_array:
--------------------------------------------------------------------------------
 1 | e^2
 2 | 1
 3 | 0
 4 | .5
 5 | 10
 6 | 2
 7 | 3
 8 | 4
 9 | 5
10 | e^5
11 | e^-1e+30
12 | e^1
13 | 


--------------------------------------------------------------------------------
/forest-em/sample/rule_list:
--------------------------------------------------------------------------------
 1 | rule1 a id=1
 2 | rule2 a id=2
 3 | rule3 a id=3
 4 | rule4 a id=4
 5 | rule5 a id=5
 6 | rule6 a id=6
 7 | rule7 a id=7
 8 | rule8 a id=8
 9 | rule9 a id=9
10 | rule10 a id=10
11 | rule11 a id=11
12 | rule12 a id=12
13 | rule13 a id=13
14 | rule14 a id=14
15 | rule15 a id=15
16 | rule16 a id=16
17 | 


--------------------------------------------------------------------------------
/forest-em/sample/testderivs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | d=`dirname $0`
 3 | F=$d/../bin/$ARCH/forest-em
 4 | dd=$d/derivs
 5 | w=$1
 6 | shift
 7 | normsuffix=${1:-norm}
 8 | shift
 9 | norm=$dd/$w.$normsuffix
10 | deriv=$dd/$w.deriv
11 | rules=$dd/$w.rules
12 | out=train.$w.$normsuffix.out
13 | log=train.$w.$normsuffix.log
14 | watchrule=`grep -n '^S(x0:NP-C x1:VP)' $rules | head -1 | cut -d: -f1`
15 | echo watching rule $watchrule:
16 | head -$watchrule $rules | tail -1
17 | cm="$F -f $deriv -n $norm -o $out --rules-file $rules  --watch-rule $watchrule --watch-depth 40 --watch-period 5 -M 1560 -i 200 -r 4 $*"
18 | echo $cm
19 | time $cm 2>&1 | tee $log
20 | #$F -M 500 -i 200 -f $deriv -n $norm -o $out $*
21 | 


--------------------------------------------------------------------------------
/forest-em/sample/tree.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/forest-em/sample/tree.gz


--------------------------------------------------------------------------------
/gextract/astronauts.a:
--------------------------------------------------------------------------------
1 | 0-1 2-0
2 | 0-0 1-1 2-1 3-2 4-6 4-7 5-3 6-3 7-4 8-8
3 | 1-0 3-3 4-1 6-2 7-4
4 | 


--------------------------------------------------------------------------------
/gextract/astronauts.e-parse:
--------------------------------------------------------------------------------
1 | (PP (IN by) (DT the) (NN police))
2 | (S (NP (DT These) (CD 7) (NNS people)) (VP (VBP include) (NP (NP (NNS astronauts)) (VP (VBG coming) (PP (IN from) (NP (NNP (France)))) ))) (. .))
3 | (S (NP (DT The) (NNS gunmen)) (VP (VBD were) (VP-C (VBN killed) (PP (IN by) (NP (DT the) (NN police) )))) (. .))
4 | 


--------------------------------------------------------------------------------
/gextract/astronauts.f:
--------------------------------------------------------------------------------
1 | POLICE BY
2 | THESE 7PEOPLE INCLUDE COMINGFROM FRANCE DUH ASTRO- -NAUTS PERIOD
3 | GUNMEN BY POLICE WEREKILLED .
4 | 


--------------------------------------------------------------------------------
/gextract/castronauts.a:
--------------------------------------------------------------------------------
1 | 0-1 0-0
2 | 0-0 0-1 2-5 4-2 4-6 4-7 5-3 6-3 7-4 8-8
3 | 1-0 3-3 6-1 7-2 7-4
4 | 


--------------------------------------------------------------------------------
/gextract/castronauts.a-gold:
--------------------------------------------------------------------------------
1 | 0-1 2-0
2 | 0-0 1-1 2-1 3-2 4-6 4-7 5-3 6-3 7-4 8-8
3 | 1-0 3-3 4-1 6-2 7-4
4 | 


--------------------------------------------------------------------------------
/gextract/castronauts.e-parse:
--------------------------------------------------------------------------------
1 | (PP (IN by) (DT the) (NN police) )
2 | (S (NP (DT These) (CD 7) (NNS people) ) (VP (VBP include) (NP (NP (NNS astronauts) ) (VP (VBG coming) (PP (IN from) (NP (NNP France) ) ) ) ) ) (. .) )
3 | (S (NP (DT The) (NNS gunmen) ) (VP (VBD were) (VP-C (VBN killed) (PP (IN by) (NP (DT the) (NN police) ) ) ) ) (. .) )
4 | 


--------------------------------------------------------------------------------
/gextract/castronauts.f:
--------------------------------------------------------------------------------
1 | POLICE BY
2 | THESE 7PEOPLE INCLUDE COMINGFROM FRANCE DUH ASTRO- -NAUTS PERIOD
3 | GUNMEN BY POLICE WEREKILLED .
4 | 


--------------------------------------------------------------------------------
/gextract/etree.py:
--------------------------------------------------------------------------------
1 | ../sblm/etree.py


--------------------------------------------------------------------------------
/gextract/optfunc.py:
--------------------------------------------------------------------------------
1 | optfunc/optfunc.py


--------------------------------------------------------------------------------
/gextract/optfunc/__init__.py:
--------------------------------------------------------------------------------
1 | # Empty __init__.py file to make optfunc into a quick-and-dirty module
2 | 


--------------------------------------------------------------------------------
/gextract/radu2ptb.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Line filter that applies four rewrites to each input line (going by the
 4 | # script name, presumably radu-format parses -> PTB-style brackets).
 5 | while (my $tree = <>) {
 6 |     for ($tree) {
 7 |         # a line that is just "0" becomes an empty line
 8 |         s/^0$//;
 9 |         # "(LABEL~n~n number" -> "(LABEL"
10 |         s/\(([^~]+)~(\d+)~(\d+)\s+([-.\d]+)/($1/g;
11 |         # "(-LRB- ()" -> "(-LRB- -LRB-)"; the tag may carry a "-n" suffix
12 |         s/\((-LRB-(-\d+)?) \(\)/\($1 -LRB-\)/g;
13 |         # "(-RRB- ))" -> "(-RRB- -RRB-)"; the tag may carry a "-n" suffix
14 |         s/\((-RRB-(-\d+)?) \)\)/\($1 -RRB-\)/g;
15 |     }
16 |     print $tree;
17 | }
18 | 


--------------------------------------------------------------------------------
/gextract/reviz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | export skip=1
 3 | skip=1 noise=.1 until=10 every=100 iter=1000 ./do.mono.sh
 4 | skip=1 noise=.1 until=10 iter=10 ./do.mono.sh
 5 | [ "$first" ] && exit
 6 | skip=1 noised=0 temp0=10 tempf=.5 until=3 every=10 noise=0 iter=40 ./do.mono.sh
 7 | vizall=1 skip=1  noised=0 until=3 every=10 noise=0 iter=100 ./do.mono.sh
 8 | skip=1  noised=4 until=3 every=20 noise=.3 iter=100 ./do.mono.sh
 9 | skip=1 vizall=1 noised=0 temp0=10 tempf=.5 until=3 every=10 noise=0 iter=40 ./do.mono.sh
10 | skip=1 until=5 nomono=1 temp0=1.2 tempf=.2 iter=100 every=20 ./do.mono.sh
11 | skip=1 until=10 nomono=1 iter=200 every=20 ./do.mono.sh
12 | skip=1 noised=5 until=3 every=10 noise=.3 iter=120 ./do.mono.sh
13 | skip=1 until=5 noised=2 every=20 temp0=1 tempf=1 noise=.2 iter=160 ./do.mono.sh
14 | 
15 | skip=1 iter=100 nin=1000 noise=.3 noised=5 ./do.mono.sh
16 | 
17 | skip=1 noised=2 until=3 every=10 temp0=1.5 tempf=.08 noise=.2 iter=80 ./do.mono.sh
18 | 
19 | 


--------------------------------------------------------------------------------
/graehl/shared/.gdbinit:
--------------------------------------------------------------------------------
1 | catch throw
2 | r
3 | 


--------------------------------------------------------------------------------
/graehl/shared/.gitignore:
--------------------------------------------------------------------------------
1 | SGT
2 | 


--------------------------------------------------------------------------------
/graehl/shared/Lx_norm.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL_SHARED__LX_NORM_HPP
15 | #define GRAEHL_SHARED__LX_NORM_HPP
16 | #include <cmath>
17 | #include <graehl/shared/reduce.hpp>
18 | 
19 | namespace graehl {
20 | /// Reduction step for an Lx norm: adds component^x to a running total.
21 | struct sum_powx
22 | {
23 |   double x;
24 |   // most common is L2-norm (Euclid. distance)
25 |   sum_powx(double x = 2) : x(x) {}
26 | 
27 |   template <class W>
28 |   W operator()(W total, W component)
29 |   {
30 |     using std::pow;  // the call stays unqualified, so ADL can still supply pow for a class-type W
31 |     return total+pow(component, x);
32 |   }
33 | 
34 |   // boost::result_of protocol: the call sum_powx(W, W) has result type W
35 |   template <class Sig> struct result {};
36 |   template <class W> struct result<sum_powx(W, W)> { typedef W type; };
37 | };
38 | 
39 | /// (sum over the range of element^x)^(1/x); the default x = 2 is the Euclidean norm.
40 | // NOTE(review): the initial value 0 is an int - confirm reduce() accumulates in the element type
41 | template <class R>
42 | typename range_value<R>::type
43 | lx_norm(R const& range, double x = 2)
44 | {
45 |   using std::pow;
46 |   return pow(reduce(range, sum_powx(x), 0), 1./x);
47 | }
48 | }  // namespace graehl
49 | 
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/graehl/shared/Makefile:
--------------------------------------------------------------------------------
 1 | P=main_template
 2 | 
 3 | # 'all' (the default goal) and 'test' both just require 'next', which has no
 4 | # rule of its own and so is built by the match-anything rule below.
 5 | all test: next
 6 | 
 7 | .PHONY: test
 8 | 
 9 | %: # any target: compile words_per_line and run it on its own source
10 | 	g++ words_per_line.cpp -I../.. -o words_per_line && ./words_per_line < words_per_line.cpp
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/graehl/shared/SGT.counts.txt:
--------------------------------------------------------------------------------
 1 | 1	120
 2 | 2	40
 3 | 3	24
 4 | 4	13
 5 | 5	15
 6 | 6	5
 7 | 7	11
 8 | 8	2
 9 | 9	2
10 | 10	1
11 | 12	3
12 | 


--------------------------------------------------------------------------------
/graehl/shared/_template.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef TEMPLATE_HPP
15 | #define TEMPLATE_HPP
16 | 
17 | #ifdef GRAEHL_TEST
18 | #include <graehl/shared/test.hpp>
19 | #endif
20 | // placeholder unit test with an empty body; only compiled when GRAEHL_TEST is defined
21 | #ifdef GRAEHL_TEST
22 | BOOST_AUTO_TEST_CASE( TEST_TEMPLATE )
23 | {
24 | }
25 | #endif
26 | 
27 | #endif
28 | 


--------------------------------------------------------------------------------
/graehl/shared/abs_int.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL__SHARED__ABS_INT_HPP
15 | #define GRAEHL__SHARED__ABS_INT_HPP
16 | 
17 | #include <boost/cstdint.hpp>
18 | #include <boost/utility/enable_if.hpp>
19 | #include <boost/type_traits/is_integral.hpp>
20 | #include <boost/type_traits/remove_cv.hpp>
21 | 
22 | namespace graehl {
23 | 
24 | /// \return absolute value of integral x (the most negative value has no positive counterpart).
25 | template <class I>
26 | inline typename boost::enable_if<boost::is_integral<I>, typename boost::remove_cv<I>::type>::type
27 | abs_int(I x) {
28 |   return x < 0 ? -x : x;
29 | }
30 | /// legacy misnomer for abs_int (nothing is rotated); kept so existing callers still compile.
31 | template <class I> inline typename boost::enable_if<boost::is_integral<I>, typename boost::remove_cv<I>::type>::type
32 | bit_rotate_right(I x) { return abs_int(x); }
33 | 
34 | }
35 | 
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/graehl/shared/assertlvl.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | /** \file
15 | 
16 |     a continuum of asserts (finer than all-off-for-release)
17 | */
18 | 
19 | 
20 | #ifndef GRAEHL__SHARED__ASSERTLVL_HPP
21 | #define GRAEHL__SHARED__ASSERTLVL_HPP
22 | #pragma once
23 | 
24 | #ifndef ASSERT_LEVEL
25 | #define ASSERT_LEVEL 9999  // default when the build does not choose a level
26 | #endif
27 | // IF_ASSERT(n) guards a statement that runs iff ASSERT_LEVEL >= n; UNLESS_ASSERT(n) is the inverse.
28 | #define IF_ASSERT(level) if (ASSERT_LEVEL >= (level))
29 | #define UNLESS_ASSERT(level) if (ASSERT_LEVEL < (level))
30 | #ifndef assertlvl
31 | #include <cassert>
32 | #define assertlvl(level, assertion)         \
33 |   do {                                      \
34 |     IF_ASSERT(level) { assert(assertion); } \
35 |   } while (0)
36 | #endif
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/graehl/shared/batched_append.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL__SHARED__batched_append_hpp
15 | #define GRAEHL__SHARED__batched_append_hpp
16 | 
17 | #include <algorithm> //swap
18 | #include <cstddef>
19 | 
20 | /// append every element of s to the end of v, reserving the combined size up front.
21 | template <class SRange, class Vector>
22 | void batched_append(Vector& v, SRange const& s) {
23 |   std::size_t const combined = v.size() + s.size();
24 |   v.reserve(combined), v.insert(v.end(), s.begin(), s.end());
25 | }
26 | 
27 | /// grow v by s.size() slots, then swap each element of s into its new slot in order;
28 | /// s keeps its size and ends up holding the values the resized slots started with.
29 | template <class SRange, class Vector>
30 | void batched_append_swap(Vector& v, SRange& s) {
31 |   using namespace std;  // to find the right swap
32 |   size_t const first_new = v.size();
33 |   size_t const total = first_new + s.size();
34 |   v.resize(total);
35 |   typename SRange::iterator from = s.begin();
36 |   for (size_t at = first_new; at < total; ++at, ++from) swap(v[at], *from); }
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/graehl/shared/breakpoint.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef BREAKPOINT_HPP
15 | #define BREAKPOINT_HPP
16 | 
17 | #ifndef BREAKPOINT
18 | // BREAKPOINT: `int 3` on MSVC/Win32 and on gcc-style x86; elsewhere a store through a null pointer.
19 | #if defined(_MSC_VER) && defined(_WIN32)
20 | # define BREAKPOINT __asm int 3
21 | #else
22 | # if defined(__i386__) || defined(__x86_64__)
23 | #  define BREAKPOINT asm("int $3")
24 | # else
25 | #  define BREAKPOINT do { volatile int *p = 0; *p = 0; } while (0)
26 | # endif
27 | #endif
28 | 
29 | #endif
30 | // DEBUG_BREAKPOINT expands to BREAKPOINT only when DEBUG is defined, otherwise to nothing.
31 | #ifdef DEBUG
32 | # define DEBUG_BREAKPOINT BREAKPOINT
33 | #else
34 | # define DEBUG_BREAKPOINT
35 | #endif
36 | 
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/graehl/shared/char_is.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL__CHAR_IS_HPP
15 | #define GRAEHL__CHAR_IS_HPP
16 | 
17 | namespace graehl {
18 | 
19 | // MAKE_P(f, arg, ret) declares a functor struct p##f that forwards to the free function f.
20 | #define MAKE_P(f, arg, ret) struct p ## f { typedef arg argument_type; typedef ret result_type; inline result_type operator()(argument_type const& a) const { return f(a); } };
21 | // MAKE_CHARP(f): the char -> bool case of MAKE_P
22 | #define MAKE_CHARP(f) MAKE_P(f, char, bool)
23 | 
24 | /// ASCII decimal digit
25 | inline bool isdigit(char c) { return '0' <= c && c <= '9'; }
26 | /// ASCII letter, either case
27 | inline bool isalpha(char c) { return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); }
28 | /// horizontal whitespace: tab or space
29 | inline bool isblank(char c) { return c == ' ' || c == '\t'; }
30 | /// newline or blank
31 | inline bool isspace(char c) { return isblank(c) || c == '\n'; }  // intentionally neglecting \r \v \f
32 | 
33 | // functor forms: pisdigit, pisalpha, pisblank, pisspace
34 | MAKE_CHARP(isdigit)
35 | MAKE_CHARP(isalpha)
36 | MAKE_CHARP(isblank)
37 | MAKE_CHARP(isspace)
38 | 
39 | }//ns
40 | 
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/graehl/shared/cpp11.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #ifndef GRAEHL_CPP11
 3 | 
 4 | // Language-level flags derived from __cplusplus (and _MSC_VER). Every GRAEHL_CPPnn below is
 5 | // defined to exactly 0 or 1, so it can be tested directly with #if.
 6 | // GRAEHL_CPP11 doubles as this header's include guard; a nonzero SDL_CPP11 turns it on.
 7 | // GRAEHL_CPP14_TYPETRAITS is additionally switched on for _MSC_VER >= 1900.
 8 | // GRAEHL_CPP17 and GRAEHL_CPP20 are each defined once, at the end of this header.
 9 | 
10 | #if __cplusplus >= 201103L || SDL_CPP11 || _MSC_VER >= 1900
11 | #define GRAEHL_CPP11 1
12 | #if __cplusplus >= 201400L
13 | #define GRAEHL_CPP14 1
14 | #define GRAEHL_CPP14_TYPETRAITS 1
15 | #else
16 | #define GRAEHL_CPP14 0
17 | #define GRAEHL_CPP14_TYPETRAITS 0
18 | #endif
19 | #else
20 | #define GRAEHL_CPP11 0
21 | #define GRAEHL_CPP14 0
22 | #define GRAEHL_CPP14_TYPETRAITS 0
23 | #endif
24 | 
25 | #if GRAEHL_CPP11
26 | #define GRAEHL_CONSTEXPR constexpr
27 | #else
28 | #define GRAEHL_CONSTEXPR
29 | #endif
30 | 
31 | #if _MSC_VER >= 1900
32 | #undef GRAEHL_CPP14_TYPETRAITS
33 | #define GRAEHL_CPP14_TYPETRAITS 1
34 | #endif
35 | 
36 | #if __cplusplus >= 201700L
37 | // GCC 8.2 has 201709 and clang 7.0 has 201707
38 | #define GRAEHL_CPP17 1
39 | #else
40 | #define GRAEHL_CPP17 0
41 | #endif
42 | 
43 | #if __cplusplus >= 202000L
44 | // the 201709 / 201707 values noted above stay below this threshold, so they do not count as C++20
45 | #define GRAEHL_CPP20 1
46 | #else
47 | #define GRAEHL_CPP20 0
48 | #endif
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/graehl/shared/dbg_level.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | /** \file
15 | 
16 |  .
17 | */
18 | 
19 | #ifndef GRAEHL_SHARED__DBG_LEVEL_HPP
20 | #define GRAEHL_SHARED__DBG_LEVEL_HPP
21 | #pragma once
22 | 
23 | #include <graehl/shared/os.hpp>
24 | // NOTE(review): DECLARE_ENV_C_LEVEL is not defined in this header; presumably os.hpp provides it - confirm.
25 | #define DECLARE_DBG_LEVEL_C(n, env) DECLARE_ENV_C_LEVEL(n, getenv_##env, env)
26 | #define DECLARE_DBG_LEVEL(ch) DECLARE_DBG_LEVEL_C(ch##_DBG_LEVEL, ch##_DBG)
27 | #define DECLARE_DBG_LEVEL_IF(ch) ch(DECLARE_DBG_LEVEL_C(ch##_DBG_LEVEL, ch##_DBG))
28 | 
29 | #endif
30 | 


--------------------------------------------------------------------------------
/graehl/shared/dummy.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL_SHARED__DUMMY_HPP
15 | #define GRAEHL_SHARED__DUMMY_HPP
16 | 
17 | #ifdef GRAEHL_TEST
18 | #include <graehl/shared/test.hpp>
19 | #endif
20 | 
21 | namespace graehl {
22 | 
23 | /// dummy<C>::var() hands out a const reference to one shared, default-constructed C.
24 | template <class C>
25 | struct dummy {
26 |   static C const& var();
27 | };
28 | /// the shared object is a function-local static, so every call returns the same instance.
29 | template <class C>
30 | C const& dummy<C>::var() {
31 |   static C shared;
32 |   return shared;
33 | }
34 | 
35 | #ifdef GRAEHL_TEST
36 | // dummy<int>::var() refers to a function-local static int, which is zero-initialized
37 | BOOST_AUTO_TEST_CASE( TEST_dummy )
38 | {
39 |   BOOST_CHECK(dummy<int>::var() == 0);
40 | }
41 | #endif
42 | 
43 | }
44 | 
45 | #endif
46 | 


--------------------------------------------------------------------------------
/graehl/shared/exact_cast.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL__SHARED__EXACT_CAST_HPP
15 | #define GRAEHL__SHARED__EXACT_CAST_HPP
16 | 
17 | #include <stdexcept>
18 | 
19 | namespace graehl {
20 | 
21 | /// thrown when a checked cast does not survive the round trip back to the source type.
22 | struct inexact_cast : public std::runtime_error {
23 |   inexact_cast() : std::runtime_error("inexact_cast - casting to a different type lost information") {}
24 | };
25 | 
26 | /// store static_cast<To>(from) in `to`, then check that casting back reproduces from.
27 | /// \return the converted value. \throw inexact_cast if static_cast<From>(to) != from
28 | template <class To, class From>
29 | To exact_static_assign(To& to, From const& from) {
30 |   to = static_cast<To>(from);
31 |   if (static_cast<From>(to) != from) throw inexact_cast();
32 |   return to;
33 | }
34 | 
35 | /// value-returning form of exact_static_assign. \throw inexact_cast on a lossy conversion
36 | template <class To, class From>
37 | To exact_static_cast(From const& from) {
38 |   To converted;
39 |   exact_static_assign(converted, from);
40 |   return converted;
41 | }
42 | 
43 | 
44 | }//graehl
45 | 
46 | #endif
47 | 


--------------------------------------------------------------------------------
/graehl/shared/example.Makefile:
--------------------------------------------------------------------------------
 1 | #gdb --args /cache/tt/bin/cygwin/forest-em.debug -f /cache/tt/sample/best_forest -I /cache/tt/sample/best_weights -n /cache/tt/sample/best_norm -i 1 -m 100k -w 3 -x sample/best_viterbi -L 9
 2 | # programs to build; each name has matching NAME_OBJ / NAME_SLIB / NAME_NOTEST settings below
 3 | PROGS= count-id-freq add-giza-models
 4 | #PROGS+=text-to-cc
 5 | 
 6 | # NOTE(review): the per-program variables are presumably read by ../shared/graehl.mk (included below) - confirm
 7 | count-id-freq_OBJ=count-id-freq.o
 8 | count-id-freq_SLIB=$(BOOST_OPT_LIB)
 9 | count-id-freq_NOTEST=1
10 | #count-id-freq_NOSTATIC=1
11 | #count-id-freq_NODEBUG=1
12 | 
13 | add-giza-models_OBJ=add-giza-models.o
14 | add-giza-models_SLIB=$(BOOST_OPT_LIB)
15 | add-giza-models_NOTEST=1
16 | 
17 | # include paths, libraries and compiler
18 | SHARED=../shared
19 | INC= . $(SHARED)
20 | LIB=
21 | CXX:=g++
22 | 
23 | BASECXXFLAGS= -ggdb -ffast-math
24 | CXXFLAGS= $(BASECXXFLAGS) -O -DNO_BACKTRACE -DUSE_NONDET_RANDOM
25 | #-DSINGLE_PRECISION
26 | ## would have to link with boost random nondet source
27 | # flag sets for the _DEBUG and _TEST variants
28 | CPPFLAGS_DEBUG+= -DDEBUG
29 | CXXFLAGS_DEBUG= $(BASECXXFLAGS)
30 | # -DDEBUGFIXEDINPUT
31 | CPPFLAGS_TEST+= -DTEST -DDEBUG
32 | CXXFLAGS_TEST=$(BASECXXFLAGS)
33 | #CPP_EXT=cpp
34 | ALL_CLEAN +=  *.restart.* *.swap.* *.stackdump *.d *.out *.log massif.* core
35 | 
36 | default: all
37 | #forest-em-debug
38 | #mydefault
39 | # look for .cpp sources in this directory and in $(SHARED)
40 | vpath %.cpp .:$(SHARED)
41 | 
42 | include ../shared/graehl.mk
43 | 
44 | # mydefault depends only on $(BIN)/count-id-freq.debug
45 | mydefault: $(BIN)/count-id-freq.debug
46 | 


--------------------------------------------------------------------------------
/graehl/shared/fast_lexical_cast.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef FAST_LEXICAL_CAST_HPP
15 | #define FAST_LEXICAL_CAST_HPP
16 | 
17 | #define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE
18 | 
19 | #include <boost/lexical_cast.hpp>
20 | 
21 | using boost::lexical_cast;
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/graehl/shared/force_link.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | /** \file
15 | 
16 |     avoid elimination of dead symbols while static linking.
17 | */
18 | 
19 | #ifndef FORCE_LINK_JG_2015_03_23_HPP
20 | #define FORCE_LINK_JG_2015_03_23_HPP
21 | #pragma once
22 | 
23 | #include <cstdlib>
24 | 
25 | namespace graehl {
26 | 
27 | /// xor p's address into a volatile function-local static, so the reference to p is really used.
28 | static void force_link(void* p) {
29 |   static volatile std::size_t sink;
30 |   sink = sink ^ reinterpret_cast<std::size_t>(p);
31 | }
32 | /// create a function-local static C and pass its address to force_link.
33 | template <class C>
34 | static void force_link_class() {
35 |   static C instance;
36 |   force_link(&instance);
37 | }
38 | #define GRAEHL_FORCE_LINK_CLASS(x) graehl::force_link_class<x>();
39 | 
40 | 
41 | }
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/graehl/shared/format.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL__SHARED__FORMAT_HPP
15 | #define GRAEHL__SHARED__FORMAT_HPP
16 | #pragma once
17 | 
18 | #include <boost/format.hpp>
19 | #include <iomanip>
20 | 
21 | namespace fm {  // short names for boost::format and the stream manipulators used with it
22 | using std::string;
23 | using std::endl;  // was `using '\n';`, which is not a valid using-declaration
24 | using std::flush;
25 | using boost::format;
26 | using boost::io::group;
27 | using boost::io::str;
28 | using std::setfill;
29 | using std::setw;
30 | using std::hex;
31 | using std::dec;
32 | using std::showbase;
33 | using std::left;
34 | using std::right;
35 | using std::internal;
36 | }
37 | // FSTR(fmt, a % b): y is deliberately unparenthesized so several %-chained arguments can be passed
38 | #define FSTR(x, y) fm::str(fm::format(x) % y)
39 | 
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/graehl/shared/ftoa_append.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/graehl/shared/ftoa_append.hpp


--------------------------------------------------------------------------------
/graehl/shared/glibc_memcpy.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | /// bind memcpy to the memcpy@GLIBC_2.2.5 symbol version unless USE_LATEST_MEMCPY is defined; see http://www.win.tue.nl/~aeb/linux/misc/gcc-semibug.html
3 | #if !defined(__APPLE__) && defined(__linux__) && defined(__GNUC__) && defined(__LP64__) \
4 |     && !defined(USE_LATEST_MEMCPY) /* only under 64 bit gcc */
5 | __asm__(".symver memcpy,memcpy@GLIBC_2.2.5");
6 | #endif
7 | 


--------------------------------------------------------------------------------
/graehl/shared/have_64_bits.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | /** \file
15 | 
16 |     detect: pointer and size_t are 64 bits
17 | */
18 | 
19 | #ifndef GRAEHL_SHARED__HAVE_64_BITS_HPP
20 | #define GRAEHL_SHARED__HAVE_64_BITS_HPP
21 | #pragma once
22 | 
23 | #ifndef HAVE_64_BITS
24 | // HAVE_64_BITS is 1 on 64-bit targets, else 0; predefine it to override the detection below
25 | // Check windows
26 | #if defined(_WIN32) || defined(_WIN64)
27 | #if defined(_WIN64)
28 | #define HAVE_64_BITS 1
29 | #else
30 | #define HAVE_64_BITS 0
31 | #endif
32 | #elif defined(__LP64__) || defined(_LP64) || defined(__x86_64__) || defined(__ppc64__) || defined(__aarch64__)
33 | #define HAVE_64_BITS 1
34 | #else
35 | #define HAVE_64_BITS 0
36 | #endif
37 | 
38 | #endif  // HAVE_64_BITS
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/graehl/shared/identity.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL_SHARED__IDENTITY_HPP
15 | #define GRAEHL_SHARED__IDENTITY_HPP
16 | 
17 | namespace graehl {
18 | 
19 | /// unary functor that returns its argument unchanged (taken and returned by value).
20 | template <class V> struct identity {
21 |   typedef V argument_type;
22 |   typedef V result_type;
23 | 
24 |   V operator()(V a) const { return a; }
25 | };
26 | 
27 | /// identity functor on references: a const argument comes back const, a mutable one mutable.
28 | template <class V>
29 | struct identity_ref {
30 |   typedef V argument_type;
31 |   typedef V result_type;
32 |   V& operator()(V& a) const { return a; }
33 |   V const& operator()(V const& a) const { return a; }
34 | };
35 | 
36 | // identity over V const& (both typedefs are the reference type); should be safe as identity<V const&>
37 | template <class V>
38 | struct identity_cref {
39 |   typedef V const& argument_type;
40 |   typedef V const& result_type;
41 | 
42 |   V const& operator()(V const& a) const { return a; }
43 | };
44 | 
45 | 
46 | }
47 | 
48 | 
49 | #endif
50 | 


--------------------------------------------------------------------------------
/graehl/shared/inline.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | /** \file
15 | 
16 |     force inlining on or off:
17 | 
18 |     ALWAYS_INLINE void f() { g(); }
19 |     NEVER_INLINE void f2() { g(); }
20 | */
21 | 
22 | #ifndef INLINE_JG_2014_11_12_HPP
23 | #define INLINE_JG_2014_11_12_HPP
24 | #pragma once
25 | 
26 | #ifndef ALWAYS_INLINE
27 | #if defined(__GNUC__) || defined(__clang__)
28 | #define ALWAYS_INLINE inline __attribute__((__always_inline__))
29 | #elif defined(_MSC_VER)
30 | #define ALWAYS_INLINE __forceinline
31 | #else
32 | #define ALWAYS_INLINE inline
33 | #endif
34 | #endif
35 | // NEVER_INLINE: noinline attribute on gcc/clang/MSVC; expands to nothing on other compilers
36 | #ifndef NEVER_INLINE
37 | #if defined(__GNUC__) || defined(__clang__)
38 | #define NEVER_INLINE __attribute__((__noinline__))
39 | #elif defined(_MSC_VER)
40 | #define NEVER_INLINE __declspec(noinline)
41 | #else
42 | #define NEVER_INLINE
43 | #endif
44 | #endif
45 | 
46 | #endif
47 | 


--------------------------------------------------------------------------------
/graehl/shared/karma_tostr.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef KARMA_GENERATE_HPP
15 | #define KARMA_GENERATE_HPP
16 | 
17 | #include <boost/spirit/karma.hpp>
18 | 
19 | namespace karma = boost::spirit::karma;
20 | 
21 | /// append karma::generate's output for value to str. \return generate's success flag
22 | template <class T>
23 | bool tostr(std::string& str, T const& value) {
24 |   typedef std::back_insert_iterator<std::string> appender;
25 |   appender sink(str);
26 |   return karma::generate(sink, value); }
27 | 
28 | /// \return a new string holding karma::generate's output for value (its success flag is ignored)
29 | template <class T>
30 | std::string tostr(T const& value) {
31 |   std::string str;  // was unqualified `string`, which this header never declares
32 |   std::back_insert_iterator<std::string> sink(str);
33 |   karma::generate(sink, value);
34 |   return str;
35 | }
36 | 
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/graehl/shared/lc_ascii.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | /** \file
15 | 
16 |     case-insensitive string keys
17 | */
18 | 
19 | #ifndef GRAEHL_SHARED__LC_ASCII_HPP
20 | #define GRAEHL_SHARED__LC_ASCII_HPP
21 | #pragma once
22 | 
23 | 
24 | namespace graehl {
25 | 
26 | /// \return c with ASCII 'A'..'Z' mapped to 'a'..'z'; any other char is returned as is.
27 | inline char lc_ascii(char c) {
28 |   return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + ('a' - 'A')) : c;
29 | }
30 | /// lowercase every char of s in place. \return s
31 | template <class String>
32 | String& lc_ascii_inplace(String& s) {
33 |   for (typename String::iterator at = s.begin(), end = s.end(); at != end; ++at) *at = lc_ascii(*at);
34 |   return s;
35 | }
36 | /// push_back the lowercased chars of the NUL-terminated s onto r.
37 | template <class String>
38 | void append_lc_ascii(String& r, char const* s) {
39 |   for (; *s; ++s) r.push_back(lc_ascii(*s));
40 | }
41 | /// replace r's contents with the lowercased NUL-terminated s.
42 | template <class String>
43 | void set_lc_ascii(String& r, char const* s) {
44 |   r.clear();
45 |   append_lc_ascii(r, s);
46 | }
47 | 
48 | 
49 | }
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/graehl/shared/lerp.hpp:
--------------------------------------------------------------------------------
 1 | /** \file
 2 | 
 3 |     fused multiply-and-add optimized linear interpolation
 4 | 
 5 |     https://en.wikipedia.org/wiki/FMA_instruction_set
 6 | 
 7 |     want compiler to enable FMA3 (3 arg) not FMA4 (4 arg)
 8 | */
 9 | 
10 | #ifndef LERP_JG_2015_06_17_HPP
11 | #define LERP_JG_2015_06_17_HPP
12 | #pragma once
13 | 
14 | namespace graehl {
15 | 
16 | /// \return a * b + c, the multiply-add shape a compiler can turn into one FMA instruction
17 | template <class T>
18 | T fma(T a, T b, T c) {
19 |   return a * b + c;
20 | }
21 | /// \return ta*a + (1-ta)*b, optimized for fma
22 | template <class T>
23 | T lerp(T a, T b, T ta) {
24 |   return fma(ta, a, fma(-ta, b, b));  // ta*a + (b - ta*b); the body used undeclared t, v0, v1
25 | }
26 | 
27 | 
28 | }
29 | 
30 | #endif
31 | 


--------------------------------------------------------------------------------
/graehl/shared/likely.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | /** \file
15 | 
16 |     branch prediction annotations. note: gcc already has some heuristics that guess
17 |     ok. add predictions only if you're sure or you benchmarked.
18 | 
19 |     usage: if (likely_true(a>b)) ; // meaning you expect a>b to be true.
20 | */
21 | 
22 | #ifndef LIKELY_GRAEHL_2015_10_21_HPP
23 | #define LIKELY_GRAEHL_2015_10_21_HPP
24 | #pragma once
25 | 
26 | /// standard-ish from linux kernel code but with a safe(ish) longer name:
27 | /// usage: if (likely_true(a>b)) ...
28 | /// meaning you expect a>b to be true; likely_false(x) marks x as expected to be false.
29 | #ifdef _MSC_VER
30 | #define likely_true(x) (x)
31 | #define likely_false(x) (x)
32 | #else
33 | #define likely_true(x) __builtin_expect(!!(x), 1)
34 | #define likely_false(x) __builtin_expect(!!(x), 0)
35 | #endif
36 | #endif
37 | 


--------------------------------------------------------------------------------
/graehl/shared/lz4.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL__SHARED__LZ4_H
15 | #define GRAEHL__SHARED__LZ4_H
16 | #pragma once
17 | 
18 | #ifndef LZ4__INLINE
19 | #if defined(GRAEHL__SINGLE_MAIN)
20 | #define LZ4__INLINE 1
21 | #else
22 | #define LZ4__INLINE 0
23 | #endif
24 | #endif
25 | 
// Textually includes the LZ4 implementation and header inside namespace lz4, so
// the declarations they contain are scoped to lz4:: (macros they define are not).
// NOTE(review): lz4.c is included unconditionally and before lz4.h, and
// LZ4__INLINE (defined above) is not consulted in this header - presumably
// lz4.c itself reads it. TODO confirm.
namespace lz4 {
#include "lz4.c"
#include "lz4.h"


}
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/graehl/shared/maybe_update_bound.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL__SHARED__MAYBE_UPDATE_BOUND_HPP
15 | #define GRAEHL__SHARED__MAYBE_UPDATE_BOUND_HPP
16 | 
17 | namespace graehl {
18 | 
19 | // see also associative container versions in assoc_container.hpp
20 | 
/// Running maximum: overwrite \a to with \a from only when to < from.
/// Needs operator<(To, From) and assignment of a From to a To.
template <class To, class From>
inline void maybe_increase_max(To &to, const From &from) {
  if (!(to<from))
    return;  // already at least as large; leave untouched
  to = from;
}
26 | 
/// Running minimum: overwrite \a to with \a from only when from < to.
/// Needs operator<(From, To) and assignment of a From to a To.
template <class To, class From>
inline void maybe_decrease_min(To &to, const From &from) {
  if (!(from<to))
    return;  // already at least as small; leave untouched
  to = from;
}
32 | 
33 | } //graehl
34 | 
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/graehl/shared/mean_field_normalize.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL__SHARED__MEAN_FIELD_NORMALIZE_HPP
15 | #define GRAEHL__SHARED__MEAN_FIELD_NORMALIZE_HPP
16 | 
17 | #include <digamma.hpp>
18 | #include <weight.h>
19 | 
20 | namespace graehl {
21 | 
22 | struct mean_field_scale
23 | {
24 |   bool linear; // if linear, then don't use alpha.  otherwise convert to exp(digamma(alpha+x))
25 |   double alpha;
26 | 
27 |   // returns exp(digamma(x))
28 |   template <class Real>
29 |   logweight<Real> operator()(logweight<Real> const& x) const
30 |   {
31 |     if (linear)
32 |       return x;
33 |     double r = x.getReal();
34 |     if (x < .0001) // until we can compute digamma in logspace, this will be the answer.  and, can't ask digamma(0), because it's negative inf.  but exp(-inf)=0
35 |       return 0;
36 |     logweight<Real> ret;
37 |     ret.setLn(digamma(alpha+r));
38 |   }
39 | };
40 | 
41 | }
42 | 
43 | 
44 | #endif
45 | 


--------------------------------------------------------------------------------
/graehl/shared/must_eof.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL_SHARED__MUST_EOF_HPP
15 | #define GRAEHL_SHARED__MUST_EOF_HPP
16 | 
17 | #include <stdexcept>
18 | #include <string>
19 | 
20 | namespace graehl {
21 | 
/// Require that nothing more can be read from \a in.
/// Tries to extract one char with operator>>; if that succeeds, throws
/// std::runtime_error whose message is \a msg followed by that character.
/// Returns normally when the extraction fails.
template <class I>
inline void must_eof(I &in, char const* msg="Expected end of input, but got: ")
{
  char extra;
  if (!(in >> extra))
    return;
  std::string what(msg);
  what.push_back(extra);
  throw std::runtime_error(what);
}
29 | 
30 | }
31 | 
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/graehl/shared/new_shared.hpp:
--------------------------------------------------------------------------------
 1 | /** \file
 2 | 
 3 |     make_shared but return a new pointer to shared_ptr that must be deleted, i.e.
 4 |     instead of new shared_ptr<T>(new T(...)), new_shared<T>(...)
 5 | */
 6 | 
 7 | #ifndef NEW_SHARED_GRAEHL_HPP
 8 | #define NEW_SHARED_GRAEHL_HPP
 9 | #pragma once
10 | 
11 | #include <memory>
12 | #include <utility>
13 | 
14 | namespace graehl {
15 | 
/// Heap-allocate a std::shared_ptr<T> owning a T constructed from \a args.
/// Same result as new std::shared_ptr<T>(new T(args...)), but the T is created
/// by std::make_shared (so it gets make_shared's allocation benefit).
/// The caller owns the returned pointer and must delete it.
/// perhaps C++2x will also allow the shared_ptr to be singly allocated contiguous to its implementation
template <class T, class... A>
std::shared_ptr<T> *new_shared(A&&... args) {
  std::shared_ptr<T> made = std::make_shared<T>(std::forward<A>(args)...);
  return new std::shared_ptr<T>(std::move(made));
}
22 | 
23 | } // namespace graehl
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/graehl/shared/no_locking.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL_SHARED__NO_LOCKING_HPP
15 | #define GRAEHL_SHARED__NO_LOCKING_HPP
16 | #pragma once
17 | 
18 | #include <boost/thread/mutex.hpp>
19 | 
20 | /*
21 | 
22 |   intent:
23 | 
24 |   template <class Locking=graehl::no_locking> // or graehl::locking
25 |   struct collection : private Locking
26 |   {
27 |   void some_operation()
28 |   {
29 |   typename Locking::lock(*this);
30 | // or bool do_lock=...;
31 | //    typename Locking::scoped_lock(*this, do_lock);
32 | // (locks if do_lock)
33 | }
34 | };
35 | */
36 | 
37 | namespace graehl {
38 | 
39 | struct no_locking {
40 |   typedef no_locking self_type;
41 |   typedef no_locking mutex_type;
42 |   struct guard_type {
43 |     guard_type(self_type const& l) {}
44 |     guard_type(self_type const& l, bool b) {}
45 |   };
46 | };
47 | 
48 | 
49 | }
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/graehl/shared/nondet_random.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/graehl/shared/nondet_random.cpp


--------------------------------------------------------------------------------
/graehl/shared/noreturn.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | /** \file
15 | 
16 |     avoid no-return-value compiler warnings for infinite loops and throw that
17 |     will never return from a fn
18 | 
19 |     usage: void f() NORETURN
20 | 
21 |     c++11 alternative: void f() [[noreturn]] didn't work for me
22 | 
23 |     perhaps you can put NORETURN before or after the decl; after works
24 | */
25 | 
26 | #ifndef NORETURN_JG2012613_HPP
27 | #define NORETURN_JG2012613_HPP
28 | #pragma once
29 | 
// NORETURN: __attribute__((noreturn)) on GCC >= 3 and clang, empty elsewhere.
// Written after the declaration: void f() NORETURN;
#if (defined(__GNUC__) && __GNUC__ >= 3) || defined(__clang__)
#define NORETURN __attribute__((noreturn))
#else
#define NORETURN
#endif

// ANALYZER_NORETURN: clang-only analyzer_noreturn attribute, empty elsewhere.
// (fixed: the attribute keyword was spelled with a single leading underscore)
#if defined(__clang__)
#define ANALYZER_NORETURN __attribute__((analyzer_noreturn))
#else
#define ANALYZER_NORETURN
#endif

// NORETURNPRE: __declspec(noreturn) under MSVC, empty elsewhere.
// Written before the declaration: NORETURNPRE void f();
#if defined(_MSC_VER)
#define NORETURNPRE __declspec(noreturn)
#else
#define NORETURNPRE
#endif
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/graehl/shared/null_deleter.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | /** \file
15 | 
16 |     shared_ptr helper for nondeleting references (where the refcount is
17 |     meaningless, but you pay for it anyway to simplify - everything can be a
18 |     shared_ptr)
19 | */
20 | 
21 | #ifndef GRAEHL__SHARED__NULL_DELETER_HPP
22 | #define GRAEHL__SHARED__NULL_DELETER_HPP
23 | #pragma once
24 | 
#include <memory>  // std::shared_ptr (this header used it without including it)

namespace graehl {

/// Deleter that does nothing; for shared_ptr that must not free its pointee.
struct null_deleter {
  template <class T>
  void operator()(T const*) const {}
  void operator()(void const*) const {}
};

/// \return a shared_ptr pointing at \a v that never deletes it.
/// The caller must keep \a v alive for as long as the pointer is used.
template <class V>
std::shared_ptr<V> no_delete(V& v) {
  return std::shared_ptr<V>(&v, null_deleter());
}

/// \return a shared_ptr holding \a v that never deletes it.
/// The caller must keep the pointee alive for as long as the pointer is used.
template <class V>
std::shared_ptr<V> no_delete(V* v) {
  return std::shared_ptr<V>(v, null_deleter());
}


}
45 | 
46 | #endif
47 | 


--------------------------------------------------------------------------------
/graehl/shared/null_output_iterator.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL_SHARED__NULL_OUTPUT_ITERATOR_HPP
15 | #define GRAEHL_SHARED__NULL_OUTPUT_ITERATOR_HPP
16 | 
17 | #include <iterator>
18 | 
19 | namespace graehl {
20 | 
/// Output iterator that discards everything assigned through it.
/// All operations are const no-ops, so one instance can be reused freely.
struct null_output_iterator {
  typedef std::output_iterator_tag iterator_category;
  typedef iterator_category iteratory_category;  // original (misspelled) name, kept for existing users
  typedef void value_type;
  typedef void difference_type;
  typedef void pointer;
  typedef void reference;
  /// assignment through the dereferenced iterator: ignores the value
  template <class V>
  void operator = (V const&) const {}
  /// dereference yields the iterator itself, so *it = v hits the operator= above
  null_output_iterator const& operator*() const { return *this; }
  /// increments return the iterator (not void) so that *it++ = v and *++it = v compile
  null_output_iterator const& operator++() const { return *this; }
  null_output_iterator operator++(int) const { return *this; }
};
33 | 
34 | }
35 | 
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/graehl/shared/os_memory.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graehl/carmel/ff27a1497b28dfdcc7f785455bf9bd0c18c07681/graehl/shared/os_memory.hpp


--------------------------------------------------------------------------------
/graehl/shared/podcpy.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL_SHARED__PODCPY_HPP
15 | #define GRAEHL_SHARED__PODCPY_HPP
16 | 
17 | #include <cstring>
18 | 
19 | namespace graehl {
20 | 
/// Fill every byte of \a dst with \a c (default 0) via memset.
template <class P> inline
void podset(P& dst, unsigned char c = 0)
{
  void* const raw = (void*)&dst;
  std::memset(raw, c, sizeof(P));
}
26 | 
/// Set every byte of \a dst to zero via memset.
template <class P> inline
void podzero(P& dst)
{
  void* const raw = (void*)&dst;
  std::memset(raw, 0, sizeof(P));
}
32 | 
/// Bytewise copy of \a src into \a dst via memcpy.
/// \return dst (the original declared P& but returned nothing, which is undefined behavior)
template <class P> inline
P &podcpy(P& dst, P const& src)
{
  // memcpy requires non-overlapping ranges, so skip the self-copy case
  if (&dst != &src)
    std::memcpy((void*)&dst, (void const*)&src, sizeof(dst));
  return dst;
}
38 | 
39 | }
40 | 
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/graehl/shared/prefix_option.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | /** \file for boost program options with opt="long-name,l" "prefix-" =>
15 | "prefix-long-name" - short option is stripped to avoid conflict */
16 | 
17 | #ifndef GRAEHL_SHARED__PREFIX_OPTION_HPP
18 | #define GRAEHL_SHARED__PREFIX_OPTION_HPP
19 | #pragma once
20 | 
21 | 
22 | #include <string>
23 | 
24 | namespace graehl {
25 | 
/// Prepend \a prefix to a boost program_options name such as "long-name,l".
/// With an empty prefix, \a opt is returned untouched. Otherwise a trailing
/// ",x" short-option suffix is dropped first (only when opt is longer than 2
/// chars), giving e.g. "prefix-long-name".
inline std::string prefix_option(std::string opt, std::string const& prefix = "") {
  if (!prefix.empty()) {
    std::string::size_type const len = opt.size();
    bool const has_short = len > 2 && opt[len - 2] == ',';
    if (has_short) opt.erase(len - 2);
    opt.insert(0, prefix);
  }
  return opt;
}
32 | 
/// Append \a suffix to a boost program_options name such as "long-name,l".
/// With an empty suffix, \a opt is returned untouched. Otherwise a trailing
/// ",x" short-option suffix is dropped first (only when opt is longer than 2
/// chars), giving e.g. "long-name-suffix".
inline std::string suffix_option(std::string opt, std::string const& suffix = "") {
  if (!suffix.empty()) {
    std::string::size_type const len = opt.size();
    bool const has_short = len > 2 && opt[len - 2] == ',';
    if (has_short) opt.erase(len - 2);
    opt += suffix;
  }
  return opt;
}
39 | 
40 | 
41 | }
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/graehl/shared/printlines.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL_SHARED__PRINTLINES_HPP
15 | #define GRAEHL_SHARED__PRINTLINES_HPP
16 | 
17 | namespace graehl {
18 | 
/// Write each element of [i, end) to \a o, each followed by \a endl.
template <class I, class O>
void printlines(O &o, I i, I end, const char *endl)
{
  while (i != end) {
    o << *i << endl;
    ++i;
  }
}

/// Container form: writes every element of \a i (begin() to end()) to \a o,
/// each followed by \a endl (a newline by default).
template <class I, class O>
void printlines(O &o, I const& i, const char *endl="\n")
{
  printlines(o, i.begin(), i.end(), endl);
}
31 | 
32 | }
33 | 
34 | #endif
35 | 


--------------------------------------------------------------------------------
/graehl/shared/program_options_config_example.txt:
--------------------------------------------------------------------------------
1 | log-file=/tmp/log.example.txt
2 | 


--------------------------------------------------------------------------------
/graehl/shared/sample/sample.graph:
--------------------------------------------------------------------------------
1 | 6
2 | (0 5 1)
3 | (5 1 5) (5 2 10) (1 2 9) (3 4 2) (4 2 3) (5 3 3)
4 | (5 4 8) (2 4 100) (3 2 6)
5 | (0 0 20)
6 | 


--------------------------------------------------------------------------------
/graehl/shared/sample/sample.lattice:
--------------------------------------------------------------------------------
1 | 5
2 | (0 1 2)
3 | (0 2 4)
4 | (1 3 6)
5 | (1 2 3)
6 | (2 4 5)
7 | (2 3 3)
8 | (3 4 1)
9 | 


--------------------------------------------------------------------------------
/graehl/shared/sample/sample.lattice.carmel:
--------------------------------------------------------------------------------
1 | 5
2 | (0 1 2)
3 | (0 2 4)
4 | (1 3 6)
5 | (1 2 3)
6 | (2 4 5)
7 | (2 3 3)
8 | (3 4 1)
9 | 


--------------------------------------------------------------------------------
/graehl/shared/sample/simple.cycle.graph:
--------------------------------------------------------------------------------
1 | 2
2 | (0 1 1)
3 | (0 0 20)
4 | (1 0 5)
5 | 


--------------------------------------------------------------------------------
/graehl/shared/semiring.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | // unfinished, unused
15 | #ifndef SEMIRING_HPP
16 | #define SEMIRING_HPP
17 | 
18 | 
19 | /*template <class C>
20 | struct semiring_traits {
21 |     typedef C value_type;
22 |     static inline value_type exponential(double exponent) {
23 |         return exponential<C>(exponent);
24 |     }
25 |     static inline value_type exponential(float exponent) {
26 |         return exponential<C>(exponent);
27 |     }
28 | };
29 | */
30 | 
31 | #include <graehl/shared/weight.h>
32 | 
33 | 
34 | #endif
35 | 


--------------------------------------------------------------------------------
/graehl/shared/set_difference.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL_SHARED__SET_DIFFERENCE_HPP
15 | #define GRAEHL_SHARED__SET_DIFFERENCE_HPP
16 | 
17 | #include <set>
18 | 
19 | namespace graehl {
20 | 
/// A std::set<K> with add/subtract spellings for insert/erase.
template <class K>
struct set_difference : public std::set<K>
{
  /// insert \a k (a std::set keeps at most one copy)
  void add(K const& k) { std::set<K>::insert(k); }

  /// erase \a k; \return true iff an element was removed
  bool subtract(K const& k) { return std::set<K>::erase(k) != 0; }
};
33 | 
34 | 
35 | }
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/graehl/shared/stacktrace.hpp:
--------------------------------------------------------------------------------
 1 | /** \file
 2 | 
 3 |  like 'backtrace' in gdb
 4 | 
 5 |  (linux only so far)
 6 | */
 7 | 
 8 | #ifndef STACKTRACE_GRAEHL_2016_08_29_HPP
 9 | #define STACKTRACE_GRAEHL_2016_08_29_HPP
10 | #pragma once
11 | 
#include <iostream>

#ifdef __linux__
#include <execinfo.h>
#include <csignal>
#include <cstdlib>
#endif
18 | 
19 | namespace graehl {
20 | 
/// maximum number of stack frames captured by stacktrace()
static const int MAX_TRACE_DEPTH = 255;

/// Write the current call stack (at most MAX_TRACE_DEPTH frames) to \a o,
/// one "!! "-prefixed line per frame. Does nothing unless __linux__ is defined.
inline void stacktrace(std::ostream& o = std::cerr) {
#ifdef __linux__
  void* trace[MAX_TRACE_DEPTH];
  int const trace_size = ::backtrace(trace, MAX_TRACE_DEPTH);
  char** messages = ::backtrace_symbols(trace, trace_size);
  o << "\n!!Stack backtrace:\n";
  for (int i = 0; i < trace_size; ++i) {
    o << "!! ";
    // backtrace_symbols returns NULL on failure: fall back to the raw address
    if (messages)
      o << messages[i];
    else
      o << trace[i];
    o << '\n';
  }
  // the symbol array is malloc'd by backtrace_symbols and owned by the caller
  std::free(messages);
#else
  (void)o;
#endif
}
32 | 
33 | 
34 | }
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/graehl/shared/static_fgets_buf.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | /** \file
15 | 
16 |  .
17 | */
18 | 
19 | #ifndef STATIC_FGETS_BUF_JG_2014_12_31_H
20 | #define STATIC_FGETS_BUF_JG_2014_12_31_H
21 | #pragma once
22 | 
// Size in bytes of each static buffer below; the includer may predefine it.
#ifndef READ_BUFSIZE
#define READ_BUFSIZE (8 * 1024 * 1024)
#endif

// Line reader to use: fgets_unlocked when _GNU_SOURCE is nonzero, else fgets.
// The includer may predefine it.
#ifndef FGETS_UNLOCKED
#if _GNU_SOURCE
#define FGETS_UNLOCKED fgets_unlocked
#else
#define FGETS_UNLOCKED fgets
#endif
#endif

// buf: scratch buffer for the includer; bufstdio: backing store handed to setvbuf below.
static char buf[READ_BUFSIZE], bufstdio[READ_BUFSIZE];
// Number of leading bytes of bufstdio left unused by set_static_bufstdio
// (presumably to keep stdio's buffer off cache lines shared with buf - TODO confirm).
#ifndef FALSE_SHARING_PROTECT
#define FALSE_SHARING_PROTECT 72
#endif

// Make fp fully buffered (_IOFBF) using bufstdio past its first
// FALSE_SHARING_PROTECT bytes. The setvbuf result is not checked.
static inline void set_static_bufstdio(FILE *fp) {
  char *const start = bufstdio + FALSE_SHARING_PROTECT;
  setvbuf(fp, start, _IOFBF, READ_BUFSIZE - FALSE_SHARING_PROTECT);
}
43 | 
44 | #endif
45 | 


--------------------------------------------------------------------------------
/graehl/shared/static_itoa.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL_SHARED__STATIC_ITOA_H
15 | #define GRAEHL_SHARED__STATIC_ITOA_H
16 | 
17 | #include <graehl/shared/itoa.hpp>
18 | #include <graehl/shared/threadlocal.hpp>
19 | 
20 | 
21 | namespace graehl {
22 | 
23 | namespace {
24 | static const int utoa_bufsize = 40; // 64bit safe.
25 | static const int utoa_bufsizem1 = utoa_bufsize-1; // 64bit safe.
26 | THREADLOCAL char utoa_buf[utoa_bufsize]; // note: 0 initialized
27 | }
28 | 
29 | inline char *static_utoa(unsigned n) {
30 |   assert(utoa_buf[utoa_bufsizem1]==0);
31 |   return utoa(utoa_buf+utoa_bufsizem1, n);
32 | }
33 | 
34 | inline char *static_itoa(int n) {
35 |   assert(utoa_buf[utoa_bufsizem1]==0);
36 |   return itoa(utoa_buf+utoa_bufsizem1, n);
37 | }
38 | 
39 | }//graehl
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/graehl/shared/string.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | // string: as opposed to a tree.
15 | #ifndef STRING_HPP
16 | #define STRING_HPP
17 | 
18 | #include <graehl/shared/dynamic_array.hpp>
19 | 
20 | #include <graehl/tt/ttconfig.hpp>
21 | #include <iostream>
22 | #include <graehl/shared/myassert.h>
23 | #include <graehl/shared/genio.h>
24 | //#include <vector>
25 | #include <graehl/shared/dynamic_array.hpp>
26 | #include <algorithm>
27 | #include <functional>
28 | 
29 | #include <graehl/shared/tree.hpp>
30 | 
31 | namespace graehl {
32 | 
33 | template <class L, class Alloc=std::allocator<L> > struct String : public array<L,Alloc> {
34 |   typedef L Label;
35 | };
36 | 
37 | }
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/graehl/shared/string_tr.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef STRING_TR_HPP
15 | #define STRING_TR_HPP
16 | 
17 | 
18 | namespace graehl {
19 | 
20 | // [] to {}
21 | 
/// Write map(x) to \a o for every element x of \a s, in order.
template <class O,class S,class F>
void write_tr(O &o,S const& s,F map) {
  typedef typename S::const_iterator Iter;
  Iter const last=s.end();
  Iter it=s.begin();
  while (it!=last) {
    o<<map(*it);
    ++it;
  }
}
27 | 
/// \return a copy of \a s with every element x replaced by map(x); \a s itself is not modified.
template <class S,class F>
S tr(S const& s,F map) {
  S r(s);
  // iterate the copy: the original looped over the const source s with a
  // mutable iterator, which cannot compile and would never have touched r
  for (typename S::iterator i=r.begin(),e=r.end();i!=e;++i)
    *i=map(*i);
  return r;
}
35 | 
36 | }
37 | 
38 | 
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/graehl/shared/stringkey.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "stringkey.h"
15 | 
namespace graehl {
// Out-of-class definition of the static member StringKey::empty, constructed from the literal "".
StringKey StringKey::empty("");
}
19 | 


--------------------------------------------------------------------------------
/graehl/shared/strstrsep.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef STRSTRSEP_H
15 | #define STRSTRSEP_H
16 | 
17 | char *strstrsep(char **stringp, const char *delim);
18 | char *strsep_(char **stringp, const char *delims);
19 | 
/**
   Same contract as strsep(stringp, " \t\n"): returns the token starting at
   *stringp, overwrites the first space, tab or newline after it with NUL and
   advances *stringp past that byte. When the end of the string is reached
   first, *stringp becomes NULL. Returns NULL if *stringp is already NULL.
*/
inline char* strsepspaces(char **stringp) {
  char *const token = *stringp;
  if (token == NULL) return NULL;
  char *cursor = token;
  // scan to the first delimiter or the terminating NUL
  while (*cursor != '\0' && *cursor != ' ' && *cursor != '\n' && *cursor != '\t')
    ++cursor;
  if (*cursor == '\0') {
    // no delimiter left: this was the final token
    *stringp = NULL;
  } else {
    *cursor = '\0';
    *stringp = cursor + 1;
  }
  return token;
}
40 | 
/**
   Undo in-place tokenization: every NUL byte in [begin, lasttok) is turned
   back into a space. Returns begin.
*/
static inline char *unstrstr(char *lasttok, char *begin) {
  // Compare before decrementing: the old `--lasttok >= begin` formed a pointer
  // one before `begin` on its last iteration, which is undefined behavior.
  while (lasttok > begin) {
    --lasttok;
    if (*lasttok == 0)
      *lasttok = ' ';
  }
  return begin;
}
48 | 
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/graehl/shared/test/Makefile:
--------------------------------------------------------------------------------
# Default target: compiles tree.cpp into the `tree` test binary with debug info.
# NOTE(review): the include paths are hard-coded to /home/graehl; adjust them
# when building on another machine.
all:
	g++ -ggdb -O0 -I/home/graehl/t -I/home/graehl/src/boost tree.cpp -o tree
#	g++ -I.. -I../../boost slist.cpp -o slist
#	g++ -I.. -I../../boost weight_underflow.cpp -o weight_underflow
5 | 


--------------------------------------------------------------------------------
/graehl/shared/test/backtrace.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #define DEBUG
15 | #define MAIN
16 | 
17 | #include "debugprint.hpp"
18 | 
// Innermost frame of the demo: always throws std::logic_error.
// NOTE(review): BACKTRACE is a macro from debugprint.hpp; presumably it
// registers this frame for BackTrace::print_on - confirm.
void joe() {
    BACKTRACE;
    throw std::logic_error("something went wrong.");
}
23 | 
// Middle frame of the demo: marks itself with BACKTRACE, then calls joe(),
// whose exception propagates through here.
void murphy() {
    BACKTRACE;
    joe();
}
28 | 
// Driver: calls murphy() inside a try block and, when the exception arrives,
// prints its message followed by whatever BackTrace::print_on emits.
int main()
{
    DBPC2("hi",1);
    try {
        BACKTRACE;
        murphy();
    } catch (std::exception &e) {
        // what() first, then the recorded backtrace, both on stderr
        std::cerr << e.what() << std::endl;
        BackTrace::print_on(std::cerr);
    }
}
40 | 


--------------------------------------------------------------------------------
/graehl/shared/test/epsilon.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include <graehl/shared/epsilon.hpp>
15 | 
16 | #include <fstream>
17 | #include <iostream>
18 | #include <iomanip>
19 | #include <cstdlib>
20 | 
21 | using namespace std;
22 | using namespace graehl;
23 | 
24 | template <class Float>
25 | void showApart(Float a, Float b) {
26 |   cout<<"a="<<a<<" b="<<b<<" ieee_apart(a,b)="<<ieee_apart(a,b)<<"\n";
27 | }
28 | 
29 | 
30 | int main(int argc, char *argv[])
31 | {
32 |   if (argc!=3) {
33 |     cerr<<"given two floating point arguments; prints # of representable float, double between them\n";
34 |     return -1;
35 |   }
36 |   double a=atof(argv[1]);
37 |   double b=atof(argv[2]);
38 |   showApart((float)a,(float)b);
39 |   showApart(a,b);
40 | }
41 | 


--------------------------------------------------------------------------------
/graehl/shared/test/make.sh:
--------------------------------------------------------------------------------
# Build weight_underflow with -O0 -ffast-math and run it if the build succeeds.
# Extra compiler flags given on the command line are forwarded; "$@" (instead
# of unquoted $*) keeps arguments that contain spaces intact.
set -x
g++ "$@" -O0 -ffast-math -I.. weight_underflow.cpp -I../../boost -o weight_underflow && ./weight_underflow
#g++ -DSINGLE_PRECISION -I.. weight_underflow.cpp -I../../boost -o weight_underflow_single
4 | 
5 | 


--------------------------------------------------------------------------------
/graehl/shared/test/make_kbest.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Build LazyKbestTrees_test and run it if the build succeeds. Extra compiler
# flags are forwarded; "$@" (instead of unquoted $*) preserves arguments that
# contain spaces.
g++ "$@" -I/cache/boost -I.. LazyKbestTrees_test.cpp -o LazyKbestTrees_test && ./LazyKbestTrees_test
3 | 


--------------------------------------------------------------------------------
/graehl/shared/test/tree.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include <iostream>
15 | 
16 | #define MAIN
17 | //#define SINGLE_PRECISION
18 | #define DOUBLE_PRECISION
19 | 
20 | #include <graehl/shared/tree.hpp>
21 | #include <graehl/shared/string_to.hpp>
22 | 
23 | int main(int argc, char *argv[])
24 | {
25 |     using namespace graehl;
26 |     using namespace std;
27 |     
28 |     if (argc<2) {
29 |         cerr<<"argument: tree with int labels";
30 |         return -1;
31 |     }
32 |     tree<int> t;
33 |     std::string s(argv[1]);
34 |     string_to(argv[1],t);
35 |     cout << t << "\n";
36 |     return 0;        
37 | }
38 | 


--------------------------------------------------------------------------------
/graehl/shared/the_null_ostream.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL__SHARED__THE_NULL_OSTREAM_HPP
15 | #define GRAEHL__SHARED__THE_NULL_OSTREAM_HPP
16 | 
17 | #include <graehl/shared/null_ostream.hpp>
18 | 
19 | #ifdef GRAEHL__SINGLE_MAIN
20 | # define GRAEHL__NULL_OSTREAM_MAIN
21 | #endif
22 | 
23 | #ifdef GRAEHL__NULL_OSTREAM_MAIN
24 | null_ostream the_null_ostream;
25 | #else
26 | /// singleton/constant (only need one)
27 | extern null_ostream the_null_ostream;
28 | #endif
29 | 
30 | #endif
31 | 


--------------------------------------------------------------------------------
/graehl/shared/time_space_report.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL__SHARED__TIME_SPACE_REPORT_HPP
15 | #define GRAEHL__SHARED__TIME_SPACE_REPORT_HPP
16 | 
17 | #include <graehl/shared/time_report.hpp>
18 | #include <graehl/shared/memory_stats.hpp>
19 | 
20 | namespace graehl {
21 | 
/// Bundles a time_change and a memory_change so both are printed together.
struct time_space_change
{
  /// Label text for this kind of report.
  static char const* default_desc()
  { return "\ntime and memory used: "; }
  time_change tc;
  memory_change mc;
  /// Writes "<tc>, memory <mc>" to o.
  void print(std::ostream &o) const
  {
    o << tc << ", memory " << mc;
  }

  typedef time_space_change self_type;
  // NOTE(review): TO_OSTREAM_PRINT is a macro defined elsewhere; presumably it
  // builds operator<< from self_type and print() - confirm.
  TO_OSTREAM_PRINT
};
36 | 
37 | typedef auto_report<time_space_change> time_space_report;
38 | 
39 | }
40 | 
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/graehl/shared/unimplemented.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #ifndef GRAEHL_SHARED__UNIMPLEMENTED_HPP
15 | #define GRAEHL_SHARED__UNIMPLEMENTED_HPP
16 | 
17 | #include <stdexcept>
18 | 
namespace graehl {

/// Exception signalling a code path that has not been written yet.
struct unimplemented_exception : public std::runtime_error
{
  unimplemented_exception(char const* what_arg)
      : std::runtime_error(what_arg)
  {}
};

/// Always throws unimplemented_exception carrying message m.
inline void unimplemented(char const* m="unimplemented") {
  unimplemented_exception not_written(m);
  throw not_written;
}

}
31 | 
32 | #endif
33 | 


--------------------------------------------------------------------------------
/graehl/shared/warning_pop.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include <graehl/shared/warning_compiler.h>
15 | #ifdef _MSC_VER
16 | #pragma warning(pop)
17 | #elif defined(__clang__)
18 | #pragma clang diagnostic pop
19 | #elif HAVE_DIAGNOSTIC_PUSH
20 | #pragma GCC diagnostic pop
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/graehl/shared/warning_push.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl-http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include <graehl/shared/warning_compiler.h>
15 | #ifdef _MSC_VER
16 | #pragma warning(push)
17 | #elif defined(__clang__)
18 | #pragma clang diagnostic push
19 | #elif HAVE_DIAGNOSTIC_PUSH
20 | #pragma GCC diagnostic push
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/graehl/shared/weight.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include <graehl/shared/glibc_memcpy.hpp>
15 | #include <graehl/shared/weight.h>
16 | 
17 | namespace graehl {
18 | 
19 | }
20 | 


--------------------------------------------------------------------------------
/sblm/Makefile:
--------------------------------------------------------------------------------
 1 | TRUNK=../..
 2 | LOCAL=/home/nlg-02/graehl/isd/hpc-opteron
 3 | BOOST=$(LOCAL)/include
 4 | BOOSTLIB=$(LOCAL)/lib
 5 | 
 6 | vpath %.cpp .:$(TRUNK)/xrsparse/src
 7 | 
 8 | clean:
 9 | 	rm -f *.o xrs-pcfg-events
10 | 
11 | xrs-pcfg-events: xrs.cpp xrs-pcfg-events.cpp
12 | 	g++ -o $@ $^ -I$(TRUNK)/xrsparse -I $(TRUNK)/gusc -I$(BOOST) -L$(BOOSTLIB) -lboost_thread -pthread
13 | 


--------------------------------------------------------------------------------
/sblm/README:
--------------------------------------------------------------------------------
 1 | lexical items are quoted as in sbmt rules. they're lexical because they're non-variable tree leaves. but in the event file we strip off the variable prefix. so we leave the quotes to distinguish the 2 in PCFG rhs
 2 | 
 3 | had-pcfg-probs - hadoop driver. to test: local=1 ~/blobs/sblm/latest/had-pcfg-probs 1000.eng-parse
 4 | 1000.eng-parse - some trees in ghkm-input format.
 5 | pcfg-map - output sblm pcfg events
 6 | fast-lhs-sums-map - produce sblm lhs counts
 7 | cat-pcfg-for-divide - produce event count lhs-sum. prefaced with (TOTAL_NT) and (TOTAL_LEX) sums for unigram bo.
 8 | add-pcfg-feature - for had-rules, output id\tfeats\n
 9 | 
10 | had-rules --pcfg=training.pcfg-counts
11 | 


--------------------------------------------------------------------------------
/sblm/TODO:
--------------------------------------------------------------------------------
 1 | test everything.
 2 | 
 3 | had-* works
 4 | add-* works
 5 | 
 6 | what about had-rules pipeline integration? - test it, may work. argument is had-rules --pcfg=training.pcfg-counts or had-rules -p training.pcfg-counts
 7 | 
 8 | length distribution in pcfg backoff - exponential p(stop)?
 9 | 
10 | unigram backoff given parent, child index -> parent,* -> *
11 | 
12 | no. either you saw rewrite
13 | 
14 | backoff features get fixed weight, or some fixed method?
15 | 
16 | SGT? other smoothing of counts per lhs? binned counts? #1count pcfg rewrites feat?
17 | 
18 | nice thing about non-tuned params: measure ppx of data
19 | 
20 | validate smoothing methods using held-out trees.
21 | 
22 | 


--------------------------------------------------------------------------------
/sblm/count.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys, itertools
 3 | 
 4 | # count.py
 5 | # input:  key \t ... \t count
 6 | # output: key \t sum
 7 | 
 8 | def stdinfields():
 9 |     for line in sys.stdin:
10 |         yield line.rstrip().split("\t")
11 | 
if __name__ == "__main__":
    # itertools.groupby only merges *adjacent* records, so the input must
    # already be sorted (or at least grouped) by its first field.
    for (key, records) in itertools.groupby(stdinfields(), lambda r: r[0]):
        # the count is the last field of every record
        sumcount = sum(int(r[-1]) for r in records)
        # sys.stdout.write replaces the Python-2-only print statement so the
        # script runs under both Python 2 and Python 3; output is unchanged.
        sys.stdout.write("%s\t%s\n" % (key, sumcount))
16 | 
17 | 


--------------------------------------------------------------------------------
/sblm/dumpx.py:
--------------------------------------------------------------------------------
1 | ../gextract/dumpx.py


--------------------------------------------------------------------------------
/sblm/example.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env pypy
 2 | #-*- python -*-
 3 | 
 4 | #hadoop mapper for PCFG: sbmt training format tree input -> parent children\t1
 5 | 
 6 | version="0.1"
 7 | 
 8 | test=True
 9 | test_in='1000.eng-parse'
10 | default_in=None
11 | 
12 | import os,sys
13 | sys.path.append(os.path.dirname(sys.argv[0]))
14 | 
15 | import unittest
16 | 
17 | import tree
18 | import optparse
19 | 
20 | from graehl import *
21 | from dumpx import *
22 | 
23 | ### main:
24 | 
def main(opts):
    """Pass the version banner and the joined command line to log().

    `opts` is accepted but not used by this example.
    """
    log("pcfg-map v%s"%version)
    log(' '.join(sys.argv))
28 | 
29 | import optfunc
# Command-line entry point handed to optfunc.main below.
# NOTE(review): documented with comments rather than a docstring because
# optfunc may use the docstring as help text - confirm before adding one.
# NOTE(review): Locals() comes from a star import; presumably it packages this
# function's local variables (input, test) into the object main() receives.
@optfunc.arghelp('input','input file here (None = STDIN should be default in production)')

def options(input=default_in,test=test):
    if test:
        # test mode: drop all command-line arguments and read the sample file
        sys.argv=sys.argv[0:1]
        input=test_in
    main(Locals())
37 | 
38 | optfunc.main(options)
39 | 
40 | 


--------------------------------------------------------------------------------
/sblm/fast-lhs-sums-map:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # faster: fast-lhs-sums-map | sort | count.py
 4 | # warning: set high enough timeout! you will see no output until end.
 5 | # TODO:
 6 | # TODO: cause output to happen every N sec. instead, stderr progress
 7 | 
 8 | # slower: lhs-sums-map | precombine.py | count.py | sort | count.py
 9 | 
10 | # input: LHS ... count
11 | # (any whitespace terminates LHS)
12 | 
13 | # output: LHS\tSUM
14 | 
15 | # could have made output repeat LHS (preserve original line incl. exact whitespace). but didn't.
16 | 
use strict;
use warnings;    # a non-numeric count now produces a warning instead of silently adding 0

# Running total per LHS (the first whitespace-delimited token of each line).
my %sum_for;
my $lil=1000;       # print a progress dot every $lil input lines
my $big=$lil*70;    # print the line number every $big input lines

# Turn on autoflush for STDERR so progress shows up immediately, then restore
# STDOUT as the default output handle.
select STDERR;
$|=1;
select STDOUT;

while(<>) {
    print STDERR "." unless $. % $lil;
    print STDERR "$.\n" unless $. % $big;
    # LHS = leading run of non-whitespace; count = last TAB-separated field.
    my ($lhs, $count) = /^(\S+).*\t(\S+)\s*$/
        or die "expected lhs,...,TAB,count,NEWLINE in $_";
    $sum_for{$lhs} += $count;
}
print STDERR "\nDONE.\n";

# Nothing is emitted until all input has been read; key order is unspecified.
for my $lhs (keys %sum_for) {
    print "$lhs\t$sum_for{$lhs}\n";
}
35 | 
36 | 


--------------------------------------------------------------------------------
/sblm/graehl.py:
--------------------------------------------------------------------------------
1 | ../gextract/graehl.py


--------------------------------------------------------------------------------
/sblm/lhs-sums-map:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # input: LHS ...
 4 | # (any whitespace terminates LHS)
 5 | 
 6 | # output: LHS\t...
 7 | 
 8 | # could have made output repeat LHS (preserve original line incl. exact whitespace). but didn't.
 9 | 
use strict;
use warnings;

while (my $line = <>) {
    # Remove the line terminator before splitting. With a LIMIT of 2, split
    # keeps a trailing empty field, so a line holding only an LHS used to come
    # out as "LHS\t" with no newline and ran into the following record.
    chomp $line;
    my ($lhs, $rest) = split ' ', $line, 2;
    next unless defined $lhs;           # blank or whitespace-only line
    $rest = '' unless defined $rest;    # LHS with nothing after it
    print $lhs, "\t", $rest, "\n";
}
14 | 


--------------------------------------------------------------------------------
/sblm/optfunc.py:
--------------------------------------------------------------------------------
1 | ../gextract/optfunc.py


--------------------------------------------------------------------------------
/sblm/pcfg-backoff:
--------------------------------------------------------------------------------
 1 | #-*- python -*-
 2 | 
 3 | # using backoff.py toolkit (should eventually allow mapreduce) for training and evaluating a simple PCFG sblm
 4 | 
 5 | import os,sys
 6 | sys.path.append(os.path.realpath(os.path.dirname(sys.argv[0])))
 7 | 
 8 | from graehl import *
 9 | from dumpx import *
10 | from pcfg import *
11 | from backoff import *
12 | 
class PCFG(Model):
    # Placeholder subclass: adds nothing to Model yet.
    # NOTE(review): Model is not defined in this file; presumably it comes from
    # one of the star imports above (likely backoff) - confirm.
    pass
15 | 
16 | optfunc.main(backoff_main_opts)
17 | 


--------------------------------------------------------------------------------
/sblm/pcfg-map:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env pypy
 2 | #-*- python -*-
 3 | 
 4 | #hadoop mapper for PCFG:
 5 | #sbmt training format trees on stdin (or -input)
 6 | #print PARENT CHILDREN+\t1\n
 7 | 
 8 | version="0.1"
 9 | 
10 | test=True
11 | test_in='10.eng-parse'
12 | default_in='-'
13 | 
14 | import os,sys
15 | sys.path.append(os.path.realpath(os.path.dirname(sys.argv[0])))
16 | 
17 | import unittest
18 | 
19 | import tree
20 | import optparse
21 | 
22 | from graehl import *
23 | from dumpx import *
24 | from pcfg import *
25 | 
26 | 
27 | ### main:
28 | 
def print_pcfg_event(t,digit2at=True,out=sys.stdout):
    """Write one mapper record for node t to out: the event text, a TAB, '1' and a newline."""
    event_text = event2str(sbmt_lhs_pcfg_event(t, digit2at))
    out.write(event_text)
    out.write("\t1\n")
34 | 
35 | 
def main(opts):
    """Log the banner, then print one PCFG event per node of every parsed input line.

    Lines for which raduparse returns None are skipped.
    """
    log("pcfg-map v%s"%version)
    log(' '.join(sys.argv))
    for line in open_in(opts.input):
        parsed = raduparse(line)
        if parsed is not None:
            for node in parsed.preorder():
                print_pcfg_event(node, opts.digit2at, sys.stdout)
45 | 
46 | import optfunc
# Command-line entry point handed to optfunc.main below.
# NOTE(review): documented with comments rather than a docstring because
# optfunc may use the docstring as help text - confirm before adding one.
# NOTE(review): Locals() comes from a star import; presumably it packages this
# function's locals (input, test, digit2at) into the object main() reads
# opts.input / opts.digit2at from.
@optfunc.arghelp('input','input file here (- means STDIN)')

def options(input=default_in,test=False,digit2at=True):
    if test:
        # test mode: drop all command-line arguments and read the sample file
        sys.argv=sys.argv[0:1]
        input=test_in
    main(Locals())
54 | 
55 | optfunc.main(options)
56 | 
57 | 


--------------------------------------------------------------------------------
/sblm/pcfg-map-precomb:
--------------------------------------------------------------------------------
#!/bin/bash
# Mapper with local pre-combining: pcfg-map | precombine.py | count.py, all
# taken from the directory this script lives in.
# Paths are quoted so an install directory containing spaces still works.
d=$(readlink -nfs "$(dirname "$0")")
# records buffered by precombine.py before each flush; override via $buflines
buflines=${buflines:-1000000}
"$d"/pcfg-map | "$d"/precombine.py -b "$buflines" | "$d"/count.py
5 | 


--------------------------------------------------------------------------------
/sblm/precombine.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys, getopt, collections
 3 | 
 4 | # precombine.py [-k <keysize>] [-b <bufsize>]
 5 | # prepare map output for input to a combiner
 6 | # <keysize> = number of key fields (default 1)
 7 | # <bufsize> = maximum number of records in buffer (default 100000)
 8 | 
 9 | if __name__ == "__main__":
10 |     opts, args = getopt.gnu_getopt(sys.argv[1:], 'k:b:')
11 |     opts = dict(opts)
12 | 
13 |     n_keys = int(opts.get('-k', 1))
14 |     buf_size = int(opts.get('-b', 100000))
15 | 
16 |     buf = collections.defaultdict(list)
17 |     count = 0
18 |     for line in sys.stdin:
19 |         record = line.rstrip().split('\t')
20 |         key = tuple(record[:n_keys])
21 |         buf[key].append(record)
22 |         count += 1
23 | 
24 |         if count >= buf_size:
25 |             for key, records in buf.iteritems():
26 |                 for record in records:
27 |                     print "\t".join(record)
28 |             buf.clear()
29 |             count = 0
30 | 
31 |     for key, records in buf.iteritems():
32 |         for record in records:
33 |             print "\t".join(record)
34 | 


--------------------------------------------------------------------------------
/sblm/rules:
--------------------------------------------------------------------------------
1 | NP-0(NESTED(JJ("bonus") PIG("pig") NESTED(JJ("bonus") x1:NP-0)) NN-2 ("volume") x0:NNS-0) -> "pommes" "frites" x0 ### count=3 id=122
2 | NP-0(NESTED(JJ("bonus") PIG("pig") NESTED(JJ("bonus") x0:NP-0)) NN-2 ("volume") x1:NNS-0) -> "pommes" x1 "frites" x0 ### count=3 id=121
3 | NP-0(NNP-0 ("agency") NNS-0("proposals")) -> "frites" ### id=124 count=1
4 | NP-0(NNP-0( "agency") NNS-0("proposals")) -> "pommes" "frites" ### id=123 count=3
5 | NP-0(NNP-0( "agency") NNS-0("proposals")) -> "frites" ### id=125 count=1
6 | NPB-0(NNP-0( "agency") NNS-0("proposals")) -> "pommes" "frites" ### id=126 count=3
7 | NPB-0(NNP-0( "agency") NNS-0("proposals")) -> "frites" ### id=127 count=1
8 | 


--------------------------------------------------------------------------------
/sblm/test.sh:
--------------------------------------------------------------------------------
 1 | . ~graehl/isd/hints/bashlib.sh
 2 | export PATH=~graehl/t/graehl/util:$PATH
 3 | in=${1:-10.eng-parse}
 4 | pre=${pre:-{$in%.eng-parse}.}
 5 | showvars_required in pre
 6 | export local=1
 7 | savemap=tmp.count.map iomr-hadoop $in ${pre}counted ./pcfg-map ./count.py
 8 | savemap=tmp.sums.map iomr-hadoop ${pre}counted ${pre}lhs-sums ./lhs-sums-map ./count.py
 9 | ./lhs-sums-map ${pre}counted | mapsort | ./count.py > ${pre}lhs-sums
10 | ./cat-pcfg-for-divide ${pre}lhs-sums ${pre}counted
11 | 


--------------------------------------------------------------------------------
/sblm/tree.py:
--------------------------------------------------------------------------------
1 | ../gextract/tree.py


--------------------------------------------------------------------------------
/util/.gdbinit:
--------------------------------------------------------------------------------
1 | catch throw
2 | r
3 | 


--------------------------------------------------------------------------------
/util/.gitignore:
--------------------------------------------------------------------------------
1 | .gitconfig
2 | 


--------------------------------------------------------------------------------
/util/.octaverc:
--------------------------------------------------------------------------------
1 | setenv("GNUTERM","x11")
2 | PS1 ">> "
3 | 


--------------------------------------------------------------------------------
/util/.svn.authorsfile:
--------------------------------------------------------------------------------
 1 | mhopkins = Mark Hopkins <mhopkins@sdl.com>
mdreyer = Markus Dreyer <markus@sdl.com>
 3 | jmay = Jonathan May <jmay@sdl.com>
 4 | skohli = Saiyam Kohli <skohli@sdl.com>
 5 | zwang = Ziyuan Wang <zwang@sdl.com>
 6 | graehl = Jonathan Graehl <graehl@gmail.com>
 7 | jgraehl = Jonathan Graehl <graehl@gmail.com>
 8 | jturian = Joseph Turian <turian@iro.umontreal.ca>
 9 | marcu = Daniel Marcu <marcu@isi.edu>
10 | olegb = Oleg Botchkarev <olegb@languageweaver.com>
11 | ithayer = Ignacio Thayer <ithayer@gmail.com>
12 | pust = Michael Pust <pust@isi.edu>
13 | wwang = Wei Wang <wwang@languageweaver.com>
14 | quamrul = Quamrul Tipu <quamrul@microsoft.com>
15 | lhuang = Liang Huang <lhuang@isi.edu>
16 | 


--------------------------------------------------------------------------------
/util/C-small.cc:
--------------------------------------------------------------------------------
 1 | /**
 2 | 
 3 |  */
 4 | 
 5 | #define MAXCASES 100
 6 | #include "codejam.hh"
 7 | 
// Skeleton for one test case of a contest problem; fill in gety()/solve().
// NOTE(review): CaseBase, I, PUTU and CASES_MAIN come from codejam.hh, which
// presumably drives read()/solve()/print() once per case - confirm there.
struct Case : CaseBase {
  I y;  // value written by print()
  // Stores gety()'s result in y.
  void read() {
    y = gety();
  }
  // Placeholder: always yields -1.
  I gety() {
    return -1;
  }
  // Emits y through the PUTU macro.
  void print() {
    PUTU(y);
  }
  // Debug trace of the answer on stderr.
  void show1() { cerr << " => " << y; }
  // Placeholder: no computation yet.
  void solve() {}
};
22 | 
23 | CASES_MAIN(Case)
24 | 


--------------------------------------------------------------------------------
/util/addlicense.sh:
--------------------------------------------------------------------------------
 1 | addlicense() {
 2 |     tmpfile=$(mktemp ${tmpdir:-/tmp}/license.XXXXXX)
 3 |     for f in "$@"; do
 4 |         if grep -q "WARRANT" $f; then
 5 |             echo "$f had a WARRANT string - licensed already?"
 6 |             head -10 $f
 7 |             echo ...
 8 |             echo
 9 |         else
10 |             cat $license $f > $tmpfile && mv $tmpfile $f
11 |         fi
12 |     done
13 | }
14 | LICENSE_DIR=${LICENSE_DIR:-`dirname $0`}
# List every C/C++ source or header file below the current directory.
# Directories are excluded so that a directory whose name happens to end in
# e.g. ".h" is never handed on as if it were a source file.
findc() {
    find . ! -type d \( -name '*.hpp' -o -name '*.cpp' -o -name '*.ipp' -o -name '*.cc' -o -name '*.hh' -o -name '*.c' -o -name '*.h' \)
}
# addlicenses [LICENSE_FILE]
# Runs addlicense over every file findc reports below the current directory.
# LICENSE_FILE defaults to $LICENSE_DIR/license.txt; if it does not exist a
# usage line is printed instead.
addlicenses() {
    local license=${1:-$LICENSE_DIR/license.txt}
    if [[ -f $license ]] ; then
        # Read the file list line by line into an array; the old `findc`
        # backtick expansion broke paths containing spaces into pieces.
        local files=() f
        while IFS= read -r f; do
            files+=("$f")
        done < <(findc)
        addlicense "${files[@]}"
    else
        echo "usage: cd src; addlicenses ../license.txt"
    fi
}
26 | 


--------------------------------------------------------------------------------
/util/alignment-links.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | usage = '''
 4 | Show e,f,a (tab separated) in a more legible format with alignment links after word {#i:j k}. e and f are on alternating lines of output
 5 | alignment a is pairs (s t)* where s 0-based indexes e, t 0-based indexes f. indices > #words (space sep) in e or f are considered NULL alignments (not aligned to any word) and ignored.
 6 | '''
 7 | 
 8 | import argparse
 9 | import sys
10 | 
11 | parser=argparse.ArgumentParser(description=usage)
12 | 
def aword(i, a, w):
    """Return word w prefixed with '{i:links}', where links is a joined by spaces."""
    links = ' '.join(str(target) for target in a)
    return '{%d:%s}%s' % (i, links, w)
15 | 
def awords(a, w):
    """Annotate every word of w with its links a[i] and join the results with spaces."""
    pieces = []
    for idx, word in enumerate(w):
        pieces.append(aword(idx, a[idx], word))
    return ' '.join(pieces)
18 | 
19 | 
def forfiles(infiles):
    """Feed every line of every open file in infiles to forline, in order."""
    for handle in infiles:
        for text in handle:
            forline(text)
24 | 
def forline(line):
    """Print one e/f/alignment record in annotated form.

    The last three tab-separated fields of line are taken as e, f and the
    alignment (a flat list of 's t' index pairs). Lines with fewer than three
    fields are ignored. Output is the annotated e line, the annotated f line
    and a blank separator line.
    """
    fields = line.split('\t')
    if len(fields) < 3: return
    S, T, A = fields[-3:]
    S = S.split()
    T = T.split()
    A = A.split()
    # Pair up consecutive indices. Stopping at len(A) - 1 ignores a dangling
    # odd index instead of raising IndexError.
    al = [(int(A[i]), int(A[i+1])) for i in range(0, len(A) - 1, 2)]
    s2t = [[] for _ in S]
    t2s = [[] for _ in T]
    for s,t in al:
        # Out-of-range indices mean "aligned to NULL" and are skipped; negative
        # ones are rejected too, since they would otherwise index from the end.
        if 0 <= s < len(S) and 0 <= t < len(T):
            s2t[s].append(t)
            t2s[t].append(s)
    print(awords(s2t, S))
    print(awords(t2s, T))
    # Blank separator line. A bare `print` is only a name lookup in Python 3
    # and printed nothing.
    print()
42 | 
def main(infiles):
    """Process each named file in turn, or stdin when no names are given."""
    if not infiles:
        forfiles([sys.stdin])
        return
    for path in infiles:
        # Open one file at a time and close it as soon as it has been read; the
        # previous version opened every file up front and never closed any.
        with open(path, 'r') as handle:
            forfiles([handle])
45 | 
46 | if __name__ == '__main__':
47 |     main(sys.argv[1:])
48 | 


--------------------------------------------------------------------------------
/util/bash.txt:
--------------------------------------------------------------------------------
 1 | inside [[
 2 | 
 3 | ||             logical or (double brackets only)
 4 | &&           logical and (double brackets only)
 5 | <            string comparison (no escaping necessary within double brackets)
 6 | -lt          numerical comparison
 7 | =             string equality
 8 | ==         string matching with globbing (double brackets only, see below)
 9 | =~            string matching with regular expressions (double brackets only , see below)
10 | -n            string is non-empty
11 | -z            string is empty
12 | -eq           numerical equality
13 | 
14 | -ne           numerical inequality
15 | 
[[ "$t" == abc* ]]         # true (globbing)
17 | [[ "$t" == "abc*" ]]       # false (literal matching)
18 | [[ "$t" =~ [abc]+[123]+ ]] # true (regular expression)
19 | [[ "$t" =~ "abc*" ]]       # false (literal matching)
20 | 
21 | Note, that starting with bash version 3.2 the regular or globbing expression
22 | must not be quoted. If your expression contains whitespace you can store it in a variable:
23 | r="a b+"
24 | [[ "a bbb" =~ $r ]]        # true
25 | 
26 | 
27 | Avoiding Temporary Files
28 | 
 29 | Some commands expect filenames as parameters, so straightforward pipelining does not work.
 30 | This is where the <() operator comes in handy: it takes a command and transforms it into something
 31 | that can be used as a filename:
32 | 
33 | # download and diff two webpages
34 | diff <(wget -O - url1) <(wget -O - url2)
35 | 


--------------------------------------------------------------------------------
/util/c++space:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -i~
 2 | while(<>) {
 3 |     if (/^( +)}\s*$/) {
 4 |         print $delay if ($delay);
 5 |         $indentlvl = length $1;
 6 |         $delay=$_;
 7 |     } else {
 8 |         if ($delay) {
 9 |             if (/^( +)(else.*)$/ && length($1)==$indentlvl) {
10 |                 $_="$1} $2\n";
11 |             } else {
12 |                 print $delay;
13 |             }
14 |             $delay=undef;
15 |         }
16 |         s/ (if|for|while|switch|foreach)\(/ $1 (/;
17 |         s/(?<=\S){/ {/;
18 |         s/\):/) :/;
19 |         print;
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/util/ccache-wrapper.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | name=`basename $0`
 3 | d=`dirname $0`
 4 | if [[ $d != . ]] ; then
 5 |   export PATH=`dirname $0`:$PATH
 6 | fi
 7 | ccbasename=${name#ccache-}
 8 | CCACHE_DIR=${CCACHE_DIR:-/local/graehl/ccache}
 9 | mkdir -p $CCACHE_DIR || CCACHE_DIR=
10 | if [[ -d $CCACHE_DIR ]] ; then
11 |     export CCACHE_DIR=$CCACHE_DIR
12 | fi
13 | exec ccache $ccbasename "$@"
14 | 


--------------------------------------------------------------------------------
/util/charvocab.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | from __future__ import print_function
 3 | import sys
 4 | import codecs
 5 | from collections import Counter
 6 | 
 7 | # python 2/3 compatibility
 8 | if sys.version_info < (3, 0):
 9 |     sys.stderr = codecs.getwriter('UTF-8')(sys.stderr)
10 |     sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
11 |     sys.stdin = codecs.getreader('UTF-8')(sys.stdin)
12 | else:
13 |     sys.stderr = codecs.getwriter('UTF-8')(sys.stderr.buffer)
14 |     sys.stdout = codecs.getwriter('UTF-8')(sys.stdout.buffer)
15 |     sys.stdin = codecs.getreader('UTF-8')(sys.stdin.buffer)
16 | 
17 | c = Counter()
18 | 
19 | for line in sys.stdin:
20 |     for char in line.rstrip("\r\n"):
21 |         c[char] += 1
22 | 
23 | for key,f in sorted(c.items(), key=lambda x: x[1], reverse=True):
24 |     print("%s %s # U+%04x"%(key, f, ord(key)))
25 | 


--------------------------------------------------------------------------------
/util/check-condor:
--------------------------------------------------------------------------------
#!/bin/bash
# Summarize the current condor queue, grouped by working directory.
#
# condor_q prints "ClusterId Iwd Out" per job; the list is sorted on the
# second column (Iwd) and piped to an inline perl script that:
#   * prints a numbered, colored heading each time the directory changes and
#     restarts the per-directory job counter;
#   * strips a trailing ".out"/".output"-like suffix from the Out name and,
#     if "<dir>/<name>.err.<ClusterId>" exists, reads its first line and takes
#     whatever follows "on " as the host name;
#   * prints "<job#> <ClusterId> | <Out> <host>" for every job;
#   * at the end, runs a batch-mode `top` for $USER over ssh on every host
#     collected that way and prints its filtered output.
# NOTE(review): the host name taken from the .err file is interpolated into
# the ssh command line unquoted - confirm those files are trusted.

condor_q -format "%s " ClusterId -format "%s " Iwd  -format "%s\n" Out \
  | sort -k2 \
  | perl -MTerm::ANSIColor -ane '$dir=$F[1]; if($dir ne $prev_dir) {print ($.>1?"\n":""); print color "bold blue"; ++$cnt_dirs; print "($cnt_dirs) $dir\n"; print color "reset"; $cnt_jobs=0} ++$cnt_jobs;
my $host="";
my $g = $F[2];
$g =~ s/.out(put)?$//;
 my $hostf = "$dir/$g.err.$F[0]";
 if ( -f $hostf ) {
 my $fh;
 open $fh,$hostf or die "opening $hostf";
 my $l = <$fh>;
chomp $l;
 $host="$1" if $l =~ /on (.*)$/;
$hosts{$host} = 1;
 }
print "$cnt_jobs $F[0] | $F[2] $host\n"; $prev_dir=$dir;
END { foreach (keys %hosts) {
$out=`set -x;ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=20 $_ "top -b -n 1 -u $ENV{USER} | grep '"'^ *[0-9]'"' | grep -v top | grep -v grep | grep -v ssh | grep -v bash | grep -v perl"`;
chomp $out;
print "$_ $out\n";
}}
'
25 | 


--------------------------------------------------------------------------------
/util/close-ns-inplace.pl:
--------------------------------------------------------------------------------
#!/usr/bin/perl -i~
# Edits each file named on the command line in place (original kept as
# FILE~), normalizing the closing braces found near the end of the file.
# Each file is buffered completely and rewritten when its last line is read.

my @lines;

while (<>) {
    push @lines, $_;
    if (eof(ARGV)) {
        # Only the tail of the file is examined: the scans below never go
        # below index $m, i.e. more than 10 lines back from the last line.
        $i=$#lines;
        $m=$i - 10;
        $m = 0 if ($m < 0);
        # Step back over trailing blank lines and lines starting with #endif.
        while($i > $m && $lines[$i] =~ /^#endif|^\s*$/) { --$i; }
        if ($lines[$i] =~ s#^(}+) *//(ns|.*namespace).*$#$1#) {
            # The line was a run of '}' followed by a "//ns" or
            # "//...namespace" comment; the comment has just been removed.
            # Add blank-line padding after the nearest non-blank line above
            # it, looking at most two lines back.
            if ($i >= 2) {
                if ($lines[$i-1] =~ /\S/) {
                    $lines[$i-1] .= "\n\n";
                } elsif ($lines[$i-2] =~ /\S/) {
                    $lines[$i-2] .= "\n";
                }
            }
        } else {
            # Otherwise count the run of consecutive lines that hold a lone
            # '}' in the first column.
            $e=$i;
            while($i > $m && $lines[$i] =~ /^}\s*$/) { --$i; }
            $nclose = $e - $i;
            if ($nclose) {
                # $s becomes the index just after the last non-blank line
                # that precedes the run of braces.
                $s = $i;
                while ($s > 0 && $lines[$s] =~ /^\s*$/) { --$s; }
                ++$s;
                # Replace everything from $s through the last brace line with
                # two blank lines and all the braces joined on one line.
                splice @lines,$s,$e-$s+1,"\n","\n",('}' x $nclose)."\n";
            }
        }
        print for (@lines);
        @lines=();
    }
}
35 | 


--------------------------------------------------------------------------------
/util/codejam-example.cc:
--------------------------------------------------------------------------------
 1 | #define MAXCASES 100
 2 | #include "codejam.hh"
 3 | 
 4 | struct Case : CaseBase {
 5 |   typedef map<u64, U> Runs;
 6 |   Runs runs;
 7 |   u64 N, K;
 8 |   u64 floorhalf;
 9 |   u64 ceilhalf;
10 |   u64 take() {
11 |     assert(!runs.empty());
12 |     auto i = runs.end();
13 |     --i;
14 |     u64 k = i->first;
15 |     U& n = i->second;
16 |     if (!--n) runs.erase(i);
17 |     return k;
18 |   }
19 |   void enter() {
20 |     u64 last = take();
21 |     assert(last);
22 |     --last;
23 |     floorhalf = last / 2;
24 |     ceilhalf = last - floorhalf;
25 |     add(floorhalf);
26 |     add(ceilhalf);
27 |   }
28 |   void add(u64 x) {
29 |     if (x) ++runs[x];
30 |   }
31 | 
32 |   void read() {
33 |     N = GETu64;
34 |     K = GETu64;
35 |     assert(N);
36 |     assert(K);
37 |     assert(K <= N);
38 |     runs.clear();
39 |     runs[N] = 1;
40 |   }
41 |   void print() {
42 |     putU(ceilhalf);
43 |     putsp();
44 |     putU(floorhalf);
45 |   }
46 |   void show1() {
47 |   }
48 |   void solve() {
49 |     for (u64 i = 0; i < K; ++i) enter();
50 |   }
51 | };
52 | 
53 | CASES_MAIN(Case)
54 | 


--------------------------------------------------------------------------------
/util/color.xetex:
--------------------------------------------------------------------------------
% Load the color package with the dvipsnames and usenames options. The
% options are handed over before \usepackage so they also apply if another
% package loads color first.
\PassOptionsToPackage{dvipsnames,usenames}{color}
\usepackage{color}
3 | 


--------------------------------------------------------------------------------
/util/config.fish:
--------------------------------------------------------------------------------
# l: `ls -lah` on the given arguments (long format, all entries,
# human-readable sizes).
function l --description 'List entire contents of directory using long format'
  ls -lah $argv
end
4 | 
# lt: `ls -lhrt` on the given arguments (long format, human-readable sizes,
# sorted by time in reverse order).
# NOTE(review): unlike `l` this passes no -a, so hidden entries are not
# listed although the description says "entire contents" - confirm intent.
function lt --description 'List (by time) entire contents of directory using long format'
  ls -lhrt $argv
end
8 | 


--------------------------------------------------------------------------------
/util/datespan.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env pypy
 2 | from graehl import *
 3 | import sys
 4 | if __name__ == '__main__':
 5 |     for x in sys.argv[1:]:
 6 |         l,u=filedaterange(x,False)
 7 |         sys.stderr.write('%s %s\n'%(l,u))
 8 |         a=u-l if (u is not None and l is not None) else '???'
 9 |         l,u=(min(nonone((ctime(x),l))),max(nonone((mtime(x),u))))
10 |         sys.stderr.write('%s %s\n'%(l,u))
11 |         b=u-l
12 |         print '%s\n%s\n\n'%(a,b),
13 | 


--------------------------------------------------------------------------------
/util/dictdiff:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env pypy
 2 | #-*- python -*-
 3 | from graehl import *
 4 | from nbest import *
 5 | from dumpx import *
 6 | usage="""
 7 | given two input files, print sorted (optionally absolute) differences between their numeric f=val.
 8 | (note: multiply appearing keys use the last value)
 9 | """
10 | 
11 | def main(rest_=[],sortabs=False,bypercent=False,alphabetical=False,usage_=usage,header=True):
12 |    fnames=rest_
13 |    fs=map(readfrom,fnames)
14 |    if len(fs)!=2:
15 |       error(usage)
16 |    desc='NAME\t[%s] - [%s]\tPERCENT CHANGE'%fnames
17 |    ds=[]
18 |    for f in fs:
19 |       d=dict()
20 |       for l in readfrom(f):
21 |          for k,v in yieldfields_num(l):
22 |             d[k]=float(v)
23 |       ds.append(d)
24 |    #ds=[dict(flatten(yieldfields_num(l) for l in readfrom(f))) for f in fs] #dict to remove dups. flatten to combine across all file lines
25 |    da,db=ds
26 |    absf=abs if sortabs else identity
27 |    dd=[(k,v,v/max(abs(da.get(k,0.)),abs(db.get(k,0.)))) for (k,v) in dict_diff(da,db,diff).iteritems()]
28 |    if alphabetical:
29 |       keyf=identity
30 |    elif percent:
31 |       keyf=lambda x:(absf(x[2]),absf(x[1]))
32 |    else:
33 |       keyf=lambda x:absf(x[1])
34 |    dd.sort(key=keyf)
35 |    if header:
36 |       print desc
37 |    else:
38 |       info(desc)
39 |    for k,v,frac in dd:
40 |       print '%s\t%s\t%s'%(k,v,percent_change(frac))
41 | import optfunc
42 | optfunc.main(main)
43 | 


--------------------------------------------------------------------------------
/util/dotprod.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import sys
 3 | 
 4 | def readmap(f):
 5 |     m = {}
 6 |     if type(f) == str:
 7 |         f = open(f, 'r')
 8 |     for line in f:
 9 |         (k, v) = line.split()
10 |         if k in m:
11 |             raise "duplicate %s" % k
12 |         m[k] = float(v)
13 |     return m
14 | 
15 | 
def dotprod(m1, m2):
    """Return the dot product of two sparse vectors given as dicts.

    Only keys present in both maps contribute. Every contributing term is
    also traced on stderr as "+ (v1 * v2 = product) // key".
    """
    total = 0.0
    for key in m1:
        if key not in m2:
            continue
        left = m1[key]
        right = m2[key]
        product = left * right
        sys.stderr.write('+ (%s * %s = %s) // %s\n' % (left, right, product, key))
        total += product
    return total
25 | 
# Script body: the two command-line arguments are the files holding the
# vectors; the dot product goes to stdout.
(f1, f2) = sys.argv[1:]
m1 = readmap(f1)
m2 = readmap(f2)
# Call form of print: same output under Python 2, and no longer a syntax
# error under Python 3.
print(dotprod(m1, m2))
30 | 


--------------------------------------------------------------------------------
/util/dropcaches.c:
--------------------------------------------------------------------------------
 1 | /** for linux,
 2 | 
 3 |     echo 3 | sudo tee /proc/sys/vm/drop_caches
 4 | 
 5 |     might work but might prompt for password
 6 | 
 7 |     you can't +suid a shell script, but you could +suid this.
 8 | 
 9 | */
10 | #include <stdio.h>
11 | 
char const* const dropname = "/proc/sys/vm/drop_caches";

/**
   Writes "3\n" to dropname.

   \return 0 on success, 1 (after a message on stderr) if the file cannot be
   opened or the write does not go through. The write is buffered, so a
   failure may only surface in fclose(); both results are checked.
*/
int main(void) {
  int ok = 0;
  FILE *f = fopen(dropname, "w");
  if (f) {
    ok = fputs("3\n", f) >= 0;
    if (fclose(f) != 0)
      ok = 0;
  }
  if (!ok) {
    fprintf(stderr, "Couldn't write to %s - you must run as (setuid) root on a Linux system?\n", dropname);
    return 1;
  }
  return 0;
}
25 | 


--------------------------------------------------------------------------------
/util/dumpx.py:
--------------------------------------------------------------------------------
1 | ../gextract/dumpx.py


--------------------------------------------------------------------------------
/util/edit:
--------------------------------------------------------------------------------
#!/bin/bash
# Open the given files in emacs: through emacsclient by default, or in a
# terminal emacs when $CONSOLE is set.

# Classify the platform from uname; anything that is neither Darwin nor
# Linux is treated as Windows.
case $(uname) in
    Darwin)
        lwarch=Apple
        ;;
    Linux)
        lwarch=FC12
        shopt -s globstar || true
        ;;
    *)
        lwarch=Windows ;;
esac

# On Windows rely on PATH lookup; elsewhere use the binaries under $emacsapp.
if [[ $lwarch = Windows ]] ; then
    emacscli=emacsclient
    emacssrv=Emacs
else
#    emacsapp=/Applications/Emacs.app/Contents/MacOS/
    emacsapp=/usr/local
    emacscli=$emacsapp/bin/emacsclient
    emacssrv=$emacsapp/bin/emacs
fi

# -a names the editor emacsclient falls back to when no server is reachable.
if [[ $CONSOLE ]] ; then
    exec /usr/bin/emacs -nw "$@"
else
    exec $emacscli -a $emacssrv "$@"
fi
29 | 


--------------------------------------------------------------------------------
/util/etree.py:
--------------------------------------------------------------------------------
1 | ../sblm/etree.py


--------------------------------------------------------------------------------
/util/featstats.py:
--------------------------------------------------------------------------------
1 | stats.py


--------------------------------------------------------------------------------
/util/findscripts.sh:
--------------------------------------------------------------------------------
# usage: findscripts.sh INTERP [DIR]
# Print every regular file under DIR (default: .) that is smaller than
# 1000k, is not a backup (*~) or svn file, and whose first line is a
# "#!/..." line mentioning INTERP.
interp=$1
shift
# After the shift the directory is the first remaining argument; reading $2
# here skipped it and used the argument after it.
dir=${1:-.}
# Read names line by line so paths containing spaces stay in one piece.
find "$dir" -type f -size -1000k ! -name '*~' ! -name '*svn*' | while IFS= read -r f ; do
    if head -n 1 "$f" | grep '^#!/' | grep -F -q -- "$interp" ; then
        echo "$f"
    fi
done
9 | 


--------------------------------------------------------------------------------
/util/fix-include-guard-inplace.pl:
--------------------------------------------------------------------------------
#!/usr/bin/perl -i~
# Edits each file named on the command line in place (original kept as
# FILE~), normalizing the end of the file: trailing blank lines are removed
# and, if the last non-blank line starts with #endif, it is rewritten as one
# blank line followed by a bare "#endif".

my @lines;
while (<>) {
    push @lines, $_;
    if (eof(ARGV)) {
        # Whole file is buffered. Scan backwards from the last line, but never
        # below index $m (at most 10 lines).
        $i=$#lines;
        $e=$i;
        $m=$i - 10;
        $m = 0 if ($m < 0);
        @endif = ();
        while($i > $m) {
            $_ = $lines[$i];
            if (/^\#endif/) {
                # Only the last #endif is absorbed; a second one ends the scan.
                last if scalar @endif;
                @endif = ("\n#endif\n");
            } elsif (/\S/) {
                # Any other non-blank line ends the scan.
                last;
            }
            --$i;
        }
        # Lines $i+1 .. $e are the blank/#endif tail; swap them for the
        # normalized text (nothing at all when no #endif was seen).
        $len = $e-$i;
        splice @lines,$i+1,$len,@endif if ($len > 0);
        print for (@lines);
        @lines=();
    }
}
28 | 


--------------------------------------------------------------------------------
/util/fixunrpn_:
--------------------------------------------------------------------------------
#!/usr/bin/perl
# For every input line that contains a tab and ends in '_', look at the text
# from the first tab to the end of the line. If its first character is not
# already lower case, print "<text>\t<text with that character lowercased>".
use strict;
use warnings;

while (my $line = <>) {
    chomp $line;
    my ($initial, $tail) = $line =~ /\t(.)(.*_)$/
        or next;
    my $lowered = lc $initial;
    next if $lowered eq $initial;
    print "$initial$tail\t$lowered$tail\n";
}
9 | 


--------------------------------------------------------------------------------
/util/float-round.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use strict;
 3 | use utf8;
 4 | binmode STDIN, ':utf8';
 5 | binmode STDOUT, ':utf8';
 6 | 
 7 | my $default_precision=$ENV{DIGITS}||5;
 8 | 
 9 | sub real_prec {
10 |     my ($n,$prec)=@_;
11 |     $prec=$default_precision unless defined $prec;
12 |     sprintf("%.${prec}g",$n);
13 | }
14 | 
# An optionally signed decimal number: either ".digits", or "digits" with an
# optional fraction and an optional exponent. Note that the exponent is only
# part of the second alternative.
my $num_match=qr/[+\-]?(?:\.\d+|\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?)/;

# Copy input to output with every number on each line replaced by
# real_prec() of it (default precision).
while(<>) {
    s/($num_match)/real_prec($1)/eg;
    print;
}
21 | 


--------------------------------------------------------------------------------
/util/forall.sub:
--------------------------------------------------------------------------------
#!/usr/bin/perl -i~
# Rewrites, in place (backup in FILE~), the first
#     BOOST_FOREACH (decl, range)     or     foreach (decl, range)
# on each line as the range-based
#     for (decl : range)
# The closing parenthesis must be followed by a space or the end of the line.

while(<>) {
    # "(.+)" instead of the former "(.+)+": it matches exactly the same text
    # with the same capture, but without the nested quantifier that makes a
    # line which starts like a match yet fails at the end (e.g. "...);")
    # backtrack exponentially.
    s{(?:BOOST_FOREACH|foreach) \(([^,)]+), (.+)\)( |$)}{for ($1 : $2)$3};
    print;
}
7 | 


--------------------------------------------------------------------------------
/util/format-doxygen-c-comment:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w -i~
# Reformats "/** ... */" comments in place (original kept as FILE~):
# continuation lines lose their leading "* " and are indented at least $hang
# columns past the opening "/**", a leading "@" or "\" keyword marker is
# normalized to "\", the "brief" keyword is dropped, and a blank line is
# inserted before a keyword line unless the previous line was already blank.
use strict;
my $com = 0;         # true while inside a multi-line /** comment
my $wantspaces = 0;  # minimum indentation for lines of the current comment
my $hang = 3;        # extra indentation relative to the opening "/**"
my $lastblank = 0;   # true if the previous comment line had no text
my $kwstarts = '\\'; # marker written in front of keywords (one backslash)
while(<>) {
    chomp;
    if (m{^( *)(/\*\*)(.*)}) {
        # Line opens a doc comment: $space is its indentation, $body the text
        # after "/**".
        my $body = $3;
        my $open = $2;
        my $space = $1;
        # Handled only if the comment continues on later lines or is closed
        # at the very end of this line.
        if (!m{\*/} || m{\*/\s*$}) {
            $wantspaces = length($space) + $hang;
            $com = 1;
            $lastblank = !($body =~ m{\S});
            # Normalize a keyword marker that starts the body.
            if ($body =~ s/^( *)[@\\]/$1$kwstarts/) {
                $_ = "$space$open$body";
            }
            s/[@\\]brief ?//;
            # A comment closed on its opening line has no continuation lines.
            $com = 0 if (m{\*/});
        }
    } elsif ($com) {
        if (m{\*/}) {
            $com = 0;
        } elsif (m{\S}) {
            # Replace the leading "* " decoration by a single space.
            s/^( *)\* /$1 /;
            m{^( *)} || die;
            # Pad under-indented lines up to $wantspaces columns.
            my $needspaces = $wantspaces - length($1);
            $_ = (' ' x $needspaces) . $_ if ($needspaces > 0);
            # Keyword lines get the "\" marker and a preceding blank line
            # (unless one is already there).
            my $nl = $lastblank ? '' : "\n";
            s/^( *)[@\\]/$nl$1$kwstarts/;
            s/[@\\]brief ?//;
        }
        $lastblank = !/\S/;
    }
    print $_,"\n";
}
40 | 


--------------------------------------------------------------------------------
/util/giraffe:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . ~graehl/isd/hints/bashlib.sh
3 | unset PBS_O_WORKDIR
4 | echo $d/giraffe.0.3 $(realpath "$@")
5 | exec $d/giraffe.0.3 $(realpath "$@")
6 | 


--------------------------------------------------------------------------------
/util/gist:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | cmd = %Q{curl https://gist.github.com/gists -s -L -o /dev/null -w "%{url_effective} " }
 4 | 
 5 | files = ARGV.empty? ? Dir["**/*"] : Dir[*ARGV].uniq
 6 | 
 7 | files.select { |f| File.file?(f) }.each_with_index do |path, i|
 8 |   cmd << %Q{-F "file_ext[gistfile#{i}]=#{File.extname(path)[1..-1]}" }
 9 |   cmd << %Q{-F "file_name[gistfile#{i}]=#{File.basename(path)}" }
10 |   cmd << %Q{-F "file_contents[gistfile#{i}]=<#{path}" }
11 | end
12 | 
13 | exec cmd
14 | 


--------------------------------------------------------------------------------
/util/gitalias.sh:
--------------------------------------------------------------------------------
1 | git config --global alias.co checkout
2 | git config --global alias.br branch
3 | git config --global alias.ci commit
4 | git config --global alias.st status
5 | git config --global alias.unstage 'reset HEAD --'
6 | git config --global alias.last 'log -1 HEAD'
7 | 


--------------------------------------------------------------------------------
/util/gnuplot.auto.inc:
--------------------------------------------------------------------------------
# y-axis tics: automatic spacing (autofreq), labels in the color of
# linetype 1 (tc lt 1).
set ytics autofreq tc lt 1
2 | 


--------------------------------------------------------------------------------
/util/graehl.py:
--------------------------------------------------------------------------------
1 | ../gextract/graehl.py


--------------------------------------------------------------------------------
/util/growth:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env pypy
 2 | #-*- python -*-
 3 | #TODO: approx #lines/sec (look in new bytes only)
 4 | from graehl import *
 5 | 
def mtime_age(path):
    # String form of (datenow() - mtime(path)); both helpers come from graehl.
    # Not called anywhere in the visible part of this script.
    return str(datenow()-mtime(path))
 8 | 
 9 | tsep='\t'
10 | fsep=' '
11 | 
12 | import time
def main(rest_='',sleep=600,fields='SsMm',N=-1):
    # Periodically sample mtime and size of the files in rest_ and print one
    # tab-separated row per sample.
    #   sleep  -- seconds to wait after each printed row
    #   fields -- which columns to print, one letter each (see captions)
    #   N      -- number of rows to print; a negative value means no limit
    # Within a column the values of the individual files are separated by a
    # space (fsep); columns are separated by a tab (tsep).
    logcmd()
    fs=rest_
    captions={'S':'size','s':'size/sec','M':'mtime','m':'delta(mtime)'}
    #print "sleep=%s"%sleep
    print "start=%s"%datenow()
    for f in fs: print "file=%s"%f
    print tsep.join([captions[f] for f in fields])
    sleep=float(sleep)
    s=None  # previous sample, None before the first one
    n=0     # samples taken so far
    a=dict()
    while True:
        if N>=0 and n>N: break
        # One (file index, mtime, size) tuple per file.
        s2=[(i,mtime(fs[i]),filesize(fs[i])) for i in range(len(fs))]
        if s is not None:
            a['M']=[str(x[1]) for x in s2]
            a['m']=[str(x[1]-s[x[0]][1]) for x in s2]
            a['S']=[mega(x[2]) for x in s2]
            # Size change since the previous sample divided by the nominal
            # sleep interval.
            # NOTE(review): no sleep happens between the first and the second
            # sample, so the first printed row compares two back-to-back
            # samples - confirm this is intended.
            a['s']=[mega((x[2]-s[x[0]][2])/sleep) for x in s2]
            print tsep.join(fsep.join(a[k]) for k in fields)
            time.sleep(sleep)
        s=s2
        n+=1
37 | import optfunc
38 | optfunc.main(main)
39 | 


--------------------------------------------------------------------------------
/util/hexnorm.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -i~
 2 | my %id;
 3 | my $idre = qr/(?:0x|thread:)([0-9a-f]+)/;
 4 | my $opre = '#x';
 5 | my $nid = 0;
 6 | sub getid {
 7 |     my ($x) = @_;
 8 |     $opre.(exists $id{$x} ? $id{$x} : ($id{$x} = $nid++));
 9 | }
10 | while(<>) {
11 |     s/$idre/getid($1)/eg;
12 |     print;
13 | }
14 | 


--------------------------------------------------------------------------------
/util/indent-c-comment:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w -i~
# Re-indents the continuation lines of multi-line "/** ... */" comments in
# place (original kept as FILE~): the leading "* " decoration is replaced by
# spaces, lines are indented at least $hang columns past the opening "/**",
# and a leading "@" is turned into "\".
use strict;
my $com = 0;         # true while inside a multi-line /** comment
my $wantspaces = 0;  # minimum indentation for lines of the current comment
my $hang = 3;        # extra indentation relative to the opening "/**"
while(<>) {
    if (m{^( *)/\*\*} && !m{\*/}) {
        # Opening line of a comment that is not closed on the same line.
        $com = 1;
        $wantspaces = length($1) + $hang;
    } elsif ($com) {
        if (m{\*/}) {
            $com = 0;
        } elsif (m{\S}) {
            # Replace the "* " decoration by two spaces.
            s/^( *)\* /$1  /;
            m{^( *)} || die;
            # Missing indentation is printed in front of the line.
            my $needspaces = $wantspaces - length($1);
            print ' ' x $needspaces if ($needspaces > 0);
            s/^( *)\@/$1\\/;
        }
    }
    print;
}
23 | 


--------------------------------------------------------------------------------
/util/joinleft:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env pypy
 2 | from graehl import *
 3 | from collections import defaultdict
 4 | 
 5 | def main(rest_=['-'],keyfields=1,sep='\t',npad=0,allow_over_npad=True,padval='',sort=True,header=False):
 6 |     t=ListDict()
 7 |     keys=[]
 8 |     i=0
 9 |     ncol=[0 for _ in rest_]
10 |     for i,f in ival(rest_):
11 |         for l in open_in(f):
12 |             l=l.rstrip()
13 |             f=l.split(sep,keyfields)
14 |             k=tuple(f[:keyfields])
15 |             if k not in t: keys.append(k)
16 |             v=f[keyfields:]
17 |             maxeq(ncol,i,len(v))
18 |             at_expand(t[k],i,v,[])
19 |             #v=pad(v,npad,padval,npad==0)
20 |             #t[k]+=v
21 |         i+=1
22 |     if sort:
23 |         keys=sorted(keys)
24 |         for i in indices[ncols]:
25 |             ncols[k]=max(ncols[k],npad) if allow_over_npad else npad
26 |     for k in keys:
27 |         print sep.join(list(k)+flatlist(pad(l,ncol[i],pad=padval) for (i,l) in ival(t[k])))
28 | 
29 | import optfunc
30 | optfunc.main(main)
31 | 


--------------------------------------------------------------------------------
/util/license.txt:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jonathan Graehl - http://graehl.org/
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 


--------------------------------------------------------------------------------
/util/localgcc.sh:
--------------------------------------------------------------------------------
# Classify the platform from uname into lwarch (Apple, FC12 or Windows);
# anything that is neither Darwin nor Linux is treated as Windows.
case $(uname) in
    Darwin)
        lwarch=Apple
        ;;
    Linux)
        lwarch=FC12
        shopt -s globstar || true
        ;;
    *)
        lwarch=Windows ;;
esac
# Root of the locally installed gcc; may be preset by the caller.
gccprefix=${gccprefix:-/local/gcc}
# appendld DIR
# Append DIR to the library search path variables and export them:
# DYLD_FALLBACK_LIBRARY_PATH when lwarch is Apple, otherwise both LD_RUN_PATH
# and LD_LIBRARY_PATH. A leading ':' left over from appending to an empty
# variable is stripped.
appendld() {
    if [[ $lwarch = Apple ]] ; then
        DYLD_FALLBACK_LIBRARY_PATH+=":$1"
        export DYLD_FALLBACK_LIBRARY_PATH=${DYLD_FALLBACK_LIBRARY_PATH#:}
    else
        LD_RUN_PATH+=":$1"
        export LD_RUN_PATH=${LD_RUN_PATH#:}
        LD_LIBRARY_PATH+=":$1"
        # Export the variable's own value; it used to be overwritten with the
        # contents of LD_RUN_PATH, discarding the existing LD_LIBRARY_PATH.
        export LD_LIBRARY_PATH=${LD_LIBRARY_PATH#:}
    fi
}
# NOLOCALGCC=1 disables the local toolchain by clearing gccprefix.
if [[ $NOLOCALGCC = 1 ]] ; then
    gccprefix=
fi
# If gccprefix is an existing directory, put its bin directory first on PATH
# and add its lib64 directory to the library paths.
if [[ -d $gccprefix ]] ; then
    export PATH=$gccprefix/bin:$PATH
    appendld "$gccprefix/lib64"
fi
# The compilers are set to the ccache-* wrapper names unconditionally.
export CXX=ccache-g++
export CC=ccache-gcc
33 | 


--------------------------------------------------------------------------------
/util/mflist.pl:
--------------------------------------------------------------------------------
#!/usr/bin/perl
# Extracts rows from an HTML table that follows the text
# "Most Recent Quarter Data". A row is recognized as a left-aligned cell on
# one line followed by two center-aligned cells on the next two lines; for
# each such row the script prints
#     <first center cell> TAB <second center cell as integer> TAB <left cell>
my $d="\t";
# Skip everything up to and including the marker line.
while(<>) {
    last if /Most Recent Quarter Data/;
}
# $l counts the lines seen since the last left-aligned cell; 10 (or more)
# means "not inside a row".
$l=10;
while(<>) {
if (/<td align="left">([^<]+)<\/td>/) { $n=$1; $l=0; } else {
    if (/<td align="center">([^<]+)<\/td>/) {
        if ($l==0) {
            $t=$1;
        } elsif ($l==1) {
            # Remove thousands separators, then convert to an integer after
            # adding .499.
            $c=$1;
            $c=~s/,//g;$c=int($c+.499);
            print "$t$d$c$d$n\n";
        }
    } else {
        # Any other line cancels the row in progress.
        $l=10;
}
    ++$l;
} }
22 | 


--------------------------------------------------------------------------------
/util/nbest.py:
--------------------------------------------------------------------------------
1 | ../sblm/nbest.py


--------------------------------------------------------------------------------
/util/nfeats:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env pypy
 2 | #-*- python -*-
 3 | usage="""
 4 | (from file weights, or from positional arguments (nbests)) take weight vector or nbest features, and print number of features starting with pre with abs(weight or value)>=epsilon
 5 | """
 6 | from graehl import *
 7 | from dumpx import *
 8 | def main(pre='',epsilon=0,weights='',rest_=[],printfeats=False,usage_=usage):
 9 |     if len(weights):
10 |         l=firstline(weights)
11 |         fvs=[str2weights(l)]
12 |     elif len(rest):
13 |         fvs=flatten(dict(yieldfields_num(l) for l in readfrom(f)) for f in rest_)
14 |     fs=set()
15 |     for fv in fvs:
16 |         for (f,v) in fv.iteritems():
17 |             if f.startswith(pre) and v>=epsilon:
18 |                 fs.add(f)
19 |     dump(fs)
20 |     if printfeats:
21 |         for f in sorted(fs):
22 |             print f
23 |     print len(fs)
24 | 
25 | import optfunc
26 | optfunc.main(main)
27 | 
28 | 


--------------------------------------------------------------------------------
/util/no-trailing-space-inplace.pl:
--------------------------------------------------------------------------------
#!/usr/bin/perl -i~
# Strips trailing whitespace from every line of the named files in place
# (original kept as FILE~). Every output line ends in exactly one newline,
# including a last line that had none.
while (my $line = <>) {
    $line =~ s/\s+\z//;    # removes the line terminator along with the rest
    print "$line\n";
}
7 | 


--------------------------------------------------------------------------------
/util/optfunc.py:
--------------------------------------------------------------------------------
1 | ../gextract/optfunc.py


--------------------------------------------------------------------------------
/util/pandoc.constantia.css:
--------------------------------------------------------------------------------
 1 | body {
 2 |     margin: auto;
 3 |     padding-right: 1em;
 4 |     padding-left: 1em;
 5 |     max-width: 44em;
 6 |     border-left: 1px solid black;
 7 |     border-right: 1px solid black;
 8 |     color: black;
 9 |     font-family: Verdana, sans-serif;
10 |     font-size: 100%;
11 |     line-height: 140%;
12 |     color: #333;
13 | }
pre {
    border: 1px dotted gray;
    background-color: #ececec;
    /* Was "#1111111": seven hex digits are not a valid color, so the
       declaration was dropped and the text color was inherited instead. */
    color: #111111;
    padding: 0.5em;
}
20 | code {
21 |     font-family: monospace;
22 | }
23 | h1 a, h2 a, h3 a, h4 a, h5 a {
24 |     text-decoration: none;
25 |     color: #7a5ada;
26 | }
27 | h1, h2, h3, h4, h5 { font-family: verdana;
28 |                      font-weight: bold;
29 |                      border-bottom: 1px dotted black;
30 |                      color: #7a5ada; }
31 | h1 {
32 |         font-size: 130%;
33 | }
34 | 
35 | h2 {
36 |         font-size: 110%;
37 | }
38 | 
39 | h3 {
40 |         font-size: 95%;
41 | }
42 | 
43 | h4 {
44 |         font-size: 90%;
45 |         font-style: italic;
46 | }
47 | 
48 | h5 {
49 |         font-size: 90%;
50 |         font-style: italic;
51 | }
52 | 
53 | h1.title {
54 |         font-size: 200%;
55 |         font-weight: bold;
56 |         padding-top: 0.2em;
57 |         padding-bottom: 0.2em;
58 |         text-align: left;
59 |         border: none;
60 | }
61 | 
62 | dt code {
63 |         font-weight: bold;
64 | }
65 | dd p {
66 |         margin-top: 0;
67 | }
68 | 
69 | #footer {
70 |         padding-top: 1em;
71 |         font-size: 70%;
72 |         color: gray;
73 |         text-align: center;
74 |         }
75 | 


--------------------------------------------------------------------------------
/util/pandoc.css:
--------------------------------------------------------------------------------
 1 | body {
 2 |     margin: auto;
 3 |     padding-right: 1em;
 4 |     padding-left: 1em;
 5 |     max-width: 44em; 
 6 |     border-left: 1px solid black;
 7 |     border-right: 1px solid black;
 8 |     color: black;
 9 |     font-family: Verdana, sans-serif;
10 |     font-size: 100%;
11 |     line-height: 140%;
12 |     color: #333; 
13 | }
pre {
    border: 1px dotted gray;
    background-color: #ececec;
    /* Was "#1111111": seven hex digits are not a valid color, so the
       declaration was dropped and the text color was inherited instead. */
    color: #111111;
    padding: 0.5em;
}
20 | code {
21 |     font-family: monospace;
22 | }
23 | h1 a, h2 a, h3 a, h4 a, h5 a { 
24 |     text-decoration: none;
25 |     color: #7a5ada; 
26 | }
27 | h1, h2, h3, h4, h5 { font-family: verdana;
28 |                      font-weight: bold;
29 |                      border-bottom: 1px dotted black;
30 |                      color: #7a5ada; }
31 | h1 {
32 |         font-size: 130%;
33 | }
34 | 
35 | h2 {
36 |         font-size: 110%;
37 | }
38 | 
39 | h3 {
40 |         font-size: 95%;
41 | }
42 | 
43 | h4 {
44 |         font-size: 90%;
45 |         font-style: italic;
46 | }
47 | 
48 | h5 {
49 |         font-size: 90%;
50 |         font-style: italic;
51 | }
52 | 
53 | h1.title {
54 |         font-size: 200%;
55 |         font-weight: bold;
56 |         padding-top: 0.2em;
57 |         padding-bottom: 0.2em;
58 |         text-align: left;
59 |         border: none;
60 | }
61 | 
62 | dt code {
63 |         font-weight: bold;
64 | }
65 | dd p {
66 |         margin-top: 0;
67 | }
68 | 
69 | #footer {
70 |         padding-top: 1em;
71 |         font-size: 70%;
72 |         color: gray;
73 |         text-align: center;
74 |         }
75 | 


--------------------------------------------------------------------------------
/util/pcfg.py:
--------------------------------------------------------------------------------
1 | ../sblm/pcfg.py


--------------------------------------------------------------------------------
/util/pychecks.sh:
--------------------------------------------------------------------------------
# Run pycheckers (defined in aliases.sh next to this script) on every script
# that findscripts.sh reports for the interpreter pattern "py". Arguments are
# passed on to findscripts.sh. Paths are quoted so the script also works when
# it lives in a directory whose name contains spaces.
d=$(dirname "$0")
. "$d/aliases.sh"
pycheckers $("$d/findscripts.sh" py "$@")
4 | 


--------------------------------------------------------------------------------
/util/random-c-array.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | import md5
 5 | import itertools
 6 | 
 7 | def reproducible_random(seed):
 8 |     """ chain md5 """
 9 |     hash = seed
10 |     while True:
11 |         digest = md5.md5(hash).digest()
12 |         for c in digest:
13 |             yield ord(c)
14 |         hash = digest + hash[0:len(digest)]
15 | 
def usage():
    """Print the argument summary on stderr and exit with status 1."""
    sys.stderr.write("arg1 = # of bytes, arg2 = seed\n")
    sys.exit(1)

def main(args):
    """Print n reproducible pseudo-random byte values as a C initializer list.

    args[0] -- optional number of values (default 32)
    args[1] -- optional seed string
    More than two arguments, or a count that is not an integer, print the
    usage text and exit instead of raising a traceback.
    """
    seed = "random-c-array.py-encrypted-seed-seed"
    n = 32
    if len(args) > 2:
        usage()
    if len(args) >= 1:
        try:
            n = int(args[0])
        except ValueError:
            usage()
    if len(args) == 2:
        seed = args[1]
    sys.stderr.write('[n=%s] [seed=%s]\n'%(n, seed))
    # `values` rather than `bytes`, which would shadow the builtin.
    values = itertools.islice(reproducible_random(seed), n)
    # Call form of print: same output on Python 2, valid on Python 3.
    print(', '.join(map(str, values)))
32 | 
33 | if __name__ == '__main__':
34 |     main(sys.argv[1:])
35 | 


--------------------------------------------------------------------------------
/util/relpath:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #
 3 | # Print relative path from $1 to $2 e.g. /a/b/c/d to /a/b/c1/d1 = ../../c1/d1
 4 | # Author: Cimarron Taylor, graehl
 5 | 
 6 | import os, sys
 7 | 
 8 | def pathsplit(p, rest=[]):
 9 |     (h,t) = os.path.split(p)
10 |     if len(h) < 1: return [t]+rest
11 |     if len(t) < 1: return [h]+rest
12 |     return pathsplit(h,[t]+rest)
13 | 
14 | def commonpath(l1, l2, common=[]):
15 |     if len(l1) < 1: return (common, l1, l2)
16 |     if len(l2) < 1: return (common, l1, l2)
17 |     if l1[0] != l2[0]: return (common, l1, l2)
18 |     return commonpath(l1[1:], l2[1:], common+[l1[0]])
19 | 
20 | def relpath(p1, p2):
21 |     (common,l1,l2) = commonpath(pathsplit(p1), pathsplit(p2))
22 |     p = []
23 |     if len(l1) > 0:
24 |         p = [ '../' * len(l1) ]
25 |     p = p + l2
26 |     if len(p)==0:
27 |         return '.'
28 |     return os.path.join( *p )
29 | 
if __name__ == '__main__':
    # Fail with a usage line instead of an IndexError traceback when an
    # argument is missing; extra arguments are still ignored as before.
    if len(sys.argv) < 3:
        sys.stderr.write("usage: %s FROM TO\n" % sys.argv[0])
        sys.exit(2)
    frompath = sys.argv[1]
    topath = sys.argv[2]
    print(relpath(os.path.abspath(frompath),os.path.abspath(topath)))
34 | 


--------------------------------------------------------------------------------
/util/relpathp:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #
 3 | # Print relative path from $1 to $2 e.g. /a/b/c/d to /a/b/c1/d1 = ../../c1/d1
 4 | # Author: Cimarron Taylor, graehl
 5 | 
 6 | import os, sys
 7 | 
def pathsplit(p, rest=[]):
    """Split path `p` into a list of components, followed by `rest`.

    Recurses on the head returned by os.path.split until either the head
    or the tail is empty.  `rest` is only read, never mutated.
    """
    (h,t) = os.path.split(p)
    if len(h) < 1: return [t]+rest
    if len(t) < 1: return [h]+rest
    return pathsplit(h,[t]+rest)
13 | 
def commonpath(l1, l2, common=[]):
    """Return (common, rest1, rest2): the shared leading elements of `l1`
    and `l2` appended to `common`, and what is left of each sequence.

    Works on any sliceable sequence; the main block of this script calls
    it with plain strings, in which case elements are single characters.
    """
    if len(l1) < 1: return (common, l1, l2)
    if len(l2) < 1: return (common, l1, l2)
    if l1[0] != l2[0]: return (common, l1, l2)
    return commonpath(l1[1:], l2[1:], common+[l1[0]])
19 | 
def relpath(p1, p2):
    """Return the relative path that leads from `p1` to `p2`.

    Emits one '../' per component of `p1` below the shared prefix, then
    the components of `p2` below it; returns '.' when nothing remains.
    """
    (common,l1,l2) = commonpath(pathsplit(p1), pathsplit(p2))
    p = []
    if len(l1) > 0:
        p = [ '../' * len(l1) ]
    p = p + l2
    if len(p)==0:
        return '.'
    return os.path.join( *p )
29 | 
def test(p1,p2):
    """Print the relpath result for one (from, to) pair; debugging aid."""
    print("from", p1, "to", p2, " -> ", relpath(p1, p2))
32 | 
if __name__ == '__main__':
    frompath = sys.argv[1]
    topath = sys.argv[2]
    home=os.path.abspath(frompath)
    # d is the directory part of topath exactly as given (not made
    # absolute); b is its final component.
    (d,b)=os.path.split(topath)
    full=os.path.abspath(topath)
    # NOTE(review): commonpath is given plain strings here, so cd and cfull
    # are lists of the leading characters shared with home, not lists of
    # path components -- confirm this is intended.
    (cd,_,_)=commonpath(home,d)
    (cfull,_,_)=commonpath(home,full)
    # Use the absolute target when it shares a longer character prefix with
    # home than the as-given directory does; otherwise keep the final
    # component b and only relativize the directory part.
    if len(cfull)>len(cd):
        print(relpath(home,full))
    else:
        print(os.path.join(relpath(home,d),b))
45 | 


--------------------------------------------------------------------------------
/util/sample/alignment-links.tsv:
--------------------------------------------------------------------------------
1 | a	b	wie andere ich meinen namen __LW_AT__?	how do i change my name __LW_AT__?	0 6 4 5 3 4 5 3 2 2 0 1 4 0
2 | 


--------------------------------------------------------------------------------
/util/space-brace-inplace.pl:
--------------------------------------------------------------------------------
#!/usr/bin/perl -i~
# Rewrite files in place (keeping a "~" backup, per -i~) so that
# "struct X{", "class X{" and "namespace X{" get exactly one space between
# the keyword, the identifier and the opening brace.
use strict;
use warnings;

my $id = qr/[A-Za-z_][0-9A-Za-z_]*/;
while (my $line = <>) {
    # The literal brace is escaped: an unescaped "{" in a pattern is
    # deprecated in newer Perls.  Each substitution fixes only the first
    # occurrence on the line, as before.
    $line =~ s/(struct|class)\s+($id)(\{)/$1 $2 $3/;
    $line =~ s/(namespace)\s+($id)(\{)/$1 $2 $3/;
    print $line;
}
9 | 


--------------------------------------------------------------------------------
/util/split.lua:
--------------------------------------------------------------------------------
 1 | #!/home/graehl/torch/install/bin/luajit
 2 | 
 3 | local function grjoined(tokens)
 4 |     return table.concat(tokens, ' ')
 5 | end
 6 | 
 7 | local function grsplit(str, sSeparator, nMax, bRegexp)
 8 |    sSeparator = sSeparator or ' '
 9 |    assert(sSeparator ~= "")
10 |    assert(nMax == nil or nMax >= 1)
11 |    local aRecord = {}
12 |    if str:len() > 0 then
13 |       local bPlain = not bRegexp
14 |       nMax = nMax or -1
15 |       local nStart = 1, 1
16 |       local nFirst, nLast = str:find(sSeparator, nStart, bPlain)
17 |       while nFirst and nMax ~= 0 do
18 |          table.insert(aRecord, str:sub(nStart, nFirst-1))
19 |          nStart = nLast + 1
20 |          nFirst, nLast = str:find(sSeparator, nStart, bPlain)
21 |          nMax = nMax - 1
22 |       end
23 |       table.insert(aRecord, str:sub(nStart))
24 |    end
25 |    return aRecord
26 | end
27 | 
-- Undo BPE segmentation: join the tokens with spaces, delete every
-- occurrence of `bpecont` followed by a space, and split the result on
-- spaces again.  `bpecont` is passed to gsub as a Lua pattern.
local function deBPE(tokens, bpecont)
    local joined = table.concat(tokens, ' ')
    -- keep only gsub's first result (the string), dropping the match count
    local merged = string.gsub(joined, bpecont .. ' ', '')
    return grsplit(merged, ' ')
end
31 | 


--------------------------------------------------------------------------------
/util/splitutf8.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -CSDA
 2 | use utf8;
 3 | use 5.014;
 4 | 
 5 | my $space = $ENV{chars} ? '' : ' ';
 6 | 
 7 | while(<>) {
 8 |     chomp;
 9 |     my @f=split $space,$_;
10 |     my $n = scalar @f;
11 |     my $mid = int(($n + 1) / 2);
12 |     my @right = splice @f, $mid;
13 |     print STDERR join($space, @right),"\n";
14 |     print join($space, @f),"\n";
15 | }
16 | 


--------------------------------------------------------------------------------
/util/start-hadoop:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #usage: `start-hadoop [-opts] [hadoop-dir]`
 3 | # to set up environment. NOTE BACKTICKS.
 4 | 
 5 | BLOBS=${BLOBS:-~graehl/blobs}
 6 | . $BLOBS/bashlib/unstable/bashlib.sh
 7 | 
 8 | pbshad=/home/nlg-02/pust/bin/pbs_hadoop.py
 9 | ourhad=$d/pbs_hadoop.py
10 | [[ -x $ourhad && ! -x $pbshad ]] && pbshad=ourhad
11 | export HADOOP_DIR=$(scratchdir)/hadoop
12 | 
13 | #make_nodefile #needs more work; e.g.: export PBS_ENVIRONMENT=PBS_INTERACTIVE
14 | 
15 | exportenv() {
16 |  echo export HADOOP_CONF_DIR=$HADOOP_DIR/conf
17 |  echo export HADOOP_HOME=${HADOOP_HOME:-/home/nlg-01/chiangd/pkg/hadoop}
18 | }
19 | `exportenv`
20 | showvars_required pbshad HADOOP_DIR HADOOP_CONF_DIR
21 | exportenv
22 | $pbshad "$@" $HADOOP_DIR 1>&2
23 | 
24 | 


--------------------------------------------------------------------------------
/util/stats.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env pypy
 2 | from graehl import *
 3 | from collections import defaultdict
 4 | def stats_main(input='numbers.txt',mean=True,variance=True,stddev=True,error=True,sparse=True,skipblank=True):
 5 |     v=defaultdict(lambda:Stats(mean=mean,variance=variance,stddev=stddev,stderror=error))
 6 |     if input=='-':
 7 |         input=sys.stdin
 8 |     if type(input)==str:
 9 |         input=open(input)
10 |     N=0
11 |     for line in input:
12 |         fs=line.split()
13 |         name=None
14 |         haven=False
15 |         for i in range(0,len(fs)):
16 |             f=fs[i]
17 |             if name is None:
18 |                 name=i
19 |             try:
20 |                 e=f.find('=')
21 |                 if e>0:
22 |                     name=f[:e]
23 |                     ff=float(f[e+1:])
24 |                 else:
25 |                     ff=float(f)
26 |                 v[name].count(ff)
27 |                 haven=True
28 |                 name=None
29 |             except ValueError:
30 |                 name=f
31 |         if haven or not skipblank: N+=1
32 |     if sparse:
33 |         for s in v.itervalues():
34 |             s.N=N
35 |     write_dict(v)
36 | 
37 | import optfunc
38 | optfunc.main(stats_main)
39 | 


--------------------------------------------------------------------------------
/util/subst.pypy.sh:
--------------------------------------------------------------------------------
# Apply the use-pypy.subst substitutions in place to every file below the
# current directory that is under 1000k, skipping backups (*~) and svn files.
d=$(dirname -- "$0")
#-perm -u+x
# The find output is deliberately unquoted so that each file becomes its own
# argument (file names containing whitespace are not supported).
"$d/util/subst.pl" "$@" -v -t "$d/util/use-pypy.subst"  -i -e $(find . -size -1000k ! -name '*~' ! -name '*svn*')
4 | 


--------------------------------------------------------------------------------
/util/svndiff.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Configure your favorite diff program here.
 3 | DIFF="diff"
 4 | 
 5 | # Subversion provides the paths we need as the sixth and seventh
 6 | # parameters.
 7 | LEFT=${6}
 8 | RIGHT=${7}
 9 | 
10 | nleft=${3}
11 | nright=${5}
12 | 
13 | #. ~/u/bashlib.sh
14 | #showvars_required nleft LEFT nright RIGHT
15 | # Call the diff command (change the following line to make sense for
16 | # your merge program).
17 | $DIFF  -w -u -b --label="$nleft" $LEFT --label "$nright" $RIGHT
18 | 
19 | # Return an errorcode of 0 if no differences were detected, 1 if some were.
20 | # Any other errorcode will be treated as fatal.
21 | 


--------------------------------------------------------------------------------
/util/template.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env pypy
 2 | #-*- python -*-
 3 | usage="""
 4 | the purpose of this script.
 5 | """
 6 | 
 7 | from graehl import *
 8 | from collections import defaultdict
 9 | import os,sys
10 | #sys.path.append(os.path.dirname(sys.argv[0]))
11 | 
12 | import optfunc
@optfunc.arghelp('rest_','input files')
def main(rest_=['-'],keyfields=1,sep='\t',usage_=usage):
    """-h usage"""
    logcmd(True)
    for f in rest_:
        for l in open_in(f):
            # Drop the line terminator before splitting; otherwise a line
            # with no more than `keyfields` fields keeps its own newline and
            # is printed with a second one.  (A no-op if open_in already
            # strips newlines -- not visible from here.)
            fields = l.rstrip('\n').split(sep)
            # write() instead of the print statement keeps this valid on
            # both Python 2 and Python 3.
            sys.stdout.write(sep.join(fields[0:keyfields]) + '\n')
20 | 
21 | optfunc.main(main)
22 | 


--------------------------------------------------------------------------------
/util/test.grf:
--------------------------------------------------------------------------------
1 | wc -l giraffe
2 | wc -l giraffe.david
3 | wc -l giraffe.split
4 | wc -l libgraehl.pl
5 | wc -l test.grf
6 | 


--------------------------------------------------------------------------------
/util/unionfind.hh:
--------------------------------------------------------------------------------
 1 | 
 2 | template <class Data>
 3 | struct UnionNode {
 4 |   Data data;
 5 |   mutable UnionNode *parent;
 6 |   U rank;
 7 |   UnionNode(Data const& data=Data()) : data(data), parent(this), rank() {}
 8 |   typedef UnionNode *Ptr;
 9 |   Ptr repr() const {
10 |     if (parent != this)
11 |       parent = parent->repr();
12 |     return parent;
13 |   }
14 |   Ptr merge(Ptr o) {
15 |     return unionMergeRoots(repr(), o->repr());
16 |   }
17 |   UnionNode & operator += (UnionNode& o) {
18 |     return *merge(&o);
19 |   }
20 | };
21 | 
// Link two tree roots by rank and return the root of the merged tree.
// The root of lower rank is attached below the other; on equal ranks `b`
// is attached below `a` and a's rank grows by one. Identical roots are
// returned unchanged.
template <class PtrT>
PtrT unionMergeRoots(PtrT a, PtrT b) {
  if (a == b) return a;
  if (a->rank < b->rank) {
    a->parent = b;
    return b;
  }
  if (a->rank == b->rank)
    ++a->rank;
  b->parent = a;
  return a;
}
33 | 


--------------------------------------------------------------------------------
/util/unrpn_:
--------------------------------------------------------------------------------
# Run the sibling "unrpn" script with membersuffix=_ in its environment,
# forwarding all arguments.
membersuffix=_ "$(dirname -- "$0")/unrpn" "$@"
2 | 


--------------------------------------------------------------------------------
/util/why-empty.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use strict;
 3 | use utf8;
 4 | die 'why-empty.pl filea fileb' unless scalar @ARGV == 2;
 5 | my $a = shift;
 6 | my $b = shift;
 7 | sub opened {
 8 |     my $f;
 9 |     open($f, $_[0]) or die "open $_[0]";
10 |     $f
11 | }
12 | my $af = &opened($a);
13 | my $bf = &opened($b);
14 | my @a = <$af>;
15 | my @b = <$bf>;
16 | my $an = scalar @a;
17 | my $bn = scalar @b;
18 | sub countwords {
19 | #    my @a = split ' ',$_[0];
20 | #    scalar @a;
21 |     return length shift;
22 | }
23 | my @difflines;
24 | die "#lines differ: $an != $bn for why-empty.pl $a $b" unless $an == $bn;
25 | for (0.. ($an - 1)) {
26 |     my $al = $a[$_];
27 |     chomp $al;
28 |     my $bl = $b[$_];
29 |     chomp $bl;
30 |     my $ac = &countwords($al);
31 |     my $bc = &countwords($bl);
32 |     if (($ac == 0) != ($bc == 0)) {
33 |         print "$_: $ac $bc ||| $al ||| $bl\n";
34 |         push @difflines, $_+1;
35 |     }
36 | }
37 | my $ndiff = scalar @difflines;
38 | die "$ndiff differently empty lines. line-numbers: ".join(' ',@difflines) if $ndiff;
39 | 


--------------------------------------------------------------------------------
/util/yuminstall.txt:
--------------------------------------------------------------------------------
 1 | haskell-platform
 2 | scala
 3 | emacs
 4 | pychecker
 5 | valgrind
 6 | subversion
 7 | git
 8 | autogen
 9 | cmake
10 | automake
11 | xz
12 | pypy
13 | tbb
14 | sbt
15 | maven
16 | nodejs
17 | flex
18 | texlive
19 | bison
20 | levien-inconsolata-fonts
21 | zlib-devel
22 | time
23 | scipy
24 | ppl
25 | clang-analyzer
26 | less
27 | boost
28 | cabal-install
29 | bind-utils
30 | bc
31 | R
32 | ccache
33 | condor
34 | readline-devel
35 | ndisc6
36 | traceroute
37 | pyOpenSSL
38 | patch
39 | cloog-ppl
40 | cppcheck
41 | upx
42 | tmux
43 | screen
44 | hg
45 | texlive-dejavu
46 | dejavu-sans-mono-fonts
47 | cabextract
48 | 


--------------------------------------------------------------------------------