├── README.md ├── bencode.ml ├── cxx_wrapped.h ├── git-branches ├── idn.ml ├── indente.ml ├── measure.ml ├── memory.pikchr ├── memsum ├── mlvalues.py ├── next.ml ├── oasis-query.sh ├── opam-url ├── pay.ml ├── php_serialize.ml ├── pmp ├── pmpa └── pvolume.sh /README.md: -------------------------------------------------------------------------------- 1 | * [Bencode](bencode.ml) - parse [bencode](http://www.bittorrent.org/beps/bep_0003.html)'d structures (Bittorrent metadata) 2 | * [cxx_wrapped.h](cxx_wrapped.h) - simple template to wrap C++ object as OCaml custom value (used for example in [ocaml-hypertable](https://github.com/ygrek/ocaml-hypertable/)) 3 | * [git-branches](git-branches) - git subcommand to compactly show every branch (with its top commit) in the repository 4 | * [idn.ml](idn.ml) - copy of punycode and IDN module for OCaml (previously available at http://caml.ru/~dima/ocaml/idn.ml) with some bugfixes 5 | * [indente.ml](indente.ml) - detect common beginner error of else branch scope in OCaml code 6 | * [Measure](measure.ml) - simple module to measure code speed and allocation rate 7 | * [memsum](memsum) - shell script to quickly see memory usage summed over process groups 8 | * [mlvalues.py](mlvalues.py) - GDB/python script to inspect OCaml runtime, heap and individual values 9 | * [oasis-query.sh](oasis-query.sh) - shell helper to extract useful information from [oasis](https://github.com/ocaml/oasis) 10 | * [opam-url](opam-url) - shell helper to generate url section of [opam](https://opam.ocaml.org) file 11 | * [Php_serialize](php_serialize.ml) - parse and construct [php\_serialize](http://php.net/manual/en/function.serialize.php)'d values 12 | * [pay.ml](pay.ml) - simple group expenses recording and bill splitting 13 | * [pmp](pmp) - modified [poor man's profiler](http://poormansprofiler.org/) script (with OCaml names demangling) 14 | * [pmpa](pmpa) - poor man's allocation profiler ([description](https://inbox.ocaml.org/caml-list/20110807150719.34376e5e605354e296c528ca@gmail.com/)) 15 | * [pvolume.sh](pvolume.sh) - pulseaudio volume and output device helper 16 | -------------------------------------------------------------------------------- /bencode.ml: -------------------------------------------------------------------------------- 1 | (** 2 | http://www.bittorrent.org/beps/bep_0003.html 3 | 4 | This is free and unencumbered software released into the public domain. 5 | For more information, please refer to 6 | *) 7 | 8 | (* open Prelude *) 9 | 10 | let (>>) x f = f x 11 | let fail fmt = Printf.ksprintf failwith fmt 12 | 13 | open Printf 14 | open ExtLib 15 | 16 | module type Number = sig 17 | type t 18 | val neg : t -> t 19 | val of_int : int -> t 20 | val to_int : t -> int 21 | val of_string : string -> t 22 | val to_string : t -> string 23 | val zero : t 24 | val add : t -> t -> t 25 | val mul : t -> t -> t 26 | end 27 | 28 | module Make(N : Number) = struct 29 | 30 | type t = 31 | | S of string 32 | | I of N.t 33 | | L of t list 34 | | D of (string * t) list 35 | 36 | let decode_stream ?(hints=[]) chars = 37 | let ten = N.of_int 10 in 38 | let digit c = N.of_int (Char.code c - Char.code '0') in 39 | let rec loop acc = parser 40 | | [< x=parse_one; t; >] -> loop (x::acc) t 41 | | [< >] -> List.rev acc 42 | and parse_one = parser 43 | | [< s=parse_string >] -> S s 44 | | [< ''i'; n=parse_int_num; ''e' >] -> I n 45 | | [< ''l'; l=loop []; ''e' >] -> L l 46 | | [< ''d'; d=loop_d []; ''e' >] -> D d 47 | and loop_d acc = parser 48 | | [< k=parse_string; v=parse_one; t >] -> loop_d ((k,v) :: acc) t 49 | | [< >] -> List.rev acc 50 | and parse_string = parser 51 | | [< n = parse_pos_num; '':'; s = take (N.to_int n) >] -> s 52 | and parse_int_num = parser 53 | | [< ''-'; x = parse_pos_num >] -> N.neg x 54 | | [< x = parse_pos_num >] -> x 55 | and parse_pos_num = parser 56 | | [< ''0' >] -> N.zero 57 | | [< ''1'..'9' as c; n = parse_digits (digit c) >] -> n 58 | and parse_digits n = parser 59 | | [< ''0'..'9' as c; t >] -> parse_digits (N.add (N.mul n ten) (digit c)) t 60 | | [< >] -> n 61 | and take n chars = 62 | let s = String.make n '\000' in 63 | for i = 0 to n-1 do s.[i] <- Stream.next chars done; s 64 | in 65 | let main () = 66 | let r = parse_one chars in 67 | if not (List.mem `IgnoreTail hints) then Stream.empty chars; 68 | r 69 | in 70 | let show () = 71 | let tail = Stream.npeek 10 chars >> List.map (String.make 1) >> String.concat "" in 72 | sprintf "Position %u : %s" (Stream.count chars) tail 73 | in 74 | try 75 | main () 76 | with 77 | | Stream.Error _ | Stream.Failure -> failwith (show ()) 78 | 79 | let rec print out = 80 | let pr fmt = IO.printf out fmt in 81 | function 82 | | I n -> pr "%s " (N.to_string n) 83 | | S s -> pr "\"%s\" " (String.slice ~last:10 s) 84 | | L l -> pr "( "; List.iter (print out) l; pr ") " 85 | | D d -> pr "{ "; List.iter (fun (k,v) -> pr "%s: " k; print out v) d; pr "} " 86 | 87 | let to_string t = 88 | let module B = Buffer in 89 | let b = B.create 100 in 90 | let puts s = bprintf b "%u:%s" (String.length s) s in 91 | let rec put = function 92 | | I n -> bprintf b "i%se" (N.to_string n) 93 | | S s -> puts s 94 | | L l -> B.add_char b 'l'; List.iter put l; B.add_char b 'e' 95 | | D d -> 96 | B.add_char b 'd'; 97 | List.iter (fun (s,x) -> puts s; put x) (List.sort ~cmp:(fun (s1,_) (s2,_) -> compare s1 s2) d); 98 | B.add_char b 'e' 99 | in 100 | put t; B.contents b 101 | 102 | (** @raise exn on error *) 103 | let decode s = 104 | Stream.of_string s >> decode_stream 105 | 106 | let key s k v = 107 | match v with 108 | | D l -> k (try List.assoc s l with Not_found -> fail "no key '%s'" s) 109 | | _ -> fail "not a dictionary" 110 | 111 | let int = function I n -> n | _ -> fail "int" 112 | let str = function S s -> s | _ -> fail "str" 113 | let list k v = match v with L l -> k l | _ -> fail "list" 114 | let listof k v = match v with L l -> List.map k l | _ -> fail "listof" 115 | let dict k v = match v with D l -> k l | _ -> fail "dict" 116 | 117 | end 118 | 119 | include Make(Int64) 120 | 121 | -------------------------------------------------------------------------------- /cxx_wrapped.h: -------------------------------------------------------------------------------- 1 | // Simple template to wrap C++ object as OCaml custom value 2 | // Author: ygrek 3 | // Version: 2020-02-18 4 | 5 | // This is free and unencumbered software released into the public domain. 6 | // Anyone is free to copy, modify, publish, use, compile, sell, or 7 | // distribute this software, either in source code form or as a compiled 8 | // binary, for any purpose, commercial or non-commercial, and by any means. 9 | // For more information, please refer to 10 | 11 | // value wrapped::alloc(Ptr,tag=0,mem=0,max=1) 12 | // creates custom value with pointer to C++ object inside 13 | // finalizer will release pointer (whether destructor will be called 14 | // depends on the semantics of the pointer) 15 | // tag is optional tag attached to custom value 16 | // mem and max are parameters of caml_alloc_custom 17 | // value wrapped::alloc_mem(Ptr,tag,used) 18 | // same as wrapped::alloc but using caml_alloc_custom_mem 19 | // void wrapped::release(value) 20 | // releases wrapped pointer 21 | // Ptr const& wrapped::get(value) 22 | // returns pointer to wrapped object 23 | // raises OCaml Invalid_argument exception if pointer was already released 24 | // size_t wrapped::count() 25 | // returns the number of currently allocated Ptr wrappers 26 | // 27 | // wrapped<> manages smart pointers to C++ objects 28 | // wrapped_ptr<> manages raw pointers (owns pointed object, release() destroys object) 29 | // 30 | // generational_global_root is a RAII wrapper to register GC roots 31 | // caml_release_runtime is a RAII wrapper to release runtime lock in the given scope 32 | // without_runtime_lock executes function with runtime lock released 33 | 34 | extern "C" { 35 | #define CAML_NAME_SPACE 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | } 44 | 45 | #include 46 | 47 | // name used as identifier for custom_operations 48 | // should be instantiated for each wrapped pointer class 49 | template 50 | char const* ml_name(); 51 | 52 | // Ptr is a smart pointer class, 53 | // e.g.: std::auto_ptr, boost::shared_ptr, boost::intrusive_ptr 54 | template 55 | class wrapped 56 | { 57 | private: 58 | struct ml_wrapped 59 | { 60 | ml_wrapped(Ptr x, size_t t) : tag(t), p(x) {} // copy is ok 61 | ml_wrapped(typename Ptr::element_type* x, size_t t) : tag(t), p(x) {} 62 | size_t tag; 63 | Ptr p; 64 | }; 65 | 66 | static size_t count_; 67 | 68 | #define Wrapped_val(v) (*(ml_wrapped**)Data_custom_val(v)) 69 | 70 | static void finalize(value v) 71 | { 72 | release(v); 73 | delete Wrapped_val(v); 74 | } 75 | 76 | public: 77 | typedef Ptr type; 78 | 79 | static size_t count() { return count_; } 80 | static char const* name() { return ml_name(); } 81 | static size_t tag(value v) { return Wrapped_val(v)->tag; } 82 | 83 | static Ptr const& get(value v) // do not copy 84 | { 85 | Ptr const& p = Wrapped_val(v)->p; 86 | //printf("get %lx : %s\n",(size_t)p.get(),name()); 87 | if (NULL == p.get()) caml_invalid_argument(name()); 88 | return p; 89 | } 90 | 91 | static void release(value v) 92 | { 93 | Ptr& p = Wrapped_val(v)->p; 94 | //printf("release %lx : %s\n",(size_t)p.get(),name()); 95 | if (NULL == p.get()) return; 96 | count_--; 97 | p.reset(); 98 | } 99 | 100 | template 101 | static value alloc(TPtr p, size_t tag = 0, mlsize_t wmem = 0, mlsize_t wmax = 1) 102 | { 103 | //printf("alloc %lx : %s\n",(size_t)p.get(),name()); 104 | CAMLparam0(); 105 | CAMLlocal1(v); 106 | 107 | static struct custom_operations wrapped_ops = { 108 | const_cast(name()), 109 | finalize, 110 | custom_compare_default, 111 | custom_hash_default, 112 | custom_serialize_default, 113 | custom_deserialize_default, 114 | #if defined(custom_compare_ext_default) 115 | custom_compare_ext_default, 116 | #endif 117 | #if defined(custom_fixed_length_default) 118 | custom_fixed_length_default, 119 | #endif 120 | }; 121 | 122 | v = caml_alloc_custom(&wrapped_ops, sizeof(ml_wrapped*), wmem, wmax); 123 | Wrapped_val(v) = new ml_wrapped(p, tag); 124 | count_++; 125 | 126 | CAMLreturn(v); 127 | } 128 | 129 | // caml_alloc_custom_mem was added in the same release as fixed_length field 130 | #if defined(custom_fixed_length_default) 131 | template 132 | static value alloc_mem(TPtr p, size_t tag, mlsize_t used) 133 | { 134 | //printf("alloc %lx : %s\n",(size_t)p.get(),name()); 135 | CAMLparam0(); 136 | CAMLlocal1(v); 137 | 138 | static struct custom_operations wrapped_ops = { 139 | const_cast(name()), 140 | finalize, 141 | custom_compare_default, 142 | custom_hash_default, 143 | custom_serialize_default, 144 | custom_deserialize_default, 145 | custom_compare_ext_default, 146 | custom_fixed_length_default, 147 | }; 148 | 149 | v = caml_alloc_custom_mem(&wrapped_ops, sizeof(ml_wrapped*), used); 150 | Wrapped_val(v) = new ml_wrapped(p, tag); 151 | count_++; 152 | 153 | CAMLreturn(v); 154 | } 155 | #endif 156 | 157 | #undef Wrapped_val 158 | 159 | }; //wrapped 160 | 161 | template 162 | size_t wrapped::count_ = 0; 163 | 164 | template 165 | struct raw_ptr 166 | { 167 | #if __cplusplus >= 201103L 168 | typedef std::unique_ptr ptr; 169 | #else 170 | typedef std::auto_ptr ptr; 171 | #endif 172 | }; 173 | 174 | template 175 | struct wrapped_ptr : public wrapped::ptr> 176 | { 177 | typedef wrapped::ptr> base; 178 | static T* get(value v) 179 | { 180 | return base::get(v).get(); 181 | } 182 | static value alloc(T* p, size_t tag = 0, mlsize_t wmem = 0, mlsize_t wmax = 1) 183 | { 184 | return base::alloc(p,tag,wmem,wmax); 185 | } 186 | #if defined(caml_alloc_custom_mem) 187 | static value alloc_mem(T* p, size_t tag, mlsize_t used) 188 | { 189 | return base::alloc_mem(p,tag,used); 190 | } 191 | #endif 192 | }; // wrapped_ptr 193 | 194 | #if defined(__GNUC__) 195 | #define UNUSED __attribute__((unused)) 196 | #else 197 | #define UNUSED 198 | #endif 199 | 200 | static size_t UNUSED wrapped_tag(value x) { return wrapped_ptr::tag(x); } 201 | 202 | class caml_release_runtime // : boost::noncopyable 203 | { 204 | public: 205 | caml_release_runtime() { caml_release_runtime_system(); } 206 | ~caml_release_runtime() { caml_acquire_runtime_system(); } 207 | private: 208 | caml_release_runtime( const caml_release_runtime& ); 209 | const caml_release_runtime& operator=( const caml_release_runtime& ); 210 | }; 211 | 212 | // compatibiliy 213 | typedef caml_release_runtime caml_blocking_section; 214 | 215 | class generational_global_root // : boost::noncopyable 216 | { 217 | public: 218 | generational_global_root(value v) 219 | { 220 | v_ = v; 221 | caml_register_generational_global_root(&v_); 222 | } 223 | ~generational_global_root() 224 | { 225 | caml_remove_generational_global_root(&v_); 226 | v_ = Val_unit; 227 | } 228 | void set(value v) 229 | { 230 | caml_modify_generational_global_root(&v_, v); 231 | } 232 | value get() 233 | { 234 | return v_; 235 | } 236 | private: 237 | generational_global_root(generational_global_root const&); 238 | const generational_global_root& operator=(generational_global_root const&); 239 | private: 240 | value v_; 241 | }; 242 | 243 | #if __cplusplus >= 201103L 244 | template 245 | auto without_runtime_lock(Func f, Args && ... args) -> decltype(f(std::forward(args)...)) 246 | { 247 | caml_blocking_section lock; 248 | return f(std::forward(args)...); 249 | } 250 | #endif 251 | -------------------------------------------------------------------------------- /git-branches: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | for branch in $(git for-each-ref --format='%(refname:short)' refs/remotes/ refs/heads/ "$@" | grep -v '^origin/HEAD$'); do 4 | echo -e "$(git show --format='%ci %<(24)%cr %<(24)%an' $branch -- | head -n 1) \\t$branch" 5 | done | sort -r | less -XF 6 | -------------------------------------------------------------------------------- /idn.ml: -------------------------------------------------------------------------------- 1 | (* Punycode and IDN library for OCaml *) 2 | (* License: without restrictions *) 3 | (* Author: dima@caml.ru *) 4 | 5 | (* Fixes by: ygrek and cyberhuman *) 6 | (* Version: 2013/08/29 *) 7 | 8 | exception Bad_input 9 | exception Overflow 10 | 11 | (* Parameters *) 12 | 13 | let base = 36 14 | let tmin = 1 15 | let tmax = 26 16 | let skew = 38 17 | let damp = 700 18 | let initial_bias = 72 19 | let initial_n = 0x80 20 | let delimiter = 0x2D 21 | 22 | (* Encoding *) 23 | 24 | let basic p = p < 0x80 25 | 26 | let encode_digit d = 27 | if d < 26 then 28 | d + Char.code 'a' 29 | else if d < 36 then 30 | d - 26 + Char.code '0' 31 | else 32 | raise Bad_input 33 | 34 | let adapt delta num_points first = 35 | let delta = 36 | if first then delta / damp else (delta lsr 1) in 37 | let delta = ref (delta + (delta / num_points)) in 38 | let k = ref 0 in 39 | let lim = ((base - tmin) * tmax) / 2 in 40 | while (!delta > lim) do 41 | delta := !delta / (base - tmin); 42 | k := !k + base 43 | done; 44 | !k + (((base - tmin + 1) * !delta) / (!delta + skew)) 45 | 46 | let encode_data input_data = 47 | let n = ref initial_n in 48 | let delta = ref 0 in 49 | let bias = ref initial_bias in 50 | let basic_count = ref 0 in 51 | let buf = Buffer.create 32 in 52 | let out n = 53 | Buffer.add_char buf (Char.chr n) in 54 | 55 | Array.iter 56 | (fun c -> 57 | if basic c then 58 | begin 59 | out c; 60 | incr basic_count; 61 | end) 62 | input_data; 63 | 64 | if !basic_count > 0 then 65 | Buffer.add_char buf (Char.chr delimiter); 66 | 67 | let handled_count = ref !basic_count in 68 | 69 | while (!handled_count < Array.length input_data) do 70 | let m = ref max_int in 71 | Array.iter 72 | (fun c -> 73 | if c >= !n && c < !m then 74 | m := c) 75 | input_data; 76 | 77 | if !m - !n > (max_int - !delta) / (succ !handled_count) then 78 | raise Overflow; 79 | delta := !delta + (!m - !n) * (succ !handled_count); 80 | n := !m; 81 | 82 | Array.iter 83 | (fun c -> 84 | if c < !n then 85 | begin 86 | incr delta; 87 | if !delta = 0 then 88 | raise Overflow; 89 | end; 90 | if c = !n then 91 | begin 92 | let q = ref !delta in 93 | let k = ref base in 94 | (try 95 | while true do 96 | let t = 97 | if !k <= !bias then tmin 98 | else if !k >= !bias + tmax then tmax 99 | else !k - !bias in 100 | if !q < t then 101 | raise Exit; 102 | out (encode_digit (t + ((!q - t) mod (base - t)))); 103 | q := (!q - t) / (base - t); 104 | k := !k + base 105 | done 106 | with Exit -> ()); 107 | out (encode_digit !q); 108 | bias := adapt !delta (succ !handled_count) (!handled_count = !basic_count); 109 | delta := 0; 110 | incr handled_count; 111 | end) 112 | input_data; 113 | incr delta; 114 | incr n; 115 | done; 116 | Buffer.contents buf 117 | 118 | (* Decoding *) 119 | 120 | let decode_digit p = 121 | if p < 48 then raise Bad_input else 122 | if p < 58 then p + 26 - 48 else 123 | if p < 65 then raise Bad_input else 124 | if p < 65 + 26 then p - 65 else 125 | if p < 97 then raise Bad_input else 126 | if p < 97 + 26 then p - 97 else 127 | raise Bad_input 128 | 129 | let decode_data input_data = 130 | let buflen = String.length input_data in 131 | let n = ref initial_n in 132 | let i = ref 0 in 133 | let bias = ref initial_bias in 134 | let buf = Array.make buflen 0 in 135 | 136 | let input_length = 137 | String.length input_data in 138 | 139 | let out = ref 0 in 140 | let data_pos = 141 | try 142 | let pos = String.rindex input_data (Char.chr delimiter) in 143 | for i = 0 to pos - 1 do 144 | Array.unsafe_set buf i (Char.code input_data.[i]) 145 | done; 146 | out := pos; 147 | pos + 1 148 | with _ -> 0 149 | in 150 | 151 | let j = ref data_pos in 152 | while !j < input_length do 153 | let oldi = ref !i in 154 | let w = ref 1 in 155 | let k = ref base in 156 | (try 157 | while true do 158 | if !j >= input_length then raise Bad_input; 159 | let digit = decode_digit (Char.code input_data.[!j]) in incr j; 160 | if digit > (max_int - !i) / !w then raise Overflow; 161 | i := !i + digit * !w; 162 | let t = 163 | if !k <= !bias then tmin 164 | else if !k >= !bias + tmax then tmax 165 | else !k - !bias 166 | in 167 | if digit < t then 168 | raise Exit; 169 | if !w > max_int / (base - t) then raise Overflow; 170 | w := !w * (base - t); 171 | k := !k + base 172 | done 173 | with Exit -> ()); 174 | let next = 175 | succ !out in 176 | bias := adapt (!i - !oldi) next (!oldi = 0); 177 | if !i / next > max_int - !n then raise Overflow; 178 | n := !n + !i / next; 179 | i := !i mod next; 180 | if !out >= buflen then raise Overflow; 181 | if !out > !i then 182 | Array.blit buf !i buf (!i + 1) (!out - !i); 183 | buf.(!i) <- !n; 184 | incr i; incr out; 185 | done; 186 | Array.sub buf 0 !out 187 | 188 | let upoints = Netconversion.uarray_of_ustring `Enc_utf8 189 | let ustring = Netconversion.ustring_of_uarray `Enc_utf8 190 | 191 | (* Punycode API *) 192 | 193 | let encode s = encode_data (upoints s) 194 | let decode s = ustring (decode_data s) 195 | 196 | (* Helpers *) 197 | 198 | let split domain = 199 | let rec make acc rest = 200 | try 201 | let pos = String.index rest '.' in 202 | make ((String.sub rest 0 pos)::acc) 203 | (String.sub rest (succ pos) ((String.length rest) - pos - 1)) 204 | with Not_found -> List.rev (rest::acc) 205 | in make [] domain 206 | 207 | let join = String.concat "." 208 | 209 | let need_encoding s = 210 | let l = 211 | String.length s in 212 | try 213 | for i = 0 to pred l do 214 | if not (basic (Char.code (String.unsafe_get s i))) then 215 | raise Exit 216 | done; false 217 | with Exit -> true 218 | 219 | let transcode s = 220 | if need_encoding s then 221 | "xn--" ^ encode s 222 | else s 223 | 224 | let transtext s = 225 | let l = String.length s in 226 | if l > 4 && String.sub s 0 4 = "xn--" then 227 | decode (String.sub s 4 (l - 4)) 228 | else s 229 | 230 | (* IDN api *) 231 | 232 | let encode_domain domain = 233 | join (List.map transcode (split domain)) 234 | 235 | let decode_domain domain = 236 | join (List.map transtext (split domain)) 237 | 238 | 239 | let () = 240 | assert ("他们为什么不说中文" = decode "ihqwcrb4cv8a8dqg056pqjye"); 241 | assert ("---禁刊拍賣網址---" = decode "-------5j3ji85am9zsk4ckwjm29b"); 242 | assert ("reality44hire-b9a" = encode (decode "reality44hire-b9a")); 243 | begin 244 | try 245 | let (_:string) = decode_domain "xn----7sbksbihemjgbjxflp8bn1jxc.xn--p1aiaudio_orlov_yum" in 246 | assert false 247 | with 248 | | Bad_input -> assert true 249 | | _ -> assert false 250 | end; 251 | () 252 | -------------------------------------------------------------------------------- /indente.ml: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env b0caml 2 | #directory "+ocaml/compiler-libs" 3 | 4 | open Printf 5 | 6 | let loc { Parsetree.pexp_loc = loc; _ } = Location.get_pos_info loc.loc_start 7 | let col expr = let (_,_,c) = loc expr in c 8 | 9 | let iter = { Ast_iterator.default_iterator with 10 | expr = fun iter expr -> 11 | begin match expr with 12 | | { pexp_desc = Pexp_sequence ({ pexp_desc = Pexp_ifthenelse (_cond,e_then,e_else); _ } as e_if, next); _ } -> 13 | let last = Option.value ~default:e_then e_else in 14 | if col last = col next && col next <> col e_if then 15 | let (file,line,_) = loc next in 16 | printf "Suspicious indentation of next expression after if at %s %d\n%!" file line 17 | | _ -> () 18 | end; 19 | Ast_iterator.default_iterator.expr iter expr 20 | } 21 | 22 | let parse file = 23 | iter.structure iter @@ Pparse.parse_implementation ~tool_name:"indetect" file 24 | 25 | let () = 26 | Sys.argv |> Array.iteri begin fun i file -> 27 | if i <> 0 then try parse file with exn -> fprintf stderr "Failed to parse %s : %s\n%!" file (Printexc.to_string exn) 28 | end 29 | -------------------------------------------------------------------------------- /measure.ml: -------------------------------------------------------------------------------- 1 | (** 2 | Simple module to measure code speed and allocation rate 3 | 4 | http://ygrek.org.ua/p/code/measure.ml 5 | 2011-08-08 6 | 7 | This is free and unencumbered software released into the public domain. 8 | For more information, please refer to 9 | *) 10 | 11 | open Printf 12 | open Gc 13 | 14 | let bytes_f f = (* oh ugly *) 15 | let a = abs_float f in 16 | if a < 1024. then sprintf "%dB" (int_of_float f) else 17 | if a < 1024. *. 1024. then sprintf "%dKB" (int_of_float (f /. 1024.)) else 18 | if a < 1024. *. 1024. *. 1024. then sprintf "%.1fMB" (f /. 1024. /. 1024.) else 19 | sprintf "%.1fGB" (f /. 1024. /. 1024. /. 1024.) 20 | 21 | let bytes x = bytes_f (float_of_int x) 22 | let bytes_64 x = bytes_f (Int64.to_float x) 23 | 24 | let words_f f = 25 | bytes_f (f *. (float_of_int (Sys.word_size / 8))) 26 | 27 | let words x = words_f (float_of_int x) 28 | 29 | let gc_diff st1 st2 = 30 | let allocated st = st.minor_words +. st.major_words -. st.promoted_words in 31 | let a = allocated st2 -. allocated st1 in 32 | let minor = st2.minor_collections - st1.minor_collections in 33 | let major = st2.major_collections - st1.major_collections in 34 | let compact = st2.compactions - st1. compactions in 35 | let heap = st2.heap_words - st1.heap_words in 36 | Printf.sprintf "allocated %7s, heap %7s, collect %2d %4d %5d" (words_f a) (words heap) compact major minor 37 | 38 | let measure f x = 39 | Gc.compact (); 40 | let st = Gc.quick_stat () in 41 | let t1 = Unix.gettimeofday () in 42 | let () = f x in 43 | let t2 = Unix.gettimeofday () in 44 | let st2 = Gc.quick_stat () in 45 | sprintf "%s, elapsed %.3f sec" (gc_diff st st2) (t2 -. t1) 46 | 47 | let show name f x = 48 | printf "%12s : %s\n%!" name (measure f x) 49 | 50 | (** 51 | Example usage: 52 | 53 | let src = "We should forget about small efficiencies, say about 97% of the time: premature optimization is the root of all evil" 54 | 55 | let run f = for i = 1 to 1_000_000 do ignore (f () : string) done 56 | let scanf () = Scanf.sscanf src "%s@:" (fun s -> s) 57 | let sub () = String.sub src 0 (String.index src ':') 58 | 59 | let () = 60 | Measure.show "scanf" run scanf; 61 | Measure.show "sub" run sub; 62 | () 63 | 64 | Compile: 65 | 66 | ocamlopt unix.cmxa measure.ml bench.ml -o bench 67 | 68 | Result: 69 | 70 | scanf : allocated 2.2GB, heap 0B, collect 0 1419 8888, elapsed 2.072 sec 71 | sub : allocated 76.3MB, heap 0B, collect 0 0 305, elapsed 0.162 sec 72 | 73 | *) 74 | -------------------------------------------------------------------------------- /memory.pikchr: -------------------------------------------------------------------------------- 1 | 2 | scale *= 1.5 3 | fontscale = 0.8 4 | 5 | B1: box width 3cm height 3cm thin color gray 6 | B2: box same 7 | B3: box same with .ne at previous.sw 8 | B4: box same 9 | 10 | T1: box invisible "Inactive" fit at B1.center 11 | T2: box same "Active" at B2.center 12 | T3: box same "MemFree" at B3.center 13 | T4: box same "Mapped" at B4.center 14 | 15 | move right 0.05 from T1.s 16 | arrow down even with T3.n chop "reclaim" aligned above 17 | move left 0.05 from T3.n 18 | arrow up even with T1.s chop "malloc" aligned above 19 | 20 | //arrow from T2.s+(0.1,0) to T4.n+(0.1,0) chop "reclaim" aligned above 21 | //arrow from T4.n-(0.1,0) to T2.s-(0.1,0) chop "touch" aligned above 22 | 23 | move up 0.05 from T1.e 24 | arrow right even with T2.w chop "touch" aligned above 25 | move down 0.05 from T1.e 26 | arrow same <- chop "MADV_FREE" below 27 | 28 | arrow from T3.e+(0,0.05) right even with T4.w chop "mmap" aligned above 29 | arrow same <- from T3.e-(0,0.05) chop "munmap" below 30 | 31 | arrow from 0.1 left of T3.ne to T2.sw chop "calloc" aligned rjust above 32 | arrow same <- from T3.ne "free" aligned below 33 | 34 | line invisible from B1.sw to B1.nw "allocated" above aligned 35 | line invisible from B3.sw to B3.nw "free" above aligned 36 | line invisible from B3.sw-(0.5cm,0) to B1.nw-(0.5cm,0) "userspace" above aligned 37 | "free" below at B3.s 38 | "mapped" below at B4.s 39 | line invisible from B3.sw to B4.se 40 | "kernel" at 0.5cm below previous 41 | -------------------------------------------------------------------------------- /memsum: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | # 3 | # Quick overview of memory usage by process groups 4 | # 5 | # This is free and unencumbered software released into the public domain. 6 | # For more information, please refer to 7 | 8 | ps h -eo pid,ppid,rss,args \ 9 | | awk '{ 10 | cmd = $4 11 | for (i=5;i<=NF;i++) { cmd = cmd " " $i } # reconstruct cmd 12 | name[$1] = cmd ; # save cmd for pid 13 | total += $3 14 | if ($2>10) { acc[$2] += $3 } # group rss by parent pid 15 | else { acc[cmd] += $3 } # for toplevel processes - group rss by cmd 16 | } 17 | END { 18 | shown = 0 19 | for (i in acc) { 20 | mem = acc[i]/1024/1024; 21 | if (mem > 1) { printf("%.1fG %s\n",mem,(i in name) ? name[i] : i); shown += acc[i]; } 22 | }; 23 | printf("%.1fG TOTAL (%.1fG shown)",total/1024/1024, shown/1024/1024) 24 | }' \ 25 | | sort -nk1 26 | -------------------------------------------------------------------------------- /mlvalues.py: -------------------------------------------------------------------------------- 1 | # Inspect OCaml heap and values in GDB 2 | # 2016/12/12 3 | # 4 | # https://github.com/ygrek/scraps/blob/master/mlvalues.py 5 | # (c) 2011 ygrek 6 | # (c) 2016 Amplidata, a HGST company. 7 | # 8 | # Licensed under the terms of BSD 3-clause 9 | # 10 | # Description 11 | # ----------- 12 | # 13 | # You will need GDB with python3. 14 | # This code reimplements Std.dump from extlib and is 15 | # limited to information available in runtime representation 16 | # of OCaml values. Not complete, doesn't handle cycles at all, 17 | # tested only on x64. 18 | # 19 | # At GDB prompt input: 20 | # source mlvalues.py 21 | # 22 | # And inspect any OCaml value: 23 | # ml_dump [/r[N]] # address or symbol representing OCaml value 24 | # optional /r flag controls the recursion depth limit when printing (default 1). 25 | # e.g.: 26 | # ml_dump caml_globals 27 | # ml_dump /r &camlList 28 | # 29 | # Or inspect an array of values: 30 | # ml_dump [/r[N]] 31 | # e.g.: 32 | # ml_dump 1 # inspecting single pointer to value 33 | # ml_dump gray_vals 5 34 | # 35 | # Inspect local (stack) GC roots: 36 | # ml_dump [/r[N]] caml_local_roots 37 | # 38 | # Show OCaml heap information: 39 | # ml_heap 40 | # 41 | # Scan OCaml heap region: 42 | # ml_scan [/r[N]] addr bytes 43 | # 44 | # Validate the OCaml heap: 45 | # ml_validate 46 | # This process can take a while and scans the entire heap, trying to 47 | # validate all values (e.g. string sizes, overflows, underflows, 48 | # stray pointers, etc.). Use it to find issues with the garbage 49 | # collector or OCaml run-time. 50 | # 51 | # Show a particular OCaml value, alternative to ml_dump: 52 | # ml_show [/r[] [verbosity] 53 | # Verbosity currently ranges from 0-3, with default being 1. 54 | # 0: Either type of value or error reason 55 | # 1: Type and value for simple types 56 | # 2: Type, value and sub-values for short aggregate/composite types 57 | # (Tries not to blow up on your screen) 58 | # 3: Type, value and sub-values of everything 59 | # OCaml lists are not detected, a higher recursion depth will 60 | # be needed to print it out entirely. 61 | # 62 | # Show memory address space info with OCaml specific-bits: 63 | # ml_target [verbosity] 64 | # This command is similar to "info target" but shows OCaml-specifics. 65 | # Verbosity is only valid for "all", values are 0-2, default is 1: 66 | # 0: OCaml stack, minor & major heap are displayed 67 | # 1: Same as 0 and static data, code and known GC metadata areads 68 | # 2: all memory types are displayed 69 | # e.g. 70 | # ml_target 0x00035208 71 | # ml_target all 3 72 | # 73 | # Find a particular value in memory: 74 | # ml_find 75 | # Similar to the gdb "find" command, but will also display the memory 76 | # type (see ml_target) where the value was found, making it easier to 77 | # find the right occurrence. 78 | # 79 | # Use Python class directly for detailed scrutiny, e.g.: 80 | # python x = OCamlValue(gdb.parse_and_eval("caml_globals")); print x.size_words() 81 | # 82 | # Changelog 83 | # --------- 84 | # 85 | # 2016-07-28 86 | # Add ml_validate, ml_show, ml_target and a host of utility code (by Amplidata) 87 | # 88 | # 2016-01-17 89 | # New command `ml_scan` to show values in OCaml heap 90 | # 91 | # 2015-10-22 92 | # Truncate long strings when printing 93 | # 94 | # 2014-04-29 95 | # Limit recursion depth when printing 96 | # 97 | # 2014-03-31 98 | # Be precise with type when reading runtime variables 99 | # 100 | # 2013-08-25 101 | # Use `cast` instead of `reinterpet_cast` for better compatibility (by ADEpt) 102 | # Better handling of missing debuginfo in OCaml runtime 103 | # Add usage examples 104 | # 105 | # 2013-08-01 106 | # Dump local (stack) GC roots with `ml_dump local_roots` 107 | # 108 | # 2012-08-09 109 | # Fix printing of strings with binary data (use 'latin1' codec) 110 | # 111 | # 2012-04-11 112 | # Dump the array of OCaml values with ml_dump 113 | # Inspect closure blocks 114 | # Tweak formatting 115 | # 116 | # 2012-03-07 117 | # New command `ml_heap` - shows some general information about OCaml heap: 118 | # * GC parameters 119 | # * total heap size 120 | # * allocated chunks layout 121 | # 122 | # 2011-12-27 123 | # Show symbol and name for custom values 124 | # Catch gdb.MemoryError and continue printing 125 | # Correctly lookup types 126 | # 127 | # 2011-12-24 128 | # Initial 129 | 130 | import functools 131 | import traceback 132 | import collections 133 | import bisect 134 | import struct 135 | import ctypes 136 | from enum import Enum 137 | 138 | def TraceMemoryError(f): 139 | """ 140 | Attribute that wraps the function in order to display a traceback 141 | when accessing invalid memory 142 | """ 143 | def wrapper(*args, **kwargs): 144 | try: 145 | return f(*args, **kwargs) 146 | except gdb.MemoryError as me: 147 | print("Function %s attempted to access invalid memory" % f.__name__) 148 | traceback.print_exc() 149 | raise 150 | return functools.update_wrapper(wrapper, f) 151 | 152 | def TraceAll(f): 153 | """ 154 | Attribute that wraps the function in order to display a traceback 155 | """ 156 | def wrapper(*args, **kwargs): 157 | try: 158 | return f(*args, **kwargs) 159 | except: 160 | traceback.print_exc() 161 | raise 162 | return functools.update_wrapper(wrapper, f) 163 | 164 | def try_dereference(gdbval): 165 | """ 166 | Attempt to dereference a gdb.Value returning None when the access fails. 167 | """ 168 | try: 169 | ret = gdbval.dereference() 170 | ret.fetch_lazy() 171 | return ret 172 | except gdb.MemoryError: 173 | return None 174 | 175 | def print_cont(*args, **kwargs): 176 | kwargs["end"] = '' 177 | print(*args, **kwargs) 178 | 179 | # gdb.Type's used often throughout the script 180 | intnat = size_t = charp = doublep = heap_chunk_head_p = caml_contextp = caml_thread_structp = None 181 | 182 | # do not lookup types at class level cause this script may be run before 183 | # the inferior image is loaded and gdb can't know sizeof(long) in advance 184 | def init_types(): 185 | global intnat, size_t, charp, doublep, heap_chunk_head_p, caml_contextp, caml_thread_structp 186 | 187 | if doublep is not None: 188 | return 189 | 190 | try: 191 | heap_chunk_head_p = gdb.lookup_type("heap_chunk_head").pointer() 192 | except: 193 | print("Didn't find 'heap_chunk_head'. Major heap walking is unavailable") 194 | pass 195 | 196 | try: 197 | caml_contextp = gdb.lookup_type("struct caml_context").pointer() 198 | except: 199 | print("Didn't find 'struct caml_context'. Stack walking unavailable.") 200 | pass 201 | 202 | try: 203 | caml_thread_structp = gdb.lookup_type("caml_thread_struct").pointer() 204 | except: 205 | print("Didn't find 'caml_thread_struct'. System thread roots unavailable.") 206 | pass 207 | 208 | intnat = gdb.lookup_type("intnat") 209 | size_t = gdb.lookup_type("size_t") 210 | charp = gdb.lookup_type("char").pointer() 211 | doublep = gdb.lookup_type("double").pointer() 212 | # keep this one last 213 | 214 | def get_value_safe(name, type=None): 215 | try: 216 | x = gdb.parse_and_eval(name) 217 | if type is not None: 218 | x = x.cast(type) 219 | return x 220 | except gdb.error: 221 | return None 222 | 223 | class MemoryType(Enum): 224 | """ 225 | Various types of memory we're interested in 226 | """ 227 | General = 0 228 | Stack = 1 229 | MajorHeap = 2 230 | MinorHeap = 3 231 | Code = 4 232 | StaticData = 5 233 | OtherStatic = 6 234 | Finalisers = 7 235 | GC_Metadata = 8 236 | LinuxSpecial = 9 237 | Unknown = 10 238 | 239 | Address = 100 240 | 241 | @classmethod 242 | def all(cls): 243 | return [ cls.General, cls.Stack, cls.MajorHeap, cls.MinorHeap, cls.Code, cls.StaticData, cls.OtherStatic, cls.Finalisers, cls.GC_Metadata, cls.LinuxSpecial, cls.Unknown] 244 | 245 | @functools.total_ordering 246 | class MemoryRange: 247 | """ 248 | A range of memory inside the process' memory address space 249 | """ 250 | def __init__(self, address, size, source, description, memtype=None): 251 | """ 252 | Args: 253 | address, size: describe memory range 254 | source: where this memory range was defined (e.g. a file, coredump or gdb) 255 | description: describes the contents of this memory range 256 | memtype: a MemoryType parameter if the contents are known 257 | or None otherwise. In this case this class will attempt 258 | to deduce the MemoryType from the description, following 259 | some standard gdb names 260 | """ 261 | # We might be passed gdb.Value's, so convert them to native int's to reduce memory footprint 262 | self.startaddr = int(address) 263 | self.size = int(size) 264 | self.source = source 265 | self.description = description 266 | if memtype is None: 267 | self.memtype = MemoryRange._determine_memtype(description) 268 | else: 269 | self.memtype = memtype 270 | 271 | @staticmethod 272 | def _determine_memtype(description): 273 | """ 274 | Determine the type of memory from the description 275 | This function knows most of the descriptions returned by 'info target' and 'info proc mappings' 276 | and will deduce the actual memory type based on that. 277 | """ 278 | if description.startswith('.'): 279 | t = description.split()[0] 280 | if t in [ ".data", ".bss", ".rodata" ]: 281 | return MemoryType.StaticData 282 | elif t == ".text": 283 | return MemoryType.Code 284 | else: 285 | return MemoryType.OtherStatic 286 | elif description.startswith('['): 287 | if description == "[stack]": 288 | return MemoryType.Stack 289 | elif description in [ "[vdso]", "[vsyscall]", "[vvar]" ]: 290 | return MemoryType.LinuxSpecial 291 | elif description == "[heap]": 292 | return MemoryType.General 293 | else: 294 | return MemoryType.Unknown 295 | elif description.startswith("load"): 296 | return MemoryType.General 297 | elif "libc" in description: 298 | # C library is known to have some odd-named sections 299 | return MemoryType.OtherStatic 300 | elif description == "text_env": 301 | return MemoryType.OtherStatic 302 | else: 303 | return MemoryType.Unknown 304 | 305 | @staticmethod 306 | def from_addr(address): 307 | """Create a single-address MemoryRange""" 308 | return MemoryRange(address, size_t.sizeof, "MemorySpace.from_addr", "address", MemoryType.Address) 309 | 310 | @staticmethod 311 | def part_of(other, start, size): 312 | """Create a MemoryRange that spans a chunk of the provided MemoryRange""" 313 | return MemoryRange(start, size, other.source, other.description, other.memtype) 314 | 315 | @property 316 | def lastaddr(self): 317 | """Last valid address in this range""" 318 | return self.startaddr + self.size - 1 319 | 320 | @property 321 | def endaddr(self): 322 | """Post-end address of this range""" 323 | return self.startaddr + self.size 324 | 325 | def __contains__(self, address): 326 | return self.startaddr <= address < self.endaddr 327 | 328 | def _overlaps(self, other): 329 | return self.startaddr <= other.startaddr < self.endaddr \ 330 | or other.startaddr <= self.startaddr < self.endaddr 331 | 332 | def __eq__(self, other): 333 | return isinstance(other, MemoryRange) and self.startaddr == other.startaddr 334 | 335 | def __lt__(self, other): 336 | return isinstance(other, MemoryRange) and self.startaddr < other.startaddr 337 | 338 | def settype(self, memtype, description=None): 339 | """ 340 | Override the memory type and, optionally, the description 341 | """ 342 | self.memtype = memtype 343 | if description is not None: 344 | self.description = description 345 | 346 | def __str__(self): 347 | return "0x%08X - 0x%08X is %15s|%s" % (self.startaddr, self.startaddr + self.size, self.memtype.name, self.description) 348 | 349 | 350 | class MemorySpace: 351 | """Describes the inferior process' memory address space/layout""" 352 | def __init__(self): 353 | self.have_accurate_info = True 354 | self.populate_ranges() 355 | self.annotate_stacks() 356 | self.annotate_major_heap() 357 | self.annotate_minor_heap() 358 | self.annotate_finalisers() 359 | 360 | def set_inaccurate(self, missing): 361 | print("Some debug information is missing from the binary: %s . Not all functionality is available" % missing) 362 | self.have_accurate_info = False 363 | 364 | def display(self, verbosity): 365 | """ 366 | Pretty print the memory address space with varying verbosity levels: 367 | 0: only OCaml Stack and Minor/Major Heap areas are displayed 368 | 1: In addition, static data, code and known GC metadata areas are displayed 369 | 2: all memory types are displayed 370 | """ 371 | if verbosity == 0: 372 | interesting = [ MemoryType.Stack, MemoryType.MinorHeap, MemoryType.MajorHeap ] 373 | elif verbosity == 1: 374 | interesting = [ MemoryType.Stack, MemoryType.MinorHeap, MemoryType.MajorHeap, 375 | MemoryType.StaticData, MemoryType.Code, MemoryType.GC_Metadata ] 376 | else: 377 | interesting = MemoryType.all() 378 | 379 | for r in self.ranges: 380 | if r.memtype in interesting: 381 | print("%s" % str(r)) 382 | 383 | def get_range(self, address): 384 | """Get the memory range containing the provided address or None otherwise.""" 385 | index = bisect.bisect(self.ranges, MemoryRange.from_addr(address)) 386 | if index >= len(self.ranges): 387 | return None 388 | memrange = self.ranges[index-1] 389 | if address in memrange: 390 | return memrange 391 | return None 392 | 393 | def is_address_of_type(self, address, *memtypes): 394 | """Return True of the address is contained in a memory range of one of the provided types""" 395 | memrange = self.get_range(address) 396 | return memrange is not None and memrange.memtype in memtypes 397 | 398 | def is_on_stack(self, address): 399 | """Indicate whether the address is on one of the inferior threads' stack""" 400 | return self.is_address_of_type(address, MemoryType.Stack) 401 | 402 | def is_in_heap(self, address): 403 | """Indicate whether the address points to data in the OCaml heap""" 404 | return self.is_address_of_type(address, MemoryType.MajorHeap, MemoryType.MinorHeap) 405 | 406 | # Beware, on some architectures data is sometimes stored intermingled with code in the .text section 407 | # Typically this is after the return instruction from a function 408 | def is_valid_data(self, address): 409 | """Indicate whether the address points to a memory range known to contain data""" 410 | return self.is_address_of_type(address, 411 | MemoryType.MajorHeap, MemoryType.MinorHeap, 412 | MemoryType.StaticData, MemoryType.Stack, 413 | MemoryType.Finalisers) 414 | 415 | def is_code(self, address): 416 | """Indicate whether the address points to a memory range containing code""" 417 | return self.is_address_of_type(address, MemoryType.Code) 418 | 419 | def search_memory_of_types(self, pattern, *memtypes): 420 | """Search all memory of the given types for the provided pattern. 421 | The pattern must adhere to the buffer interface, the simplest 422 | way to create this is probably struct.pack(...).""" 423 | inferior = gdb.selected_inferior() 424 | locations = [] 425 | for memrange in self.ranges: 426 | if memrange.memtype not in memtypes: 427 | continue 428 | 429 | loc = ctypes.c_void_p(memrange.startaddr).value 430 | end = ctypes.c_void_p(memrange.endaddr).value 431 | while loc < end: 432 | loc = inferior.search_memory(loc, end - loc, pattern) 433 | if loc is None or loc == 0: 434 | loc = end 435 | else: 436 | locations.append(loc) 437 | loc += size_t.sizeof 438 | 439 | return locations 440 | 441 | # TODO: make this function truncate the existing ranges, rather than delete them 442 | # It will allow annotate_split_range to keep return value over multiple splits 443 | def split_range_at(self, address): 444 | """Split a memory range at the provided address and returns both new ranges.""" 445 | index = bisect.bisect(self.ranges, MemoryRange.from_addr(address)) 446 | index -= 1 447 | # address is before any existing ranges 448 | if index < 0: 449 | return None, None 450 | 451 | memrange = self.ranges[index] 452 | # address points to the beginning of an existing range 453 | if memrange.startaddr == address: 454 | previous = None if index == 0 else self.ranges[index-1] 455 | return previous, memrange 456 | 457 | # address inside the memory range 458 | if address in memrange: 459 | first = MemoryRange(memrange.startaddr, address - memrange.startaddr, memrange.source, memrange.description, memrange.memtype) 460 | second = MemoryRange(address, memrange.startaddr + memrange.size - address, memrange.source, memrange.description, memrange.memtype) 461 | 462 | del self.ranges[index] 463 | bisect.insort(self.ranges, first) 464 | bisect.insort(self.ranges, second) 465 | return first, second 466 | 467 | # address (right) after the memory range and not contained by another memory range 468 | previous = memrange if address == memrange.endaddr else None 469 | return previous, None 470 | 471 | def tentative_add_range(self, memrange): 472 | """Add a memory range, leaving any existing overlaps untouched, yet filling holes where necessary""" 473 | # optimize the easy case. makes rest of code simpler 474 | if not len(self.ranges): 475 | bisect.insort(self.ranges, memrange) 476 | return 477 | 478 | probeaddr = memrange.startaddr 479 | while probeaddr < memrange.endaddr: 480 | index = bisect.bisect(self.ranges, MemoryRange.from_addr(probeaddr)) 481 | 482 | # before first 483 | if index == 0: 484 | nxt = self.ranges[index] 485 | lastaddr = nxt.startaddr 486 | bisect.insort(self.ranges, MemoryRange.part_of(memrange, probeaddr, lastaddr - probeaddr)) 487 | probeaddr = nxt.endaddr 488 | continue 489 | 490 | # after last 491 | if index >= len(self.ranges): 492 | prev = self.ranges[index-1] 493 | startaddr = prev.endaddr 494 | if startaddr <= probeaddr: 495 | bisect.insort(self.ranges, MemoryRange.part_of(memrange, probeaddr, memrange.endaddr - probeaddr)) 496 | probeaddr = memrange.endaddr 497 | else: 498 | probeaddr = startaddr 499 | continue 500 | 501 | # in between 2 502 | prev = self.ranges[index-1] 503 | if probeaddr in prev: 504 | probeaddr = prev.endaddr 505 | continue 506 | 507 | nxt = self.ranges[index] 508 | if nxt.startaddr in memrange: 509 | bisect.insort(self.ranges, MemoryRange.part_of(memrange, probeaddr, nxt.startaddr - probeaddr)) 510 | probeaddr = nxt.endaddr 511 | else: 512 | bisect.insort(self.ranges, MemoryRange.part_of(memrange, probeaddr, memrange.endaddr - probeaddr)) 513 | probeaddr = memrange.endaddr 514 | 515 | def annotate_split_range(self, address, size, memtype, description): 516 | """Annotate an existing range (or part thereof) as the specified MemoryType, splitting the range where necessary""" 517 | _, _ = self.split_range_at(address) # do not keep return values, following call may delete it from self.ranges 518 | end, _ = self.split_range_at(address+size) 519 | begin = self.get_range(address) 520 | 521 | begin.settype(memtype, description) 522 | if end != begin: 523 | print("Annotating '%s' over 2 separate ranges: %s and %s" % (description, str(begin), str(end))) 524 | # TODO: merge the two 525 | end.settype(memtype, description) 526 | 527 | def populate_ranges(self,): 528 | """Populate the memory ranges from coredump info or live gdb information""" 529 | self.ranges = list() 530 | # coredump: info target shows all sections in full detail 531 | # live debug: only file-backed sections are shown 532 | targetinfo = gdb.execute("info target", False, True) 533 | for line in targetinfo.splitlines(): 534 | line = line.strip() 535 | if line.startswith('`'): 536 | line = line.split("'")[1] 537 | source = line[1:] 538 | continue 539 | if not line.startswith("0x"): 540 | continue 541 | 542 | start, dash, end, str_is, memtype = line.split(maxsplit=4) 543 | assert(dash == '-' and str_is == 'is') 544 | start = int(start, 16) 545 | end = int(end, 16) 546 | new_range = MemoryRange(start, end-start, source, memtype) 547 | startoverlap = self.get_range(start) 548 | endoverlap = self.get_range(end) 549 | 550 | if endoverlap == startoverlap: 551 | endoverlap = None 552 | 553 | #TODO: splitup and punch holes/replace 554 | if memtype.startswith('.'): 555 | # gdb reports loadXXX sections on top of file-backed sections of the binary 556 | # probably because the kernel maps writeable pages on top of them 557 | # Therefore, keep the more accurate description from the file-backed section 558 | if startoverlap is not None and startoverlap.memtype == MemoryType.General: 559 | previous, current = self.split_range_at(start) 560 | self.ranges.remove(current) 561 | startoverlap = None 562 | if endoverlap is not None and endoverlap.memtype == MemoryType.General: 563 | current, end = self.split_range_at(end) 564 | self.ranges.remove(current) 565 | endoverlap = None 566 | 567 | if startoverlap is not None and endoverlap is not None: 568 | print("Overlapping memory ranges: %s in %s -> %s" % 569 | (new_range, str(startoverlap), str(endoverlap))) 570 | bisect.insort(self.ranges, new_range) 571 | 572 | # live target: run-time allocated memory and some file-backed sections 573 | # There typically is overlap with the 'info target' output, so give precedence 574 | # to the previously added ranges 575 | mappinginfo = gdb.execute("info proc mappings", False, True) 576 | for line in mappinginfo.splitlines(): 577 | line = line.strip() 578 | if not line.startswith("0x"): 579 | continue 580 | 581 | items = line.split() 582 | if len(items) == 4: 583 | start, end, size, offset = items 584 | source = "unknown" 585 | elif len(items) == 5: 586 | start, end, size, offset, source = items 587 | else: 588 | print("Unexpected line when parsing 'info proc mappings': %s" % line) 589 | continue 590 | 591 | start = int(start, 16) 592 | size = int(size, 16) 593 | end = int(end, 16) 594 | 595 | new_range = MemoryRange(start, size, source, source) 596 | self.tentative_add_range(new_range) 597 | 598 | def annotate_stacks(self): 599 | """ 600 | Mark all memoryranges containing thread stacks as such. 601 | We do this by taking the stack pointer of each thread 602 | and marking the target address' memory range. 603 | There typically are guard pages around stacks, they will 604 | not be marked as stack. 605 | """ 606 | curthread = gdb.selected_thread() 607 | try: 608 | for thread in gdb.selected_inferior().threads(): 609 | thread.switch() 610 | 611 | # This is different depending on gdb version 612 | try: 613 | frame = gdb.newest_frame() 614 | stackpointer = frame.read_register("sp") 615 | except: 616 | regname, as_hex, as_int = gdb.execute("info register sp", False, True).split() 617 | stackpointer = int(as_hex, 16) 618 | memrange = self.get_range(stackpointer) 619 | tid = thread.ptid[1] if thread.ptid[1] else thread.ptid[2] 620 | if memrange is None: 621 | print("Did not find stack of thread %d" % tid) 622 | continue 623 | memrange.settype(MemoryType.Stack, "Stack of thread %d(TID %d)" % (thread.num, tid)) 624 | finally: 625 | curthread.switch() 626 | 627 | def annotate_major_heap(self): 628 | """ 629 | Mark all memory ranges containing OCaml stack as such. 630 | Memory ranges are split when needed to avoid marking padding as actual heap. 631 | """ 632 | # TODO: we could provide a fallback path by manually taking the proper bytes as 633 | # the ml_heap command does 634 | if heap_chunk_head_p is None: 635 | self.set_inaccurate("major heap info") 636 | return 637 | 638 | heap_chunk_ptr = get_value_safe("caml_heap_start", heap_chunk_head_p) 639 | try: 640 | while heap_chunk_ptr is not None and heap_chunk_ptr != 0: 641 | heap_chunk_head_ptr = heap_chunk_ptr - 1 642 | heap_chunk_head = heap_chunk_head_ptr.dereference() 643 | 644 | block = heap_chunk_head["block"] 645 | size = heap_chunk_head["size"] 646 | 647 | memrange = self.get_range(heap_chunk_head_ptr) 648 | if memrange is not None: 649 | self.annotate_split_range(heap_chunk_ptr.cast(size_t), size, MemoryType.MajorHeap, "Major heap") 650 | else: 651 | new_range = MemoryRange(heap_chunk_ptr.cast(size_t), size, "gdb", "Major Heap", MemoryType.MajorHeap) 652 | assert(false) # This shouldn't happen 653 | self.tentative_add_range(new_range) 654 | 655 | heap_chunk_ptr = heap_chunk_head["next"].cast(heap_chunk_head_p) 656 | except gdb.MemoryError: 657 | print("OCaml major heap linked list is corrupt: last entry = 0x%08X" % (int(heap_chunk_ptr.cast(size_t)))) 658 | 659 | gray_vals = get_value_safe("gray_vals", size_t) 660 | gray_vals_cur = get_value_safe("gray_vals_cur", size_t) 661 | gray_vals_size = get_value_safe("gray_vals_size", size_t) 662 | gray_vals_end = get_value_safe("gray_vals_end", size_t) 663 | if gray_vals is not None and gray_vals_size is not None: 664 | self.annotate_split_range(gray_vals, gray_vals_size, MemoryType.GC_Metadata, "major GC's gray values") 665 | if gray_vals_cur is not None and gray_vals_end is not None: 666 | self.annotate_split_range(gray_vals_cur, gray_vals_end - gray_vals_cur, MemoryType.GC_Metadata, "major GC's current gray values") 667 | 668 | def annotate_minor_heap(self): 669 | """ 670 | Mark the minor heap memory range as such. 671 | """ 672 | minor_start = get_value_safe("caml_young_base", size_t) 673 | minor_end = get_value_safe("caml_young_end", size_t) 674 | if minor_start is None or minor_end is None: 675 | return 676 | minor_size = minor_end - minor_start 677 | 678 | memrange = self.get_range(minor_start) 679 | if memrange is not None: 680 | self.annotate_split_range(minor_start, minor_size, MemoryType.MinorHeap, "Minor heap") 681 | else: 682 | new_range = MemoryRange(minor_start, minor_size, "gdb", "Minor Heap", MemoryType.MinorHeap) 683 | self.set_inaccurate("minor heap memory map info") 684 | bisect.insort(self.ranges, new_range) 685 | 686 | def annotate_finalisers(self): 687 | """ 688 | Mark the table of finalisers as such. 689 | """ 690 | table = get_value_safe("final_table", size_t) 691 | size = get_value_safe("'finalise.d.c'::size", size_t) 692 | if size is None: 693 | size = get_value_safe("'finalise.c'::size", size_t) 694 | 695 | if table is None or size is None: 696 | self.set_inaccurate("finalisers") 697 | return 698 | 699 | if table != 0 and size != 0: 700 | self.annotate_split_range(table, size, MemoryType.Finalisers, "Finalisers table") 701 | 702 | memoryspace = None 703 | 704 | def init_memoryspace(reload=False): 705 | """Load memory space information from the inferior process.""" 706 | global memoryspace 707 | if memoryspace is not None and not reload: 708 | return 709 | 710 | try: 711 | memoryspace = MemorySpace() 712 | except: 713 | traceback.print_exc() 714 | raise 715 | 716 | def resolve(address): 717 | """Resolve an address to a symbol (function/variable name).""" 718 | symbol = gdb.execute("info symbol 0x%08X" % int(address.cast(size_t)), False, True).split(" ",1)[0] 719 | if symbol == "No": # FIXME "No symbol matches" 720 | return "0x%08X" % int(address.cast(size_t)) 721 | else: 722 | return "%s" % symbol 723 | 724 | # This class represents gdb.Value as OCaml value. 725 | # Analogue to stdlib Obj module. 726 | # 727 | # It probably contains more casts than strictly necessary, 728 | # but after all who am I to understand python type system? 729 | # Just a mere OCaml coder. And this is my first python program. 730 | class OCamlValue: 731 | 732 | VALUE_TAG = 0 733 | LAZY_TAG = 246 734 | CLOSURE_TAG = 247 735 | OBJECT_TAG = 248 736 | INFIX_TAG = 249 737 | FORWARD_TAG = 250 738 | NO_SCAN_TAG = 251 739 | ABSTRACT_TAG = 251 740 | STRING_TAG = 252 741 | DOUBLE_TAG = 253 742 | DOUBLE_ARRAY_TAG = 254 743 | CUSTOM_TAG = 255 744 | FINAL_TAG = 255 745 | INT_TAG = 1000 746 | OUT_OF_HEAP_TAG = 1001 747 | UNALIGNED_TAG = 1002 748 | 749 | VALID_TAGS = (VALUE_TAG, LAZY_TAG, CLOSURE_TAG, OBJECT_TAG, INFIX_TAG, FORWARD_TAG, NO_SCAN_TAG, ABSTRACT_TAG, STRING_TAG, 750 | DOUBLE_TAG, DOUBLE_ARRAY_TAG, CUSTOM_TAG) 751 | 752 | def __init__(self,v, parent=None, parentindex=None): 753 | if isinstance(v, OCamlValue): 754 | self.v = v.v 755 | elif not isinstance(v, gdb.Value): 756 | self.v = gdb.Value(v).cast(intnat) 757 | else: 758 | self.v = v.cast(intnat) 759 | self.parent = parent 760 | self.parentindex = parentindex 761 | if parent is not None and parentindex is None: 762 | raise Exception("If a parent is specified, the parentindex is also expected") 763 | 764 | def is_int(self): 765 | """Indicate whether the OCamlValue is an integer.""" 766 | return self.v & 1 != 0 767 | 768 | def is_block(self): 769 | """Indicate whether the OCamlValue is a pointer to an OCaml block.""" 770 | return self.v & 1 == 0 771 | 772 | @staticmethod 773 | def of_int(x): 774 | return OCamlValue(gdb.Value((x<<1) + 1)) 775 | 776 | @staticmethod 777 | def of_val(x): 778 | assert(x & 1 == 0) 779 | return OCamlValue(gdb.Value(x)) 780 | 781 | @staticmethod 782 | def of_bool(x): 783 | assert(x & (~1) == 0) 784 | return OCamlValue.of_int(x != 0) 785 | 786 | def int(self): 787 | """Get the integer value of this instance. Must only be called if it is an int.""" 788 | assert(self.is_int()) 789 | return self.v >> 1 790 | 791 | def val(self): 792 | """Get the gdb.Value of this instance.""" 793 | return self.v 794 | 795 | def _string(self,enc='latin1'): 796 | assert self.tag() == OCamlValue.STRING_TAG 797 | 798 | byte_size = self.size_bytes() 799 | if byte_size is None: 800 | return "Invalid string: could not determine string size: value outside the heap: value = 0x%X" % self.v 801 | 802 | padsize_byte = (self.v + (byte_size - 1)).cast(charp) 803 | padsize = try_dereference(padsize_byte) 804 | if padsize is None: 805 | return "Invalid string: pad byte not in valid memory. value=0x%X, size in bytes=%d, pad byte: 0x%X" % (self.v, byte_size, padsize_byte) 806 | 807 | slen = byte_size - 1 - padsize 808 | trailing_nul = try_dereference((self.v + slen).cast(charp)) 809 | assert(trailing_nul is not None) # we shouldn't get here if we could dereference padsize above 810 | if trailing_nul != 0: 811 | return "Invalid string: no NUL-byte at end of string. value=0x%X, size in bytes=%d, last byte=%d" % (self.v, byte_size, trailing_nul) 812 | 813 | if slen <= 1024: 814 | s = self.v.cast(charp).string(enc, 'ignore', slen) 815 | return s.__repr__() 816 | else: 817 | s = self.v.cast(charp).string(enc, 'ignore', 256) 818 | return "%s..<%d bytes total>" % (s.__repr__(), slen) 819 | 820 | def _float(self): 821 | assert self.tag() == OCamlValue.DOUBLE_TAG 822 | words = self.size_words() 823 | if words != doublep.target().sizeof: # Don't check for None, the assert already implies it 824 | return "Invalid float: size=%d" % words 825 | f = try_dereference(self.v.cast(doublep)) 826 | assert(f is not None) # This is quite unlikely, unless v is outside the heap, while the header is inside 827 | return "%f" % f 828 | 829 | def __cmp__(self,other): 830 | if self.v == other.v: 831 | return 0 832 | else: 833 | if self.v > other.v: 834 | return 1 835 | else: 836 | return -1 837 | 838 | def __str__(self): 839 | if self.is_int(): 840 | return ("%d" % self.int()) 841 | else: 842 | return ("0x%X" % self.val()) 843 | 844 | def __repr__(self): 845 | return "OCamlValue(%s)" % self.__str__() 846 | 847 | def hd(self): 848 | """Get the header value or None if inaccessible. Must only be called on a block.""" 849 | header = (self.v - intnat.sizeof).cast(size_t.pointer()) 850 | return try_dereference(header) 851 | 852 | def tag(self): 853 | """Get the block tag or None if inaccessible. Must only be called on a block.""" 854 | if self.is_int(): 855 | return OCamlValue.INT_TAG 856 | else: 857 | hd = self.hd() 858 | if hd is None: 859 | return OCamlValue.OUT_OF_HEAP_TAG 860 | return hd & 0xFF 861 | 862 | def _unsafe_field(self,i): 863 | """ 864 | Get the contents of the indicated field or None if inaccessible. 865 | Does not check boundaries nor validate this is a block. 866 | """ 867 | x = try_dereference( (self.v + (i * intnat.sizeof)).cast(intnat.pointer()) ) 868 | if x is None: 869 | return None 870 | return OCamlValue(x, parent=self, parentindex = i) 871 | 872 | def field(self,i): 873 | """ 874 | Get the contents of the indicated field or None if inaccessible. 875 | Must only be called on a block, cannot obtain a double from a double array. 876 | """ 877 | assert self.is_block() 878 | assert self.tag () != OCamlValue.DOUBLE_ARRAY_TAG # FIXME not implemented 879 | n = self.size_words() 880 | if n is None: 881 | return None 882 | if i < 0 or i >= n: 883 | raise IndexError("field %d size %d" % (i,n)) 884 | return self._unsafe_field(i) 885 | #t = intnat.array(n).pointer() 886 | #return OCamlValue(self.v.cast(t).dereference()[i]) 887 | 888 | def fields(self): 889 | """ 890 | Get a list of all fields of this block. 891 | Must only be called on a block, cannot obtain fields of a double array. 892 | When any access goes out of bounds, a single None value is appended to 893 | the list. 894 | """ 895 | assert self.is_block() 896 | assert self.tag () != OCamlValue.DOUBLE_ARRAY_TAG # FIXME not implemented 897 | 898 | words = self.size_words() 899 | if words is None: 900 | return [None] 901 | 902 | a = [] 903 | for i in range(int(words)): 904 | field = self._unsafe_field(i) 905 | a.append(field) 906 | if field is None: 907 | break # Append a single invalid value to indicate out-of-bounds to the user 908 | return a 909 | 910 | def size_words(self): 911 | """ 912 | Return the size of this block in number of words or None if inaccessible 913 | Must only be called on a block. 914 | """ 915 | assert self.is_block() 916 | hd = self.hd() 917 | if hd is None: 918 | return None 919 | return hd >> 10 920 | 921 | def size_bytes(self): 922 | """ 923 | Return the size of this block in number of bytes or None if inaccessible 924 | Must only be called on a block. 925 | """ 926 | size_words = self.size_words() 927 | if size_words is None: 928 | return None 929 | return size_words * intnat.sizeof 930 | 931 | def _is_list(self): 932 | """Indicate if this block describes a list.""" 933 | # TODO 934 | if self.is_int(): 935 | return self.int() == 0 936 | else: 937 | return self.size_words() == 2 and self.tag() == 0 and self.field(1)._is_list() 938 | 939 | def get_list(self): 940 | """Parse an OCaml list into a python list.""" 941 | a = [] 942 | l = self 943 | while l.is_block(): 944 | a.append(l.field(0)) 945 | l = l.field(1) 946 | return a 947 | 948 | def get_list_length(self): 949 | """Get the length of an OCaml list""" 950 | n = 0 951 | l = self 952 | while l.is_block(): 953 | n+=1 954 | l = l.field(1) 955 | return n 956 | 957 | def resolve(self): 958 | """Resolve the block pointer contained in this OCamlValue.""" 959 | return resolve(self.val()) 960 | 961 | def show_opaque(self,s): 962 | print_cont("<%s at 0x%x>" % (s,int(self.val()))) 963 | 964 | def show_seq(self,seq,delim,recurse,raw=False): 965 | for i, x in enumerate(seq): 966 | if i: 967 | print_cont(delim) 968 | if raw: 969 | print_cont(x.resolve()) 970 | else: 971 | x.show(recurse) 972 | 973 | # Verbosity: 974 | # Error information is always shown 975 | # 0: Print type of the OCamlValue 976 | # 1: Print type and value of the OCamlValue, only displays number of items for sequence types 977 | # 2: Print type and value of the OCamlValue, display full contents of not to long, 978 | # otherwise same as 1 979 | # Each of the following functions interprets the OCamlValue per the function name 980 | # and displays the value according to the given verbosity. 981 | # 982 | # The functions get all of their input through function arguments 983 | # to avoid excessive fetching through gdb.Value and duplicating parsing logic. 984 | 985 | def _stringify_int(self, value, verbosity): 986 | if verbosity == 0: 987 | return "Integer" 988 | else: 989 | return "Integer(%d/0x%08X)" % (value, value) 990 | 991 | def _stringify_invalid_block(self, pointer, reason, verbosity): 992 | if verbosity == 0: 993 | return "Invalid block(%s)" % reason 994 | else: 995 | return "Invalid block(%s, v=0x%08X)" % (reason, pointer) 996 | 997 | def _stringify_invalid_size(self, pointer, size, item, verbosity): 998 | if verbosity == 0: 999 | return "Invalid size %s" 1000 | else: 1001 | return "Invalid size %s(v=0x%08X, size=%d)" % (item, self.v, self.size_words()) 1002 | 1003 | def _stringify_generic(self, prefix, fields, verbosity): 1004 | size = len(fields) 1005 | if verbosity == 0: 1006 | return prefix 1007 | elif verbosity >= 2 and size <= 8: 1008 | return "%s [%s]" % (prefix, ', '.join(["0x%08X"%f.v for f in fields])) 1009 | else: 1010 | return "%s (%d items)" % (prefix, size) 1011 | 1012 | def _stringify_value(self, fields, verbosity): 1013 | return self._stringify_generic("Array/Tuple/Record/List entry", fields, verbosity) 1014 | 1015 | def _stringify_lazy_value(self, pointer, verbosity): 1016 | if verbosity == 0: 1017 | return "Lazy value" 1018 | else: 1019 | return "Lazy value (0x%08X)" % pointer 1020 | 1021 | def _stringify_lazy_result(self, fields, verbosity): 1022 | return self._stringify_generic("Lazy result", fields, verbosity) 1023 | 1024 | def _stringify_invalid_object(self, classval, objectid, reason, verbosity): 1025 | if verbosity == 0: 1026 | return "Object with %s" % reason 1027 | else: 1028 | return "Object with %s (cls=0x%08X, oid=0x%08X)" % (reason, classval, objectid) 1029 | 1030 | def _stringify_object(self, classval, objectid, fields, verbosity): 1031 | size = len(fields) 1032 | if verbosity == 0: 1033 | return "Object" 1034 | elif verbosity >= 2 and size <= 8: 1035 | return "Object (cls=0x%08X, oid=%d) [%s]" % (classval, objectid, 1036 | ', '.join(["0x%08X"%f.v for f in self.fields()[2:]])) 1037 | else: 1038 | return "Object (cls=0x%08X, oid=%d) [%d members]" % (classval, objectid, size) 1039 | 1040 | def _stringify_empty_closure(self, fields, verbosity): 1041 | if verbosity == 0: 1042 | return "Empty closure" 1043 | 1044 | size = len(fields) 1045 | if verbosity > 2 or (verbosity == 2 and size <= 8): 1046 | return "Empty closure (%s)" % ', '.join(["0x%08X"%f.v for f in fields]) 1047 | else: 1048 | return "Empty closure (%d items)" % size 1049 | 1050 | def _stringify_closure(self, functions, fields, verbosity): 1051 | prefix = "Infixed closure" if len(functions) > 1 else "Closure" 1052 | 1053 | if verbosity == 0: 1054 | return prefix 1055 | 1056 | funcnames = [] 1057 | for (function, real_function) in functions: 1058 | if real_function != function: 1059 | funcnames.append("%s via %s" % (real_function, function)) 1060 | else: 1061 | funcnames.append(function) 1062 | 1063 | targets = ", ".join(funcnames) 1064 | 1065 | size = len(fields) 1066 | if verbosity > 2 or (verbosity == 2 and size <= 8): 1067 | return "%s to %s(%s)" % (prefix, targets, 1068 | ', '.join(["0x%08X"%f.v for f in fields])) 1069 | else: 1070 | return "%s to %s(%d)" % (prefix, targets, size) 1071 | 1072 | def _stringify_closure_arity_mismatch(self, function, arity, size, reason, verbosity): 1073 | if verbosity == 0: 1074 | return "Closure with %s" % reason 1075 | else: 1076 | return "Closure to %s with %s(arity=0x%08X, size=%d)" % (function, reason, arity, size) 1077 | 1078 | def _stringify_string(self, string, verbosity): 1079 | size = len(string) 1080 | if verbosity == 0: 1081 | return "String" 1082 | elif (verbosity == 2 and size < 64) or verbosity > 2: 1083 | return "String '%s'" % string 1084 | else: 1085 | return "String '%s...%d total'" % (string[:48], size) 1086 | 1087 | def _stringify_double(self, value, verbosity): 1088 | if verbosity == 0: 1089 | return "Double" 1090 | else: 1091 | return "Double: %f" % value 1092 | 1093 | def _stringify_structured_block(self, fields, verbosity): 1094 | size = len(fields) 1095 | if verbosity == 0: 1096 | return "Structured block" 1097 | elif verbosity >= 2 and size < 8: 1098 | return "Structured block [%s]" % ', '.join(["0x%08X"%f.v for f in fields]) 1099 | else: 1100 | return "Structured block [%d total]" % size 1101 | 1102 | @TraceMemoryError 1103 | def try_parse(self, verbosity=0): 1104 | #print("Trying to validate: 0x%X" % self.v) 1105 | if self.v == 0: 1106 | # TODO: within some sections NULL pointers are expected, e.g. global_data 1107 | return False, "NULL pointer", [] 1108 | if self.is_int(): 1109 | return True, self._stringify_int(self.int(), verbosity), [] 1110 | 1111 | # It's a block... 1112 | 1113 | if (self.v & (intnat.sizeof-1)) != 0: 1114 | return False, self._stringify_invalid_block(int(self.v), "Unaligned pointer", verbosity), [] 1115 | 1116 | ptr = self.v.cast(intnat.pointer()) 1117 | hd = self.hd() 1118 | if hd is None: 1119 | return False, self._stringify_invalid_block(int(self.v), "Out-of-bounds header", verbosity), [] 1120 | if try_dereference(ptr) is None: 1121 | return False, self._stringify_invalid_block(int(self.v), "Out-of-bounds pointer", verbosity), [] 1122 | 1123 | if memoryspace.have_accurate_info and not memoryspace.is_valid_data(self.v): 1124 | memrange = memoryspace.get_range(self.v) 1125 | return False, "Value (0x%08X) not in data memory: %s (%s)" % (self.v, str(memrange), self.resolve()), [] 1126 | 1127 | word_size = self.size_words() 1128 | if self._unsafe_field(word_size - 1) is None: 1129 | return False, self._stringify_invalid_size(int(self.v), word_size, "of unknown type", verbosity), [] 1130 | 1131 | # TODO: there's a limit to block sizes allocated on the minor heap 1132 | if verbosity >= 2 and word_size > 1024*1024: 1133 | print("Warning: v=0x%08X is greater than 1MiB: %d"%(self.v, word_size)) 1134 | 1135 | # Pointers and size look acceptable 1136 | 1137 | tag = self.tag() 1138 | # These if/elif conditions are ordered according to expected prevalence for performance 1139 | # TODO: recognize and print lists properly. Now they will be printed as nested structures. 1140 | if tag == OCamlValue.VALUE_TAG: 1141 | fields = self.fields() 1142 | return True, self._stringify_value(fields, verbosity), fields 1143 | 1144 | elif tag == OCamlValue.STRING_TAG: 1145 | byte_size = self.size_bytes() 1146 | padsize_byte = (self.v + byte_size - 1).cast(charp) 1147 | padsize = padsize_byte.dereference() 1148 | if padsize > intnat.sizeof: 1149 | return False, "String with invalid padding byte: %d" % padsize, [] 1150 | 1151 | real_len = byte_size - 1 - padsize 1152 | trailing_nul = (self.v + real_len).cast(charp).dereference() 1153 | if trailing_nul != 0: 1154 | return False, "String without trailing NUL: %d" % trailing_nul, [] 1155 | 1156 | string = self.v.cast(charp).string('latin1', 'ignore', real_len) 1157 | return True, self._stringify_string(string, verbosity), [] 1158 | 1159 | elif tag == OCamlValue.DOUBLE_TAG: 1160 | if self.size_bytes() != doublep.target().sizeof: 1161 | return False, self._stringify_invalid_size(int(self.v), size, "double", verbosity), [] 1162 | 1163 | value = self.v.cast(doublep).dereference() 1164 | return True, self._stringify_double(value, verbosity), [] 1165 | 1166 | elif tag == OCamlValue.OBJECT_TAG: 1167 | if word_size < 2: 1168 | return False, self._stringify_invalid_size(int(self.v), word_size, "of object", verbosity), [] 1169 | 1170 | classval = self.field(0) 1171 | objectid = self.field(1) 1172 | if classval.is_int(): 1173 | return False, self._stringify_invalid_object(classval.val(), objectid.val(), "invalid class id"), [] 1174 | 1175 | if not objectid.is_int(): 1176 | return False, self._stringify_invalid_object(classval.val(), objectid.val(), "invalid object id"), [] 1177 | 1178 | # Beware: objectid was passed raw above, but is passed as integer below 1179 | fields = self.fields()[2:] 1180 | return True, self._stringify_object(classval.val(), objectid.int(), fields, verbosity), fields 1181 | 1182 | elif tag == OCamlValue.CLOSURE_TAG: 1183 | # The simplest closure contains 2 words: 1184 | # function pointer | arity of the function 1185 | # If data from the environment is added, it just comes after the arity. 1186 | # 1187 | # Some closures are special: 1188 | # caml_curryX (partial function application) and caml_tuplifyX (let add (a,b) = ...) 1189 | # These functions are generated by the compiler when needed and closures look like: 1190 | # function pointer of caml_curryX | arity | function pointer of target function 1191 | # function pointer of caml_tuplifyX | -arity | function pointer of target function 1192 | # According to some documentation, there is also caml_apply and other forms of 1193 | # caml_curryX_app that haven't been properly handled here yet. 1194 | # 1195 | # (See also documentation of the tag == OCamlValue.INFIX_TAG below) 1196 | # When a closure contains infix tags, it's pretty hard to distinguish between that 1197 | # and a standard closure containing an integer value that looks like an infix tag. 1198 | # Therefore we use the simple heuristic that if we find something that looks like 1199 | # an infix tag followed by a pointer to a code section, we treat it as an infix tag 1200 | # and continue parsing the closure. 1201 | if word_size < 2: 1202 | return False, self._stringify_invalid_size(int(self.v), word_size, "closure", verbosity), [] 1203 | 1204 | offset = 0 1205 | fields = self.fields() 1206 | functions = [] 1207 | have_more = True 1208 | tempfunc = fields[0].val() 1209 | while have_more: 1210 | function = fields[0].resolve() 1211 | arity = fields[1] 1212 | if not arity.is_int(): 1213 | return False, self._stringify_closure_arity_mismatch(function, int(arity.v), word_size, "non-integer arity", verbosity), [] 1214 | arity = arity.int() 1215 | 1216 | if abs(arity) < 1: 1217 | return False, self._stringify_closure_arity_mismatch(function, arity, word_size, "arity to small", verbosity), [] 1218 | 1219 | if function.startswith("caml_curry"): 1220 | skip_fields = 3 1221 | real_function = fields[2].resolve() 1222 | 1223 | elif function.startswith("caml_tuplify"): 1224 | arity = -arity 1225 | real_function = fields[2].resolve() 1226 | skip_fields = 3 1227 | 1228 | else: 1229 | skip_fields = 2 1230 | real_function = function 1231 | 1232 | functions.append( (function, real_function) ) 1233 | 1234 | if len(fields) >= skip_fields + 2 \ 1235 | and (fields[skip_fields].val() & 0xFF) == OCamlValue.INFIX_TAG \ 1236 | and memoryspace.is_code(fields[skip_fields + 1].val()): 1237 | 1238 | infix_offset = (fields[skip_fields].val() >> 10) 1239 | if infix_offset != offset + skip_fields + 1: 1240 | return False, "Closure with incorrect infix size", [] 1241 | skip_fields += 1 1242 | else: 1243 | have_more = False 1244 | 1245 | offset += skip_fields 1246 | fields = fields[skip_fields:] 1247 | 1248 | if len(functions) == 0: 1249 | return False, self._stringify_empty_closure(fields, verbosity), [] 1250 | return True, self._stringify_closure(functions, fields, verbosity), fields 1251 | 1252 | elif tag == OCamlValue.LAZY_TAG: 1253 | # This is the actual lazy value. When it is evaluated, the tag changes to either whatever 1254 | # fit the result (probably if it fits in this OCamlValue) or into a FORWARD_TAG otherwise 1255 | # with the field pointing to block containing the result. 1256 | if word_size != 1: 1257 | return False, self._stringify_invalid_size(int(self.v), word_size, "lazy value", verbosity), [] 1258 | 1259 | # TODO: fields of the lazy value? 1260 | return True, self._stringify_lazy_value(int(self.v), verbosity), self.fields() 1261 | 1262 | elif tag == OCamlValue.FORWARD_TAG: 1263 | # This is used for forwarding to the OCamlValue containing the result of a lazy evaluation 1264 | if word_size == 0: 1265 | return False, self._stringify_invalid_size(int(self.v), word_size, "lazy result", verbosity), [] 1266 | 1267 | fields = self.fields() 1268 | return True, self._stringify_lazy_result(fields, verbosity), fields 1269 | 1270 | elif tag == OCamlValue.INFIX_TAG: 1271 | # "let rec a x = ... and b y = ..." creates a closure with infixes: 1272 | # closure header | (a) closure data (1 or more words) | infix tag | (b) closure data (1 or more words) 1273 | # OCamlValue for a points to (a) and b points to (b) 1274 | # The size of the infix tag is the offset in words of (b) with respect to (a) 1275 | # For parsing, we just forward to the encapsulating closure 1276 | closure_offset = self.size_words() 1277 | return OCamlValue(self.v - (closure_offset*size_t.sizeof)).try_parse(verbosity) 1278 | 1279 | elif tag == OCamlValue.ABSTRACT_TAG: 1280 | # TODO: validate more? 1281 | return True, "Abstract %d" % word_size, [] 1282 | 1283 | elif tag == OCamlValue.CUSTOM_TAG: 1284 | # Custom values are used for things like Int64, Int32 and more. They haven't had 1285 | # special treatment here yet. See the show() method for some more info. 1286 | # TODO: validate more 1287 | return True, "Custom %d" % word_size, [] 1288 | 1289 | elif tag == OCamlValue.DOUBLE_ARRAY_TAG: 1290 | if (size_bytes % (doublep.target().sizeof)) != 0: 1291 | return False, self._stringify_invalid_size(int(self.v), size, "double array"), [] 1292 | # TODO: print actual values 1293 | return True, "Double array", [] 1294 | 1295 | else: 1296 | fields = self.fields() 1297 | return True, self._stringify_structured_block(fields, verbosity), fields 1298 | 1299 | @TraceMemoryError 1300 | def show(self,recurse): 1301 | if self.v == 0: 1302 | print_cont("NULL") # not a value 1303 | elif self.is_int(): 1304 | print_cont("%d" % self.int()) 1305 | elif self._is_list(): 1306 | print_cont("[") 1307 | if recurse > 0: 1308 | self.show_seq(self.get_list(), ';', recurse-1) 1309 | else: 1310 | print_cont("%d values" % self.get_list_length()) 1311 | print_cont("]") 1312 | else: 1313 | t = self.tag() 1314 | if t == 0: 1315 | print_cont("(") 1316 | if recurse > 0: 1317 | self.show_seq(self.fields(), ',', recurse-1) 1318 | else: 1319 | print_cont("%d fields" % self.size_words()) 1320 | print_cont(")") 1321 | elif t == OCamlValue.LAZY_TAG: 1322 | self.show_opaque("lazy") 1323 | elif t == OCamlValue.CLOSURE_TAG: 1324 | print_cont("Closure(") 1325 | if recurse > 0: 1326 | self.show_seq(self.fields(), ',', recurse-1, raw=True) 1327 | else: 1328 | print_cont("%d fields" % self.size_words()) 1329 | print_cont(")") 1330 | elif t == OCamlValue.OBJECT_TAG: 1331 | # | x when x = Obj.object_tag -> 1332 | # let fields = get_fields [] s in 1333 | # let clasz, id, slots = 1334 | # match fields with 1335 | # | h::h'::t -> h, h', t 1336 | # | _ -> assert false 1337 | # in 1338 | # (* No information on decoding the class (first field). So just print 1339 | # * out the ID and the slots. *) 1340 | # "Object #" ^ dump id ^ " (" ^ String.concat ", " (List.map dump slots) ^ ")" 1341 | # FIXME todo 1342 | self.show_opaque("object") 1343 | elif t == OCamlValue.INFIX_TAG: 1344 | self.show_opaque("infix") 1345 | elif t == OCamlValue.FORWARD_TAG: 1346 | self.show_opaque("forward") 1347 | elif t < OCamlValue.NO_SCAN_TAG: 1348 | print_cont("Tag%d(" % t) 1349 | if recurse > 0: 1350 | self.show_seq(self.fields(), ',', recurse-1) 1351 | else: 1352 | print_cont("%d fields" % self.size_words()) 1353 | print_cont(")") 1354 | elif t == OCamlValue.STRING_TAG: 1355 | print_cont("%s" % self._string()) 1356 | elif t == OCamlValue.DOUBLE_TAG: 1357 | print_cont("%s" % self._float()) 1358 | elif t == OCamlValue.ABSTRACT_TAG: 1359 | self.show_opaque("abstract") 1360 | elif t == OCamlValue.CUSTOM_TAG: 1361 | # FIXME better handle builtin caml custom values : int32, int64, etc 1362 | try: 1363 | sym = self.field(0).resolve() 1364 | except: 1365 | sym = '?' 1366 | try: 1367 | name = self.field(0).val().cast(charp.pointer()).dereference().string() 1368 | except: 1369 | name = '' 1370 | raise 1371 | self.show_opaque("custom " + sym + ' "' + name + '"') 1372 | elif t == OCamlValue.FINAL_TAG: 1373 | self.show_opaque("final") 1374 | elif t == OCamlValue.DOUBLE_ARRAY_TAG: 1375 | print_cont("") 1376 | # return "[|%s|]" % "; ".join([x.dump() for x in self.fields()]) 1377 | else: 1378 | self.show_opaque("unknown hd=0x%X" % int(self.hd())) 1379 | 1380 | # nil = OCamlValue.of_int(0) 1381 | # true = OCamlValue.of_int(1) 1382 | # false = OCamlValue.of_int(0) 1383 | 1384 | class DumpOCamlValue(gdb.Command): 1385 | """Recursively dumps runtime representation of OCaml value 1386 | 1387 | Dump value: ml_dump [/r[N]] 1388 | Dump the array of values: ml_dump [/r[N]] 1389 | Dump the pointer to value: ml_dump [/r[N]] 1 1390 | Dump local (stack) GC roots: ml_dump [/r[N]] local_roots 1391 | 1392 | Optional /r flag controls the recursion depth limit.""" 1393 | 1394 | def __init__(self): 1395 | gdb.Command.__init__(self, "ml_dump", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False) 1396 | 1397 | def parse_as_addr(self,addr): 1398 | x = gdb.parse_and_eval(addr) 1399 | if x.address == None: 1400 | return x.cast(size_t.pointer()) 1401 | else: # l-value, prevent short read when no debugging info 1402 | return gdb.parse_and_eval("*((size_t*)&"+addr+")").cast(size_t.pointer()) 1403 | 1404 | def show_ptr(self, addr, recurse): 1405 | print_cont("*0x%x:" % int(addr.cast(size_t))) 1406 | OCamlValue(addr.dereference()).show(recurse) 1407 | print("") 1408 | 1409 | # ocaml runtime may be compiled without debug info so we have to be specific with types 1410 | # otherwise gdb may default to 32-bit int even on 64-bit arch and inspection goes loose 1411 | # NB values can be given by name or by address 1412 | @TraceAll 1413 | def invoke(self, arg, from_tty): 1414 | init_types() 1415 | init_memoryspace() 1416 | args = gdb.string_to_argv(arg) 1417 | recurse = 1 1418 | if len(args) > 0 and args[0].startswith("/r"): 1419 | s = args[0][2:] 1420 | if s == "": 1421 | recurse = float('inf') 1422 | else: 1423 | recurse = int(s) 1424 | args = args[1:] 1425 | if len(args) < 1 or len(args) > 2: 1426 | print("Wrong usage, see \"help ml_dump\"") 1427 | return 1428 | if len(args) == 2: 1429 | addr = self.parse_as_addr(args[0]) 1430 | for i in range(int(args[1])): 1431 | self.show_ptr(addr + i, recurse) 1432 | else: 1433 | if args[0] == "local_roots": 1434 | p = gdb.parse_and_eval("*(struct caml__roots_block**)&caml_local_roots") 1435 | while p != 0: 1436 | print("caml_frame 0x%x" % int(p.cast(size_t))) 1437 | for i in range(int(p['nitems'])): 1438 | self.show_ptr(p['tables'][i], recurse) 1439 | p = p['next'] 1440 | else: 1441 | addr = self.parse_as_addr(args[0]) 1442 | OCamlValue(addr).show(recurse) 1443 | print("") 1444 | 1445 | DumpOCamlValue() 1446 | 1447 | class ShowOCamlHeap(gdb.Command): 1448 | """Show some facts about OCaml heap: ml_heap [units] 1449 | 1450 | Specify "w" or "words" for `units` to use OCaml words rather than bytes""" 1451 | 1452 | def __init__(self): 1453 | gdb.Command.__init__(self, "ml_heap", gdb.COMMAND_NONE, gdb.COMPLETE_NONE, False) 1454 | 1455 | def e(self,x,t): 1456 | return gdb.parse_and_eval("*(("+str(t)+"*)&"+x+")") 1457 | 1458 | def malloced_size(self,x): 1459 | # see caml_aligned_malloc, FIXME Page_size = 4K assumption 1460 | return x + 4*size_t.sizeof + 4*1024 1461 | 1462 | @TraceAll 1463 | def invoke(self, arg, from_tty): 1464 | init_types() 1465 | init_memoryspace() 1466 | args = gdb.string_to_argv(arg) 1467 | units = "bytes" 1468 | unit = 1 1469 | if len(args) > 0 and (args[0] == "words" or args[0] == "w"): 1470 | unit = size_t.sizeof 1471 | units = "words" 1472 | 1473 | print(" major heap size = %d %s" % (self.e("caml_stat_heap_size","intnat") / unit, units)) 1474 | print(" major heap top size = %d %s" % (self.e("caml_stat_top_heap_size","intnat") / unit, units)) 1475 | print(" total heap chunks =", self.e("caml_stat_heap_chunks","intnat")) 1476 | print(" gray values = %d %s" % (self.e("gray_vals_size","size_t") * size_t.sizeof / unit, units)) 1477 | print("extra heap resources =", self.e("caml_extra_heap_resources","double")) 1478 | print() 1479 | print("minor heap :") 1480 | minor_size = self.e("caml_minor_heap_size","size_t") 1481 | minor_base = self.e("caml_young_base","size_t") 1482 | y_start = self.e("caml_young_start","size_t") 1483 | y_ptr = self.e("caml_young_ptr","size_t") 1484 | y_end = self.e("caml_young_end","size_t") 1485 | print("0x%x .. 0x%x - 0x%x (total %d used %d %s) malloc: 0x%x - 0x%x" % \ 1486 | (y_start, y_ptr, y_end, minor_size/unit, (y_end - y_ptr)/unit, units, minor_base, minor_base + self.malloced_size(minor_size))) 1487 | print() 1488 | print("major heap :") 1489 | # the following casting is needed, otherwise gdb may sign-extend values without debug info 1490 | v = self.e("caml_heap_start","size_t") 1491 | i = 0 1492 | while v != 0: 1493 | p = gdb.Value(v - 4 * size_t.sizeof) 1494 | # typedef struct { 1495 | # void *block; /* address of the malloced block this chunk live in */ 1496 | # asize_t alloc; /* in bytes, used for compaction */ 1497 | # asize_t size; /* in bytes */ 1498 | # char *next; 1499 | # } heap_chunk_head; 1500 | p = p.cast(size_t.pointer()) 1501 | block = p.dereference() 1502 | size = (p + 2).dereference() 1503 | print("%2d) chunk 0x%x - 0x%x (%d %s) malloc: 0x%x - 0x%x" % (i, v, v+size, size/unit, units, block, block+self.malloced_size(size))) 1504 | i = i + 1 1505 | v = (p + 3).dereference() 1506 | 1507 | ShowOCamlHeap() 1508 | 1509 | class ScanOCamlValue(gdb.Command): 1510 | """Scan region of OCaml heap and show all values in it 1511 | 1512 | ml_scan [/r[N]] addr_start [bytes] 1513 | 1514 | Optional /r flag controls the recursion depth limit.""" 1515 | 1516 | def __init__(self): 1517 | gdb.Command.__init__(self, "ml_scan", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False) 1518 | 1519 | def parse_as_addr(self,addr): 1520 | x = gdb.parse_and_eval(addr) 1521 | if x.address == None: 1522 | return x.cast(size_t.pointer()) 1523 | else: # l-value, prevent short read when no debugging info 1524 | return gdb.parse_and_eval("*((size_t*)&"+addr+")").cast(size_t.pointer()) 1525 | 1526 | def show_val(self, addr, recurse): 1527 | print_cont("0x%x = " % int(addr.cast(size_t))) 1528 | OCamlValue(addr).show(recurse) 1529 | print("") 1530 | 1531 | @TraceAll 1532 | def invoke(self, arg, from_tty): 1533 | init_types() 1534 | init_memoryspace() 1535 | args = gdb.string_to_argv(arg) 1536 | recurse = 1 1537 | if len(args) > 0 and args[0].startswith("/r"): 1538 | s = args[0][2:] 1539 | if s == "": 1540 | recurse = float('inf') 1541 | else: 1542 | recurse = int(s) 1543 | args = args[1:] 1544 | if len(args) < 1 or len(args) > 2: 1545 | print("Wrong usage, see \"help ml_scan\"") 1546 | return 1547 | addr = self.parse_as_addr(args[0]) 1548 | if len(args) == 2: 1549 | addr_end = addr + int(args[1]) / size_t.sizeof 1550 | else: 1551 | addr_end = addr + 64 1552 | while addr < addr_end: 1553 | self.show_val(addr,recurse) 1554 | x = OCamlValue(addr) 1555 | addr = addr + x.size_words() + 1 1556 | 1557 | ScanOCamlValue() 1558 | 1559 | # The functions below mimic the behavior of the OCaml run-time when performing GC. 1560 | # In C programs, memory must be handled explicitly by the developer. 1561 | # In C++ programs, tools like shared_pointer, unique_pointer, ... make memory management 1562 | # easier, yet still explicit. Memory is lost when no longer referenced. 1563 | # In languages with GC, memory management is not explicit, but handled by the run-time. 1564 | # Referenced memory is discoverable through "roots", leaving the GC with the knowledge 1565 | # that all other allocated memory is unreferenced and can be reclaimed/reused. 1566 | # In OCaml there are multiple roots, each of which is described near the function 1567 | # that discovers it below. 1568 | 1569 | def get_value(name, type): 1570 | return gdb.parse_and_eval("""*( (%s*)(&%s) )""" % (type, name)) 1571 | 1572 | # Global roots are global values from C-code that are registered into the GC. 1573 | def get_global_roots(roots_list_name): 1574 | """ 1575 | Traverse the linked list of the provided global roots list and return a list of root addresses. 1576 | """ 1577 | roots_list = get_value_safe(roots_list_name) 1578 | ret = [] 1579 | if roots_list is None or roots_list == 0: 1580 | return ret 1581 | 1582 | global_root = roots_list['forward'].dereference() 1583 | while global_root != 0: 1584 | root = global_root.dereference()['root'].dereference() 1585 | ret.append((root, root+1, roots_list_name)) 1586 | global_root = global_root.dereference()['forward'].dereference() 1587 | return ret 1588 | 1589 | # Local roots are roots in the stack of C-code. 1590 | # They are automatically generated and registered from the CAMLxparam, CAMLlocal, ... macro's 1591 | # and linked together as a linked list. 1592 | def get_local_roots(roots, name): 1593 | ret = [] 1594 | while roots != 0: 1595 | root_struct = roots.dereference() 1596 | for i in range(int(root_struct['ntables'])): 1597 | for j in range(int(root_struct['nitems'])): 1598 | value = root_struct['tables'][i][j] 1599 | ret.append((value, value + size_t.sizeof, name)) 1600 | roots = root_struct['next'] 1601 | return ret 1602 | 1603 | # See byterun/finalise.c:caml_final_do_strong_roots 1604 | # Finalisers can be registered with the GC. They are functions 1605 | # that are called when the GC determines that the value is no 1606 | # longer used and the GC is about to reclaim the memory. 1607 | # These finalisers therefore contain both a closure to a function 1608 | # and an OCamlValue to the relevant block. 1609 | def get_final_roots(): 1610 | ret = [] 1611 | 1612 | young = get_value_safe("'finalise.d.c'::young", size_t) # avoid ambiguity 1613 | if young is None: 1614 | young = get_value_safe("'finalise.c'::young", size_t) 1615 | if young is None: 1616 | print("Didn't find 'finalise.c::young'. Young finaliser information is missing.") 1617 | return ret 1618 | 1619 | for i in range(int(young)): 1620 | final_struct = get_value_safe("final_table[%d]" % i) 1621 | if final_struct is None: 1622 | break 1623 | func = final_struct['fun'] 1624 | val = final_struct['val'] 1625 | ret.append((func, func + size_t.sizeof, "final_table")) 1626 | ret.append((val, val + size_t.sizeof, "final_table")) 1627 | 1628 | # TODO 1629 | to_do_ptr = get_value_safe("to_do_hd") 1630 | while to_do_ptr is not None and to_do_ptr.cast(size_t) != 0: 1631 | to_do_struct = to_do_ptr.dereference() 1632 | size = int(to_do_struct["size"].cast(size_t)) 1633 | items = do_to_struct["items"] 1634 | for i in range(size): 1635 | item_struct = (items + i).dereference() 1636 | func = item_struct['fun'] 1637 | val = item_struct['val'] 1638 | ret.append((func, func + size_t.sizeof, "final: to_do")) 1639 | ret.append((val, val + size_t.sizeof, "final: to_do")) 1640 | 1641 | return ret 1642 | 1643 | # Dynamic global roots are global variables from dynamically linked libraries. 1644 | # They are added to a linked list of roots. 1645 | def get_dyn_globals(): 1646 | ret = [] 1647 | dyn_globals = get_value_safe("caml_dyn_globals") 1648 | while dyn_globals is not None and dyn_globals != 0: 1649 | dyn_globals_struct = dyn_globals.dereference() 1650 | v = dyn_globals_struct['data'].cast(size_t) 1651 | ret.append((v, v + size_t.sizeof, "dyn_globals")) 1652 | dyn_globals = dyn_globals_struct['next'] 1653 | return ret 1654 | 1655 | # See walk_ocaml_stack() documentation. 1656 | # This function will walk the stack of the active thread first, 1657 | # followed by any other systhreads threads. All threads' information 1658 | # is kept in a linked list of thread information blocks. 1659 | # For the active thread, there are some global variables for quick 1660 | # access. Whenever acquiring or releasing the OCaml run-time lock, 1661 | # calling the GC or a C-function these global variables are updated. 1662 | # When releasing or acquiring the global run-time lock, these global 1663 | # variables are sync'd with the values from the linked list. 1664 | def walk_ocaml_stacks(): 1665 | ret = [] 1666 | 1667 | # scanning for currently active thread 1668 | sp = get_value_safe("caml_bottom_of_stack") 1669 | retaddr = get_value_safe("caml_last_return_address") 1670 | gc_regs = get_value_safe("caml_gc_regs") 1671 | roots = get_value_safe("caml_local_roots") 1672 | 1673 | if sp is not None and retaddr is not None and gc_regs is not None: 1674 | ret.extend(walk_ocaml_stack(sp, retaddr, gc_regs)) 1675 | if roots is not None: 1676 | ret.extend(get_local_roots(roots, "caml_local_roots")) 1677 | 1678 | # scanning for inactive threads 1679 | # otherlibs/systhreads/st_stubs.c:caml_thread_scan_roots() 1680 | active_thread = thread = get_value_safe("curr_thread") 1681 | if active_thread is None: 1682 | return 1683 | 1684 | while caml_thread_structp is not None: 1685 | thread_struct = thread.dereference() 1686 | 1687 | memrange = memoryspace.get_range(thread_struct["bottom_of_stack"]) 1688 | description = "Error: unknown thread" if memrange is None else memrange.description 1689 | 1690 | descriptor = thread_struct["descr"] 1691 | ret.append( (descriptor, descriptor + size_t.sizeof, "%s descr" % description) ) 1692 | backtrace_last_exn = thread_struct["caml_backtrace_last_exn"] # there's probably some macro at play here... 1693 | ret.append( (backtrace_last_exn, backtrace_last_exn + size_t.sizeof, "%s backtrace_last_exn" % description) ) 1694 | 1695 | if thread.cast(size_t) != active_thread.cast(size_t): 1696 | ret.extend(walk_ocaml_stack(thread_struct["bottom_of_stack"], thread_struct["last_retaddr"], thread_struct["gc_regs"], description)) 1697 | ret.extend(get_local_roots(thread_struct["caml_local_roots"], "%s local roots" % description)) 1698 | 1699 | thread = thread_struct["next"] 1700 | if thread.cast(size_t) == active_thread.cast(size_t): 1701 | break 1702 | 1703 | return ret 1704 | 1705 | # This is a freeform translation of asmrun/roots.c:do_local_roots() 1706 | # We basically walk the stack in search of OCamlValues. 1707 | # C-code should never call the garbage collector directly. Instead 1708 | # Ocaml dynamically inserts calls to the GC wherever memory is allocated. 1709 | # The GC is then called when the minor heap runs out of space to satisfy 1710 | # the current request. Because the compiler is the only one to insert 1711 | # calls to the GC, it knows the state of the stack and CPU registers 1712 | # wrt them contains GC roots. 1713 | # It emits this information in a structure called the caml_frametable. 1714 | # This structure is walked at run-time and a hash-table is created where 1715 | # the key is the return address of the caml_call_gc() function. 1716 | # When traversing the stack, the run-time uses this information to 1717 | # locate the roots on the stack or inside registers. 1718 | def walk_ocaml_stack(sp, retaddr, gc_regs, description="stack"): 1719 | if caml_contextp is None: 1720 | return [] 1721 | fd_mask = get_value_safe("caml_frame_descriptors_mask") 1722 | if fd_mask is None: 1723 | return [] 1724 | if get_value_safe("caml_frame_descriptors") is None: 1725 | return [] 1726 | 1727 | def hash_retaddr(addr): 1728 | return (addr.cast(size_t) >> 3) & fd_mask 1729 | 1730 | ret = [] 1731 | if sp == 0: 1732 | return ret 1733 | 1734 | reg_names = { 0: "rax", 1: "rbx", 2: "rdi", 3: "rsi", 4: "rdx", 5: "rcx", 6: "r8", 7: "r9", 1735 | 8: "r12", 9: "r13", 10: "r10", 11: "r11", 12: "rbp" } 1736 | frame = resolve(retaddr) 1737 | 1738 | while True: 1739 | h = hash_retaddr(retaddr) 1740 | while True: 1741 | d = get_value_safe("caml_frame_descriptors[%d]"%h) 1742 | d_struct = d.dereference() 1743 | if d_struct["retaddr"].cast(size_t) == retaddr.cast(size_t): 1744 | break 1745 | h = (h+1) & fd_mask 1746 | 1747 | if d_struct["frame_size"] != 0xFFFF: 1748 | for n in range(int(d_struct["num_live"])): 1749 | ofs = get_value_safe("caml_frame_descriptors[%d]->live_ofs[%d]" % (h, n), size_t) 1750 | if ofs & 1: 1751 | location = "[%s]" % reg_names.get(int(ofs>>1), "unknown_reg") 1752 | root = (gc_regs.cast(size_t) + ((ofs >> 1) * size_t.sizeof)).cast(size_t.pointer()).dereference() 1753 | else: 1754 | location = "[sp+0x%X]" % ofs 1755 | root = sp.cast(size_t) + ofs 1756 | root = root.cast(size_t.pointer()) 1757 | root = root.dereference().cast(size_t) 1758 | ret.append((root, root+size_t.sizeof, "%s(%s%s)" % (description, frame, location))) 1759 | 1760 | sp = (sp.cast(size_t) + (d_struct["frame_size"] & 0xFFFC)).cast(charp) 1761 | retaddr = (sp.cast(size_t) - size_t.sizeof).cast(size_t.pointer()).dereference() 1762 | else: 1763 | next_context_p = (sp.cast(size_t) + (2 * size_t.sizeof)).cast(caml_contextp) 1764 | next_context_struct = next_context_p.dereference() 1765 | sp = next_context_struct["bottom_of_stack"] 1766 | retaddr = next_context_struct["last_retaddr"] 1767 | regs = next_context_struct["gc_regs"] 1768 | if sp == 0: 1769 | break 1770 | return ret 1771 | 1772 | # Compile-time global values. 1773 | # This is an array of OcamlValues to OCaml blocks. There is 1 pointer per module linked in. 1774 | # The last value is a NULL sentinel. caml_globals is part of the .data section. 1775 | def get_globals(): 1776 | ret = [] 1777 | global_data_ptr = get_value_safe("caml_globals", size_t.pointer()) 1778 | if global_data_ptr is None: 1779 | return ret 1780 | 1781 | global_data = global_data_ptr.dereference() 1782 | 1783 | index = 0 1784 | while global_data != 0: 1785 | ret.append((global_data, global_data + size_t.sizeof, "global_data[%d]" % index)) 1786 | global_data_ptr = (global_data_ptr.cast(size_t) + size_t.sizeof).cast(size_t.pointer()) 1787 | global_data = global_data_ptr.dereference() 1788 | index += 1 1789 | 1790 | return ret 1791 | 1792 | # OCaml contains a method to dynamically add detection of more roots at run-time 1793 | # through the use of a callback function. Each user must store the previous pointer 1794 | # value, and replace the hook with its own function. Therefore, for each possible 1795 | # hook/symbol, we need to know what symbol is used to store the previous value. 1796 | # The actual discovery of the roots, however is already done in one of the above 1797 | # functions, this is merely a check that we didn't miss any roots we didn't know about. 1798 | def traverse_scan_roots_hook(): 1799 | known_hooks = { 1800 | "caml_thread_scan_roots": "prev_scan_roots_hook", # handled in walk_ocaml_stacks() 1801 | # more here, make sure to describe where it's handled 1802 | } 1803 | 1804 | scan_roots_hook_ptr = get_value_safe("caml_scan_roots_hook") 1805 | while scan_roots_hook_ptr is not None and scan_roots_hook_ptr != 0: 1806 | scan_roots_hook = resolve(scan_roots_hook_ptr) 1807 | next_hook = known_hooks.get(scan_roots_hook, None) 1808 | if next_hook is None: 1809 | print("Unhandled root scanning function: %s" % scan_roots_hook) 1810 | return 1811 | scan_roots_hook_ptr = get_value_safe(next_hook) 1812 | 1813 | # See asmrun/roots.c:caml_do_roots for guidance 1814 | # This function walks over all data structures known to contain roots. 1815 | # Following this, you can walk over all OCaml values the GC knows. 1816 | # This is similar to the way the GC performs the "mark" phase. 1817 | # Another function could be written that walks over the heap in a 1818 | # similar fashion to the "sweep" phase. 1819 | # TODO: caml_globals can contain many NULL values yielding warnings/errors 1820 | def get_entry_points(): 1821 | """ 1822 | Returns a list of entry-points (aka roots) from where all live values 1823 | can be discovered. 1824 | The list contains tuples (start address, post-end address, source) 1825 | with source a string identifying where the addresses were discovered. 1826 | """ 1827 | ret = [] 1828 | 1829 | # global roots 1830 | ret.extend(get_globals()) 1831 | # dynamic global roots 1832 | ret.extend(get_dyn_globals()) 1833 | # stacks and local roots 1834 | ret.extend(walk_ocaml_stacks()) 1835 | 1836 | # global C roots 1837 | ret.extend(get_global_roots("caml_global_roots")) 1838 | ret.extend(get_global_roots("caml_global_roots_young")) 1839 | ret.extend(get_global_roots("caml_global_roots_old")) 1840 | 1841 | # finalised values 1842 | ret.extend(get_final_roots()) 1843 | 1844 | # scan_roots_hook 1845 | traverse_scan_roots_hook() 1846 | return ret 1847 | 1848 | class ValidateHeap(gdb.Command): 1849 | """Validates the OCaml heap 1850 | ml_validate 1851 | """ 1852 | def __init__(self): 1853 | gdb.Command.__init__(self, "ml_validate", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False) 1854 | 1855 | def parse_as_addr(self,addr): 1856 | x = gdb.parse_and_eval(addr) 1857 | if x.address == None: 1858 | return x.cast(size_t.pointer()) 1859 | else: # l-value, prevent short read when no debugging info 1860 | return gdb.parse_and_eval("*((size_t*)&"+addr+")").cast(size_t.pointer()) 1861 | 1862 | def _scan_range(self, todo, seen): 1863 | values = 0 1864 | bytes = 0 1865 | skipped = 0 1866 | 1867 | try: 1868 | while len(todo): 1869 | value = todo.pop() 1870 | 1871 | if value in seen: 1872 | skipped += 1 1873 | continue 1874 | seen.add(int(value.val())) 1875 | 1876 | 1877 | valid, what, children = value.try_parse(3) 1878 | if not valid: 1879 | print("Invalid value at %s: %s" % (str(value), what)) 1880 | p = value.parent 1881 | pindex = value.parentindex 1882 | while p is not None: 1883 | if isinstance(p, OCamlValue): 1884 | _, p_what, _ = p.try_parse(3) 1885 | print("from: 0x%08X[%d] - %s" % (p.val(), pindex, p_what)) 1886 | pindex = p.parentindex 1887 | p = p.parent 1888 | else: 1889 | print("from: %s[%d]" % (str(p), pindex)) 1890 | p = None 1891 | pindex = None 1892 | elif len(children): 1893 | todo.extend([child for child in children if int(child.val()) not in seen]) 1894 | 1895 | if valid: 1896 | values += 1 1897 | bytes += 0 if value.is_int() else value.size_bytes() 1898 | except: 1899 | traceback.print_exc() 1900 | raise 1901 | 1902 | return (values, skipped, bytes) 1903 | 1904 | @TraceAll 1905 | def invoke(self, arg, from_tty): 1906 | init_types() 1907 | init_memoryspace() 1908 | args = gdb.string_to_argv(arg) 1909 | 1910 | if len(args) > 0: 1911 | print("Wrong usage, see \"help ml_validate\"") 1912 | return 1913 | 1914 | values = skipped = bytes = 0 1915 | 1916 | try: 1917 | todo = collections.deque() 1918 | seen = set() 1919 | 1920 | for (begin, end, source) in get_entry_points(): 1921 | #print("Scanning %s - %d values" % (source, (end - begin)/size_t.sizeof)) 1922 | 1923 | address = begin 1924 | while address < end: 1925 | todo.append(OCamlValue(address, parent=source, parentindex=(address - begin)/size_t.sizeof)) 1926 | address = address + size_t.sizeof 1927 | 1928 | cur_values, cur_skipped, cur_bytes = self._scan_range(todo, seen) 1929 | #print("Scanned %d values, skipped %d values, total of %5dB" % (cur_values, cur_skipped, cur_bytes)) 1930 | values += cur_values 1931 | skipped += cur_skipped 1932 | bytes += cur_bytes 1933 | 1934 | except: 1935 | traceback.print_exc() 1936 | raise 1937 | 1938 | print("Totals: scanned %d values, skipped %d values, total of %5dB" % (values, skipped, bytes)) 1939 | 1940 | ValidateHeap() 1941 | 1942 | class ShowValue(gdb.Command): 1943 | """ 1944 | Display an OCaml value (recursively) 1945 | ml_show [/r] [verbosity] 1946 | """ 1947 | def __init__(self): 1948 | gdb.Command.__init__(self, "ml_show", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False) 1949 | 1950 | def parse_as_addr(self,addr): 1951 | x = gdb.parse_and_eval(addr) 1952 | if x.address == None: 1953 | return x.cast(size_t.pointer()) 1954 | else: # l-value, prevent short read when no debugging info 1955 | return gdb.parse_and_eval("*((size_t*)&"+addr+")").cast(size_t.pointer()) 1956 | 1957 | def show(self, value, depth, recurse, verbosity): 1958 | valid, what, children = value.try_parse(verbosity) 1959 | what = what.replace('\0', "\\0") 1960 | prefix = " " * depth 1961 | print("%s%s %s" % (prefix, what, '= [' if len(children) and recurse else '')) 1962 | if recurse: 1963 | for child in children: 1964 | self.show(child, depth + 1, recurse - 1, verbosity) 1965 | 1966 | if len(children) and recurse: 1967 | print("%s%s" % (prefix, ']')) 1968 | 1969 | @TraceAll 1970 | def invoke(self, arg, from_tty): 1971 | init_types() 1972 | init_memoryspace() 1973 | args = gdb.string_to_argv(arg) 1974 | recurse = 1 1975 | verbosity = 1 1976 | if len(args) < 1 or len(args) > 3: 1977 | print("Wrong usage, see \"help ml_show\"") 1978 | return 1979 | 1980 | if len(args) == 1: 1981 | addr = self.parse_as_addr(args[0]) 1982 | elif len(args) == 3: 1983 | if not args[0].startswith("/r"): 1984 | print("recursivity must be specified as /rN") 1985 | return 1986 | recurse = int(args[0][2:]) 1987 | addr = self.parse_as_addr(args[1]) 1988 | verbosity = int(args[2]) 1989 | else: 1990 | if args[0].startswith('/r'): 1991 | recurse = int(args[0][2:]) 1992 | addr = self.parse_as_addr(args[1]) 1993 | else: 1994 | addr = self.parse_as_addr(args[0]) 1995 | verbosity = int(args[1]) 1996 | 1997 | value = OCamlValue(addr) 1998 | self.show(value, 0, recurse, verbosity) 1999 | 2000 | ShowValue() 2001 | 2002 | class ShowMemory(gdb.Command): 2003 | """ 2004 | Shows memory space and its disposition 2005 | ml_target [verbosity] 2006 | """ 2007 | 2008 | def __init__(self): 2009 | gdb.Command.__init__(self, "ml_target", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False) 2010 | 2011 | def parse_as_addr(self,addr): 2012 | x = gdb.parse_and_eval(addr) 2013 | if x.address == None: 2014 | return x.cast(size_t.pointer()) 2015 | else: # l-value, prevent short read when no debugging info 2016 | return gdb.parse_and_eval("*((size_t*)&"+addr+")").cast(size_t.pointer()) 2017 | 2018 | @TraceAll 2019 | def invoke(self, arg, from_tty): 2020 | init_types() 2021 | init_memoryspace() 2022 | args = gdb.string_to_argv(arg) 2023 | 2024 | if len(args) == 2 and args[0] == 'all': 2025 | verbosity = int(args[1]) 2026 | address = None 2027 | elif len(args) == 1: 2028 | if args[0] == 'all': 2029 | address = None 2030 | verbosity = 0 2031 | else: 2032 | address = self.parse_as_addr(args[0]) 2033 | else: 2034 | print("Wrong usage, see \"help ml_target\"") 2035 | return 2036 | 2037 | try: 2038 | if address is None: 2039 | memoryspace.display(verbosity) 2040 | else: 2041 | memrange = memoryspace.get_range(address) 2042 | if memrange is None: 2043 | print("Address 0x%08X is invalid" % address) 2044 | else: 2045 | print("Address 0x%08X is part of: %s" % (address, str(memrange))) 2046 | except: 2047 | traceback.print_exc() 2048 | 2049 | 2050 | ShowMemory() 2051 | 2052 | class FindPointersTo(gdb.Command): 2053 | """Finds memory locations that point to a specified value: 2054 | ml_find 2055 | """ 2056 | def __init__(self): 2057 | gdb.Command.__init__(self, "ml_find", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False) 2058 | 2059 | @TraceAll 2060 | def invoke(self, arg, from_tty): 2061 | init_types() 2062 | init_memoryspace() 2063 | args = gdb.string_to_argv(arg) 2064 | 2065 | if len(args) != 1: 2066 | print("Wrong usage, see \"help ml_find\"") 2067 | return 2068 | 2069 | value = int(args[0]) 2070 | pattern = struct.pack("L", value) 2071 | locations = memoryspace.search_memory_of_types(pattern, *MemoryType.all()) 2072 | for location in locations: 2073 | memrange = memoryspace.get_range(location) 2074 | print("Found at 0x%08X in %s" % (location, memrange.description)) 2075 | 2076 | FindPointersTo() 2077 | -------------------------------------------------------------------------------- /next.ml: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env ocamlscript 2 | 3 | let () = 4 | match Sys.argv with 5 | | [|_;k|] -> 6 | let rec loop acc = 7 | match input_line stdin with 8 | | exception End_of_file -> List.rev acc 9 | | s -> loop (s::acc) 10 | in 11 | begin match loop [] with 12 | | [] -> exit 1 13 | | first::_ as l -> 14 | let rec next = function 15 | | x::xs when x = k -> (match xs with [] -> first | y::_ -> y) 16 | | _::xs -> (* Printf.eprintf "skipping %S <> %S" y k; *) next xs 17 | | [] -> first 18 | in 19 | print_endline @@ next l 20 | end 21 | | _ -> 22 | prerr_endline "next.ml.exe "; 23 | prerr_endline "Outputs the next line from stdin after the , first one if is the last." 24 | -------------------------------------------------------------------------------- /oasis-query.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # OASIS query helper 4 | # https://github.com/ygrek/scraps/blob/master/oasis-query.sh 5 | 6 | set -eu 7 | 8 | export LC_ALL=C 9 | 10 | code_sections() { 11 | oasis query ListSections | egrep '^(Library|Executable|Object)' 12 | } 13 | 14 | # query oasis for all BuildDepends and exclude internal Library names 15 | show_deps() { 16 | join -v 2 <(oasis query ListSections | grep Library | sed 's/Library(\(.*\))/\1/' | sort -u) <(oasis query $(code_sections | sed s/$/.BuildDepends/ ) | tr ',' '\n' | awk '($1!=""){print $1}' | sort -u) 17 | } 18 | 19 | show_source_dirs() { 20 | oasis query $(code_sections | sed s/$/.Path/ ) | sort -u 21 | } 22 | 23 | show_build_dirs() { 24 | show_source_dirs | sed 's@^@_build/@' 25 | } 26 | 27 | show_library_path() { 28 | echo $(show_build_dirs) | tr ' ' ':' 29 | } 30 | 31 | env_library_path() { 32 | printf "%s:$CAML_LD_LIBRARY_PATH" "$(show_library_path)" 33 | } 34 | 35 | show_include_dirs() { 36 | ocamlfind query -r -i-format $(show_deps) 37 | show_build_dirs | sed 's/^/-I /' 38 | } 39 | 40 | generate_merlin() { 41 | show_source_dirs | sed 's/^/S /' 42 | show_build_dirs | sed 's/^/B /' 43 | show_deps | sed 's/^/PKG /' 44 | } 45 | 46 | case "${1:-}" in 47 | "deps") show_deps ;; 48 | "build-dirs") show_build_dirs ;; 49 | "source-dirs") show_source_dirs ;; 50 | "library-path") show_library_path ;; 51 | "env-library-path") env_library_path ;; 52 | "include-dirs") show_include_dirs ;; 53 | "merlin") generate_merlin ;; 54 | *) 55 | echo "whoa?" >&2 56 | echo "Supported commands : deps build-dirs source-dirs include-dirs merlin library-path env-library-path" >&2 57 | exit 1 58 | esac 59 | -------------------------------------------------------------------------------- /opam-url: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | if [ $# -eq 0 ]; then 6 | echo 'usage: opam-url [user]' 7 | exit 2 8 | fi 9 | 10 | ext=tar.gz 11 | name=$1 12 | version=$2 13 | org=${3:-ygrek} 14 | if [ "$name" = "ocaml-extlib" ]; then 15 | fname=extlib 16 | else 17 | fname=$name 18 | fi 19 | 20 | if [ "$name" = "ocaml-extunix" ]; then 21 | gname=extunix 22 | else 23 | gname=$name 24 | fi 25 | 26 | if [ "$name" = "mldonkey" ]; then 27 | tag=release-$(echo $version | tr '.' '-') 28 | else 29 | tag=$version 30 | fi 31 | 32 | if [ "$name" = "devkit" ]; then 33 | org=ahrefs 34 | ext=tbz 35 | fi 36 | 37 | if [ "$name" = "ocaml-murmur3" ]; then 38 | org=ahrefs 39 | fi 40 | 41 | if [ "$name" = "ocaml-mariadb" ]; then 42 | org=andrenth 43 | fi 44 | 45 | release=~/p/release/$name/$fname-$version.$ext 46 | if [ ! -f "$release" ]; then 47 | ext=tar.bz2 48 | release=~/p/release/$name/$fname-$version.$ext 49 | if [ ! -f "$release" ]; then 50 | echo "E: $release doesn't exist" 51 | exit 3 52 | fi 53 | fi 54 | src="https://ygrek.org/p/release/$name/$fname-$version.$ext" 55 | mirror1="https://github.com/$org/$gname/releases/download/v$tag/$fname-$version.$ext" 56 | mirror2="https://github.com/$org/$gname/releases/download/$tag/$fname-$version.$ext" 57 | 58 | function cuts0 { 59 | awk '{print $1}' 60 | } 61 | 62 | if [ "$(curl -s -L "$src" | sha256sum - | cuts0)" != "$(sha256sum "$release" | cuts0)" ]; then 63 | echo "W: $src bad" 64 | unset src 65 | fi 66 | mirror=$mirror1 67 | if [ "$(curl -s -L "$mirror" | sha256sum - | cuts0)" != "$(sha256sum "$release" | cuts0)" ]; then 68 | mirror=$mirror2 69 | [ "$(curl -s -L "$mirror" | sha256sum - | cuts0)" == "$(sha256sum "$release" | cuts0)" ] || (echo "E:$mirror bad"; exit 2) 70 | fi 71 | 72 | echo "url {" 73 | if [ -v src ]; then 74 | echo " src: \"$src\"" 75 | else 76 | echo " src: \"$mirror\"" 77 | fi 78 | cat < report_argv 19 | ``` 20 | 21 | Get the global view and final balance: 22 | 23 | ``` 24 | $ ./example.ml 25 | [New Year] cake : 58.00 (3 pax), Alice paid 66.00 tipping 13% 26 | [ Jan 2] cinema : 40.00 (2 pax), Bob paid 40.00 27 | [ Jan 10] pizza : 50.00 (3 pax), Mallory paid 55.00 tipping 10% 28 | 29 | Bob -15.75 30 | Alice -3.50 31 | Mallory 19.25 32 | ``` 33 | 34 | Report for one participant: 35 | 36 | ``` 37 | $ ./example.ml report Bob 38 | [New Year] -22.00 = -22.00 : cake : 58.00 (3 pax), Alice paid 66.00 tipping 13% 39 | [ Jan 2] +20.00 = -2.00 : cinema : 40.00 (2 pax), Bob paid 40.00 40 | [ Jan 10] -13.75 = -15.75 : pizza : 50.00 (3 pax), Mallory paid 55.00 tipping 10% 41 | ``` 42 | *) 43 | 44 | open Printf 45 | 46 | let fail fmt = ksprintf failwith fmt 47 | 48 | let money x = (if x < 0 then "-" else "") ^ sprintf "%d.%02d" (abs x / 100) (abs x mod 100) 49 | let delta x = (if x > 0 then "+" else "") ^ money x 50 | let pr fmt = ksprintf print_endline fmt 51 | 52 | let sum = List.fold_left (fun acc (_,x) -> acc + x) 0 53 | 54 | let compute ?track l = 55 | let h = Hashtbl.create 10 in 56 | let bal who = try Hashtbl.find h who with Not_found -> 0 in 57 | let tracked = ref 0 in 58 | l |> List.iter begin fun (`Date date,where,party,payer) -> 59 | assert (payer <> []); 60 | let paid = sum payer in 61 | let (bill,party) = 62 | match party with 63 | | `Items (bill,l) -> assert (l<>[]); bill, l 64 | | `Share' l -> assert (l<>[]); paid, l |> List.map (fun who -> who, paid / List.length l) 65 | | `Share (bill, l) -> assert (l<>[]); bill, l |> List.map (fun who -> who, bill / List.length l) 66 | in 67 | let total = sum party in 68 | let extra = bill - total in 69 | let taxes = 70 | if extra = 0 then 71 | "" 72 | else if extra < List.length party then (* due to integer division, passed onto payer *) 73 | (* sprintf " incl. extra %s" (money extra) *) 74 | "" 75 | else 76 | sprintf " incl. extra %s (%.2f%%)" (money extra) (100. *. float extra /. float total) 77 | in 78 | let tips = if paid - bill <> 0 then sprintf " tipping %d%%" (100 * (paid - bill) / bill) else "" in 79 | if paid < bill then fail "bill %s < paid %s" (money bill) (money paid); 80 | if bill < total then fail "bill %s < total %s" (money bill) (money total); 81 | party |> List.iter (fun (who,x) -> Hashtbl.replace h who (bal who - x - x * (paid - total) / total)); 82 | payer |> List.iter (fun (who,x) -> Hashtbl.replace h who (bal who + x)); 83 | let track = 84 | match track with 85 | | None -> Some "" 86 | | Some name when bal name = 0 && !tracked = 0 -> None 87 | | Some name -> 88 | let diff = bal name - !tracked in 89 | tracked := bal name; 90 | Some (sprintf "%7s = %7s :" (if diff = 0 then "" else delta diff) (money @@ bal name)) 91 | in 92 | begin match track with 93 | | None -> () 94 | | Some track -> 95 | pr "[%8s] %s %-20s : %s (%d pax)%s, %s paid %s%s" 96 | date track where (money bill) (List.length party) taxes (String.concat " " @@ List.map fst payer) (money paid) tips; 97 | end 98 | end; 99 | pr ""; 100 | h 101 | 102 | let show_standings h = 103 | Hashtbl.fold (fun k v acc -> (k,v)::acc) h [] 104 | |> List.sort (fun (_,a) (_,b) -> compare a b) 105 | |> List.iter (fun (who,x) -> pr "%s %s" who (money x)) 106 | 107 | let report_paid name ledger = 108 | let x = List.fold_left begin fun acc (_,_,_,payer) -> 109 | List.fold_left (fun acc (who,x) -> acc + if who = name then x else 0) acc payer 110 | end 0 ledger 111 | in 112 | pr "%s %s" name (money x) 113 | 114 | let on x = `Date x 115 | let items l = `Items (sum l, l) 116 | let bill d l = `Items (d,l) 117 | let share' l = `Share' l 118 | let share bill l = `Share (bill, l) 119 | 120 | let report_argv ledger = 121 | match List.tl @@ Array.to_list Sys.argv with 122 | | "report"::name::[] 123 | | name::[] -> show_standings @@ compute ~track:name ledger 124 | | [] -> show_standings @@ compute ledger 125 | | "paid"::name::[] -> report_paid name ledger 126 | | _ -> prerr_endline "wat?"; exit 2 127 | -------------------------------------------------------------------------------- /php_serialize.ml: -------------------------------------------------------------------------------- 1 | (** 2 | PHP serialization 3 | http://php.net/manual/en/function.serialize.php 4 | 5 | This is free and unencumbered software released into the public domain. 6 | For more information, please refer to 7 | *) 8 | 9 | open ExtLib 10 | 11 | (* open Prelude *) 12 | let (>>) x f = f x 13 | let ($) f g = function x -> f (g x) 14 | 15 | (* 16 | 17 | 18 | Anatomy of a serialize()'ed value: 19 | 20 | String 21 | s:size:value; 22 | 23 | Integer 24 | i:value; 25 | 26 | Boolean 27 | b:value; (does not store "true" or "false", does store '1' or '0') 28 | 29 | Null 30 | N; 31 | 32 | Array 33 | a:size:{key definition;value definition;(repeated per element)} 34 | 35 | Object 36 | O:strlen(object name):object name:object size:{s:strlen(property name):property name:property definition;(repeated per property)} 37 | 38 | String values are always in double quotes 39 | Array keys are always integers or strings 40 | "null => 'value'" equates to 's:0:"";s:5:"value";', 41 | "true => 'value'" equates to 'i:1;s:5:"value";', 42 | "false => 'value'" equates to 'i:0;s:5:"value";', 43 | "array(whatever the contents) => 'value'" equates to an "illegal offset type" warning because you can't use an 44 | array as a key; however, if you use a variable containing an array as a key, it will equate to 's:5:"Array";s:5:"value";', 45 | and 46 | attempting to use an object as a key will result in the same behavior as using an array will. 47 | *) 48 | 49 | type php = AI of (int * php) list | AS of (string * php) list | S of string | I of int | B of bool | F of float | N 50 | 51 | let check x y = if x <> y then failwith (Printf.sprintf "Php_serialize failed : %u <> %u" x y) 52 | 53 | let rec parse_one = parser 54 | | [< ''a'; '':'; n=number; '':'; ''{'; a=parse_array; ''}' >] -> ignore n;(*check n (List.length a);*) a 55 | | [< ''b'; '':'; n=number; '';' >] -> B (0 <> n) 56 | | [< ''d'; '':'; f=parse_float_semi; >] -> F f 57 | | [< n=parse_int >] -> I n 58 | | [< s=parse_str >] -> S s 59 | | [< ''N'; '';' >] -> N 60 | and number t = parse_nat 0 t 61 | and parse_nat n = parser (* overflow test?* *) 62 | | [< ''0'..'9' as c; t >] -> let digit = Char.code c - Char.code '0' in parse_nat (n * 10 + digit) t 63 | | [< >] -> n 64 | and integer = parser 65 | | [< ''-'; t >] -> - (number t) 66 | | [< t >] -> number t 67 | and parse_int = parser 68 | | [< ''i'; '':'; n=integer; '';' >] -> n 69 | and parse_float_semi t = (* ugly, because of one look ahead token FIXME *) 70 | let buf = Scanf.Scanning.from_function (fun () -> Stream.next t) in 71 | Scanf.bscanf buf "%f;" (fun f -> f) 72 | and parse_str = parser 73 | | [< ''s'; '':'; n=number; '':'; ''"'; s=take_string n; ''"'; '';' >] -> s 74 | and take_string n t = String.init n (fun _ -> Stream.next t) 75 | and parse_array = parser 76 | | [< k=parse_int; v=parse_one; a=parse_int_array [k,v] >] -> AI a 77 | | [< k=parse_str; v=parse_one; a=parse_str_array [k,v] >] -> AS a 78 | | [< >] -> AI [] (* empty array *) 79 | and parse_int_array acc = parser 80 | | [< k=parse_int; v=parse_one; t >] -> parse_int_array ((k,v)::acc) t 81 | | [< >] -> List.rev acc 82 | and parse_str_array acc = parser 83 | | [< k=parse_str; v=parse_one; t >] -> parse_str_array ((k,v)::acc) t 84 | | [< >] -> List.rev acc 85 | 86 | let parse stream = 87 | let show () = 88 | let tail = Stream.npeek 10 stream >> List.map (String.make 1) >> String.concat "" in 89 | Printf.sprintf "Position %u : %s" (Stream.count stream) tail 90 | in 91 | try 92 | let r = parse_one stream in 93 | Stream.empty stream; r 94 | with 95 | | Stream.Error _ | Stream.Failure -> failwith (show ()) 96 | 97 | let parse_string = parse $ Stream.of_string 98 | 99 | (** Combinators for easy deconstruction *) 100 | 101 | exception Error of string 102 | 103 | let fail v str = raise (Error (Printf.sprintf "%s : %s" str (Std.dump v))) 104 | 105 | let int = function I n -> n | x -> fail x "int" 106 | let str = function S s -> s | x -> fail x "str" 107 | 108 | let opt k x = try Some (k x) with Error _ -> None 109 | 110 | let values f = function 111 | | AS a -> List.map (f $ snd) a 112 | | AI a -> List.map (f $ snd) a 113 | | x -> fail x "values" 114 | 115 | let array f = function 116 | | AS a -> List.map (fun (k,v) -> k, f v) a 117 | | x -> fail x "array" 118 | 119 | let assoc php name = 120 | match php with 121 | | AS a -> List.assoc name a 122 | | _ -> fail php "assoc" 123 | 124 | module Out = struct 125 | 126 | (** Combinators to build values of [php] type *) 127 | 128 | let str s = S s 129 | let int n = I n 130 | 131 | let array f e = AI (e >> Enum.mapi (fun i x -> i, f x) >> List.of_enum) 132 | let iarray f e = AI (e >> Enum.map (fun (k,v) -> k, f v) >> List.of_enum) 133 | let sarray f e = AS (e >> Enum.map (fun (k,v) -> k, f v) >> List.of_enum) 134 | 135 | (** Serialize [php] value *) 136 | let output out v = 137 | let put_arr f a = IO.printf out "a:%u:{" (List.length a); List.iter f a; IO.write out '}' in 138 | let rec put = function 139 | | AS a -> put_arr (fun (k,v) -> put (S k); put v) a 140 | | AI a -> put_arr (fun (k,v) -> put (I k); put v) a 141 | | I n -> IO.printf out "i:%i;" n 142 | | B b -> IO.printf out "b:%u;" (if b then 1 else 0) 143 | | F f -> IO.printf out "d:%f;" f 144 | | N -> IO.nwrite out "N;" 145 | | S s -> IO.printf out "s:%u:\"%s\";" (String.length s) s 146 | in 147 | put v 148 | 149 | end 150 | 151 | let to_string v = 152 | let out = IO.output_string () in 153 | Out.output out v; 154 | IO.close_out out 155 | 156 | -------------------------------------------------------------------------------- /pmp: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This program is part of Aspersa (http://code.google.com/p/aspersa/) 3 | 4 | # ######################################################################## 5 | # This script aggregates GDB stack traces for a selected program. By default it 6 | # does mysqld. 7 | # 8 | # Author: Baron Schwartz, based on a script by Domas Mituzas at 9 | # poormansprofiler.org 10 | # ######################################################################## 11 | 12 | # License: BSD 3-clause 13 | # https://opensource.org/licenses/BSD-3-Clause 14 | 15 | # Print a usage message and exit. 16 | usage() { 17 | if [ "${OPT_ERR}" ]; then 18 | echo "${OPT_ERR}" 19 | fi 20 | cat <<-USAGE 21 | Usage: $0 [OPTIONS] [FILE] 22 | $0 does two things: 1) get a GDB backtrace 2) aggregate it. 23 | If you specify a FILE, then step 1) is not performed. 24 | Options: 25 | -b BINARY Which binary to trace (default mysqld) 26 | -i ITERATIONS How many traces to gather and aggregate (default 10) 27 | -k KEEPFILE Keep the raw traces in this file after aggregation 28 | -l NUMBER Aggregate only first NUMBER functions; 0=infinity (default 16) 29 | -p PID Process ID of the process to trace; overrides -b 30 | -s SLEEPTIME Number of seconds to sleep between iterations (default 0) 31 | -t THREAD Threads to trace (default all) 32 | --no-demangle Do not demangle OCaml functions, print raw symbol names 33 | USAGE 34 | exit 1 35 | } 36 | 37 | # Actually does the aggregation. The arguments are the max number of functions 38 | # to aggregate, and the files to read. If maxlen=0, it means infinity. We have 39 | # to pass the maxlen argument into this function to make maxlen testable. 40 | aggregate_stacktrace() { 41 | maxlen="$1"; 42 | demangle="$3"; 43 | cat > ${tmpfile}.awk <( *\$|::)/ ) { 87 | if ( 0 == gsub(/<[^<>]*>/, "", targ) ) { 88 | break; 89 | } 90 | } 91 | # Remove void and const decorators. 92 | gsub(/ (void|const) /, "", targ); 93 | gsub(/ /, "", targ); 94 | } 95 | else if ( targ ~ /\\?\\?/ && \$2 ~ /[1-9]/ ) { 96 | # Substitute ?? by the name of the library. 97 | targ = \$NF; 98 | while ( targ ~ /\\// ) { 99 | targ = substr(targ, index(targ, "/") + 1); 100 | } 101 | targ = substr(targ, 1, index(targ, ".") - 1); 102 | targ = targ "::??"; 103 | } 104 | else if ( ${demangle} == 1 && targ ~ /^camlDune__exe__/) { 105 | sub(/^camlDune__exe__/,"",targ); 106 | } 107 | else if ( ${demangle} == 1 && targ ~ /^caml/ && targ !~ /^caml_/ ) { 108 | a = split(targ, p, "_"); 109 | if (a > 1 && p[a] ~ /^[0-9]+$/) 110 | { 111 | if (p[a-1] == "fun") 112 | { 113 | targ = substr(targ, 1, length(targ) - length(p[a]) - length(p[a-1]) - 1) "#" p[a] 114 | } 115 | else 116 | { 117 | targ = substr(targ, 1, length(targ) - length(p[a]) - 1) 118 | } 119 | } 120 | gsub(/__/, ".", targ); 121 | sub(/^caml/, "", targ); 122 | } 123 | # get rid of long symbol names such as 'pthread_cond_wait@@GLIBC_2.3.2' 124 | if ( targ ~ /@@/ ) { 125 | fname = substr(targ, 1, index(targ, "@@") - 1); 126 | } 127 | else { 128 | fname = targ; 129 | } 130 | if ( ${maxlen:-0} == 0 || c < ${maxlen:-0} ) { 131 | if (s != "" ) { 132 | s = s " " fname; 133 | } 134 | else { 135 | s = fname; 136 | } 137 | } 138 | c++; 139 | } 140 | END { 141 | print s 142 | } 143 | EOF 144 | awk -f ${tmpfile}.awk "$2" | sort | uniq -c | sort -n -k 1,1 145 | } 146 | 147 | # The main program to run. 148 | main() { 149 | export tmpfile=$(tempfile --prefix=pmp); 150 | 151 | # Get command-line options 152 | for o; do 153 | case "${o}" in 154 | --) 155 | shift; break; 156 | ;; 157 | --help) 158 | usage; 159 | ;; 160 | -b) 161 | shift; OPT_b="${1}"; shift; 162 | ;; 163 | -i) 164 | shift; OPT_i="${1}"; shift; 165 | ;; 166 | -k) 167 | shift; OPT_k="${1}"; shift; 168 | ;; 169 | -l) 170 | shift; OPT_l="${1}"; shift; 171 | ;; 172 | -p) 173 | shift; OPT_p="${1}"; shift; 174 | ;; 175 | -s) 176 | shift; OPT_s="${1}"; shift; 177 | ;; 178 | -t) 179 | shift; OPT_t="${1}"; shift; 180 | ;; 181 | --no-demangle) 182 | shift; OPT_demangle=0; 183 | ;; 184 | -*) 185 | OPT_ERR="Unknown option ${o}." 186 | usage 187 | ;; 188 | esac 189 | done 190 | export OPT_i="${OPT_i:-10}"; 191 | export OPT_k="${OPT_k:-}"; 192 | export OPT_l="${OPT_l:-16}"; 193 | export OPT_b="${OPT_b:-mysqld}"; 194 | export OPT_p="${OPT_p:-}"; 195 | export OPT_s="${OPT_s:-0}"; 196 | export OPT_t="${OPT_t:-all}"; 197 | export OPT_demangle="${OPT_demangle:-1}"; 198 | 199 | if [ -z "${1}" ]; then 200 | # There's no file to analyze, so we'll make one. 201 | if [ -z "${OPT_p}" ]; then 202 | OPT_p=$(pidof -s "${OPT_b}" 2>/dev/null); 203 | if [ -z "${OPT_p}" ]; then 204 | OPT_p=$(pgrep -o -x "${OPT_b}" 2>/dev/null) 205 | fi 206 | if [ -z "${OPT_p}" ]; then 207 | OPT_p=$(ps -eaf | grep "${OPT_b}" | grep -v grep | awk '{print $2}' | head -n1); 208 | fi 209 | fi 210 | date; 211 | for x in $(seq 1 $OPT_i); do 212 | gdb -ex "set pagination 0" -ex "thread apply $OPT_t bt" -batch /proc/$OPT_p/exe -p $OPT_p >> "${OPT_k:-${tmpfile}}" 213 | date +'TS %N.%s %F %T' >> "${OPT_k:-${tmpfile}}" 214 | sleep $OPT_s 215 | done 216 | fi 217 | 218 | if [ $# -eq 0 ]; then 219 | aggregate_stacktrace "${OPT_l}" "${OPT_k:-${tmpfile}}" "${OPT_demangle}" 220 | else 221 | aggregate_stacktrace "${OPT_l}" "$@" "${OPT_demangle}" 222 | fi 223 | rm -f ${tmpfile} ${tmpfile}.awk 224 | } 225 | 226 | # Execute the program if it was not included from another file. This makes it 227 | # possible to include without executing, and thus test. 228 | if [ "$(basename "$0")" = "pmp" ] || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then 229 | main "$@" 230 | fi 231 | -------------------------------------------------------------------------------- /pmpa: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | # Based on a pmp script from Aspersa (http://code.google.com/p/aspersa/) by Baron Schwartz, 4 | # in turn based on a script by Domas Mituzas at http://poormansprofiler.org 5 | 6 | # Description: https://inbox.ocaml.org/caml-list/20110807150719.34376e5e605354e296c528ca@gmail.com/ 7 | 8 | # License: BSD 3-clause 9 | # https://opensource.org/licenses/BSD-3-Clause 10 | 11 | usage() 12 | { 13 | if [ "${OPT_ERR}" ]; then 14 | echo "E: ${OPT_ERR}" 15 | fi 16 | cat <<-USAGE 17 | Usage: $0 [OPTIONS] 18 | $0 does two things: 19 | 1) sets a breakpoint and gets a backtrace using GDB 20 | 2) aggregates it 21 | Options: 22 | -i ITERATIONS How many traces to gather and aggregate (default 10) 23 | -l NUMBER Aggregate only first NUMBER functions (default 16) 24 | -p PID Process ID of the process to trace 25 | -t FUNCTION Function to break on (default caml_gc_dispatch) 26 | USAGE 27 | exit 1 28 | } 29 | 30 | 31 | main () 32 | { 33 | for o; do 34 | case "${o}" in 35 | --help) 36 | usage; 37 | ;; 38 | -p) 39 | shift; OPT_p="${1}"; shift; 40 | ;; 41 | -i) 42 | shift; OPT_i="${1}"; shift; 43 | ;; 44 | -t) 45 | shift; OPT_t="${1}"; shift; 46 | ;; 47 | -l) 48 | shift; OPT_l="${1}"; shift; 49 | ;; 50 | -*) 51 | OPT_ERR="unknown option ${o}"; 52 | usage 53 | ;; 54 | esac 55 | done 56 | export OPT_p="${OPT_p:-}"; 57 | export OPT_i="${OPT_i:-10}"; 58 | export OPT_t="${OPT_t:-caml_gc_dispatch}"; 59 | export OPT_l="${OPT_l:-16}"; 60 | 61 | if [ ! "${OPT_p}" ] ; then 62 | OPT_ERR="PID not specified"; 63 | usage 64 | fi 65 | 66 | for i in $(seq 1 $OPT_i); do 67 | gdb -p "$OPT_p" "/proc/$OPT_p/exe" -batch -n -q -ex 'set interactive off' -ex "b $OPT_t" -ex 'c' -ex 'bt'\ 68 | |awk '/^#[^0]/ {print $4}' \ 69 | |sed -e s/^camlDune__exe__// -e s/__/./g -e 's/^caml\([^_]\)/\1/' \ 70 | |grep -Fv caml_garbage_collection |grep -Fv caml_call_gc \ 71 | |uniq |head -n "$OPT_l" |paste -s -d , ; 72 | done |sort |uniq -c |sort -n; 73 | } 74 | 75 | main $@ 76 | -------------------------------------------------------------------------------- /pvolume.sh: -------------------------------------------------------------------------------- 1 | #!/bin/dash 2 | 3 | SINK=$(pactl info | grep Default.Sink | awk '{print $3}') 4 | SOURCE=$(pactl info | grep Default.Source | awk '{print $3}') 5 | 6 | #echo $SINK 7 | #set -x 8 | 9 | case "$1" in 10 | "up") pactl set-sink-mute $SINK 0; pactl set-sink-volume $SINK +5% ;; 11 | "down") pactl set-sink-mute $SINK 0; pactl -- set-sink-volume $SINK -5% ;; 12 | "mute-out") pactl set-sink-mute $SINK toggle ;; 13 | "mute-in") pactl set-source-mute $SOURCE toggle ;; 14 | "next-out") 15 | OTHER_SINK=$(pactl list sinks | grep Name: | awk '{print $2}' | ~/bin/next.ml.exe "$SINK") 16 | if [ "$OTHER_SINK" != "$SINK" ]; then 17 | pactl set-default-sink "$OTHER_SINK" 18 | notify-send -u normal -i audio-volume-medium-symbolic -t 5000 "Sound output" "$(printf "Now playing on %s" "$OTHER_SINK")" 19 | fi 20 | ;; 21 | *) echo unknown command; exit 2;; 22 | esac 23 | --------------------------------------------------------------------------------