├── README.md
├── bencode.ml
├── cxx_wrapped.h
├── git-branches
├── idn.ml
├── indente.ml
├── measure.ml
├── memory.pikchr
├── memsum
├── mlvalues.py
├── next.ml
├── oasis-query.sh
├── opam-url
├── pay.ml
├── php_serialize.ml
├── pmp
├── pmpa
└── pvolume.sh
/README.md:
--------------------------------------------------------------------------------
1 | * [Bencode](bencode.ml) - parse [bencode](http://www.bittorrent.org/beps/bep_0003.html)'d structures (Bittorrent metadata)
2 | * [cxx_wrapped.h](cxx_wrapped.h) - simple template to wrap C++ object as OCaml custom value (used for example in [ocaml-hypertable](https://github.com/ygrek/ocaml-hypertable/))
3 | * [git-branches](git-branches) - git subcommand to compactly show every branch (with its top commit) in the repository
4 | * [idn.ml](idn.ml) - copy of punycode and IDN module for OCaml (previously available at http://caml.ru/~dima/ocaml/idn.ml) with some bugfixes
5 | * [indente.ml](indente.ml) - detect common beginner error of else branch scope in OCaml code
6 | * [Measure](measure.ml) - simple module to measure code speed and allocation rate
7 | * [memsum](memsum) - shell script to quickly see memory usage summed over process groups
8 | * [mlvalues.py](mlvalues.py) - GDB/python script to inspect OCaml runtime, heap and individual values
9 | * [oasis-query.sh](oasis-query.sh) - shell helper to extract useful information from [oasis](https://github.com/ocaml/oasis)
10 | * [opam-url](opam-url) - shell helper to generate url section of [opam](https://opam.ocaml.org) file
11 | * [Php_serialize](php_serialize.ml) - parse and construct [php\_serialize](http://php.net/manual/en/function.serialize.php)'d values
12 | * [pay.ml](pay.ml) - simple group expenses recording and bill splitting
13 | * [pmp](pmp) - modified [poor man's profiler](http://poormansprofiler.org/) script (with OCaml names demangling)
14 | * [pmpa](pmpa) - poor man's allocation profiler ([description](https://inbox.ocaml.org/caml-list/20110807150719.34376e5e605354e296c528ca@gmail.com/))
15 | * [pvolume.sh](pvolume.sh) - pulseaudio volume and output device helper
16 |
--------------------------------------------------------------------------------
/bencode.ml:
--------------------------------------------------------------------------------
1 | (**
2 | http://www.bittorrent.org/beps/bep_0003.html
3 |
4 | This is free and unencumbered software released into the public domain.
5 | For more information, please refer to
6 | *)
7 |
8 | (* open Prelude *)
9 |
10 | let (>>) x f = f x
11 | let fail fmt = Printf.ksprintf failwith fmt
12 |
13 | open Printf
14 | open ExtLib
15 |
16 | module type Number = sig
17 | type t
18 | val neg : t -> t
19 | val of_int : int -> t
20 | val to_int : t -> int
21 | val of_string : string -> t
22 | val to_string : t -> string
23 | val zero : t
24 | val add : t -> t -> t
25 | val mul : t -> t -> t
26 | end
27 |
28 | module Make(N : Number) = struct
29 |
30 | type t =
31 | | S of string
32 | | I of N.t
33 | | L of t list
34 | | D of (string * t) list
35 |
36 | let decode_stream ?(hints=[]) chars =
37 | let ten = N.of_int 10 in
38 | let digit c = N.of_int (Char.code c - Char.code '0') in
39 | let rec loop acc = parser
40 | | [< x=parse_one; t; >] -> loop (x::acc) t
41 | | [< >] -> List.rev acc
42 | and parse_one = parser
43 | | [< s=parse_string >] -> S s
44 | | [< ''i'; n=parse_int_num; ''e' >] -> I n
45 | | [< ''l'; l=loop []; ''e' >] -> L l
46 | | [< ''d'; d=loop_d []; ''e' >] -> D d
47 | and loop_d acc = parser
48 | | [< k=parse_string; v=parse_one; t >] -> loop_d ((k,v) :: acc) t
49 | | [< >] -> List.rev acc
50 | and parse_string = parser
51 | | [< n = parse_pos_num; '':'; s = take (N.to_int n) >] -> s
52 | and parse_int_num = parser
53 | | [< ''-'; x = parse_pos_num >] -> N.neg x
54 | | [< x = parse_pos_num >] -> x
55 | and parse_pos_num = parser
56 | | [< ''0' >] -> N.zero
57 | | [< ''1'..'9' as c; n = parse_digits (digit c) >] -> n
58 | and parse_digits n = parser
59 | | [< ''0'..'9' as c; t >] -> parse_digits (N.add (N.mul n ten) (digit c)) t
60 | | [< >] -> n
61 | and take n chars =
62 | let s = String.make n '\000' in
63 | for i = 0 to n-1 do s.[i] <- Stream.next chars done; s
64 | in
65 | let main () =
66 | let r = parse_one chars in
67 | if not (List.mem `IgnoreTail hints) then Stream.empty chars;
68 | r
69 | in
70 | let show () =
71 | let tail = Stream.npeek 10 chars >> List.map (String.make 1) >> String.concat "" in
72 | sprintf "Position %u : %s" (Stream.count chars) tail
73 | in
74 | try
75 | main ()
76 | with
77 | | Stream.Error _ | Stream.Failure -> failwith (show ())
78 |
79 | let rec print out =
80 | let pr fmt = IO.printf out fmt in
81 | function
82 | | I n -> pr "%s " (N.to_string n)
83 | | S s -> pr "\"%s\" " (String.slice ~last:10 s)
84 | | L l -> pr "( "; List.iter (print out) l; pr ") "
85 | | D d -> pr "{ "; List.iter (fun (k,v) -> pr "%s: " k; print out v) d; pr "} "
86 |
87 | let to_string t =
88 | let module B = Buffer in
89 | let b = B.create 100 in
90 | let puts s = bprintf b "%u:%s" (String.length s) s in
91 | let rec put = function
92 | | I n -> bprintf b "i%se" (N.to_string n)
93 | | S s -> puts s
94 | | L l -> B.add_char b 'l'; List.iter put l; B.add_char b 'e'
95 | | D d ->
96 | B.add_char b 'd';
97 | List.iter (fun (s,x) -> puts s; put x) (List.sort ~cmp:(fun (s1,_) (s2,_) -> compare s1 s2) d);
98 | B.add_char b 'e'
99 | in
100 | put t; B.contents b
101 |
102 | (** @raise exn on error *)
103 | let decode s =
104 | Stream.of_string s >> decode_stream
105 |
106 | let key s k v =
107 | match v with
108 | | D l -> k (try List.assoc s l with Not_found -> fail "no key '%s'" s)
109 | | _ -> fail "not a dictionary"
110 |
111 | let int = function I n -> n | _ -> fail "int"
112 | let str = function S s -> s | _ -> fail "str"
113 | let list k v = match v with L l -> k l | _ -> fail "list"
114 | let listof k v = match v with L l -> List.map k l | _ -> fail "listof"
115 | let dict k v = match v with D l -> k l | _ -> fail "dict"
116 |
117 | end
118 |
119 | include Make(Int64)
120 |
121 |
--------------------------------------------------------------------------------
/cxx_wrapped.h:
--------------------------------------------------------------------------------
1 | // Simple template to wrap C++ object as OCaml custom value
2 | // Author: ygrek
3 | // Version: 2020-02-18
4 |
5 | // This is free and unencumbered software released into the public domain.
6 | // Anyone is free to copy, modify, publish, use, compile, sell, or
7 | // distribute this software, either in source code form or as a compiled
8 | // binary, for any purpose, commercial or non-commercial, and by any means.
9 | // For more information, please refer to
10 |
11 | // value wrapped::alloc(Ptr,tag=0,mem=0,max=1)
12 | // creates custom value with pointer to C++ object inside
13 | // finalizer will release pointer (whether destructor will be called
14 | // depends on the semantics of the pointer)
15 | // tag is optional tag attached to custom value
16 | // mem and max are parameters of caml_alloc_custom
17 | // value wrapped::alloc_mem(Ptr,tag,used)
18 | // same as wrapped::alloc but using caml_alloc_custom_mem
19 | // void wrapped::release(value)
20 | // releases wrapped pointer
21 | // Ptr const& wrapped::get(value)
22 | // returns pointer to wrapped object
23 | // raises OCaml Invalid_argument exception if pointer was already released
24 | // size_t wrapped::count()
25 | // returns the number of currently allocated Ptr wrappers
26 | //
27 | // wrapped<> manages smart pointers to C++ objects
28 | // wrapped_ptr<> manages raw pointers (owns pointed object, release() destroys object)
29 | //
30 | // generational_global_root is a RAII wrapper to register GC roots
31 | // caml_release_runtime is a RAII wrapper to release runtime lock in the given scope
32 | // without_runtime_lock executes function with runtime lock released
33 |
34 | extern "C" {
35 | #define CAML_NAME_SPACE
36 | #include
37 | #include
38 | #include
39 | #include
40 | #include
41 | #include
42 | #include
43 | }
44 |
45 | #include
46 |
47 | // name used as identifier for custom_operations
48 | // should be instantiated for each wrapped pointer class
49 | template
50 | char const* ml_name();
51 |
52 | // Ptr is a smart pointer class,
53 | // e.g.: std::auto_ptr, boost::shared_ptr, boost::intrusive_ptr
54 | template
55 | class wrapped
56 | {
57 | private:
58 | struct ml_wrapped
59 | {
60 | ml_wrapped(Ptr x, size_t t) : tag(t), p(x) {} // copy is ok
61 | ml_wrapped(typename Ptr::element_type* x, size_t t) : tag(t), p(x) {}
62 | size_t tag;
63 | Ptr p;
64 | };
65 |
66 | static size_t count_;
67 |
68 | #define Wrapped_val(v) (*(ml_wrapped**)Data_custom_val(v))
69 |
70 | static void finalize(value v)
71 | {
72 | release(v);
73 | delete Wrapped_val(v);
74 | }
75 |
76 | public:
77 | typedef Ptr type;
78 |
79 | static size_t count() { return count_; }
80 | static char const* name() { return ml_name(); }
81 | static size_t tag(value v) { return Wrapped_val(v)->tag; }
82 |
83 | static Ptr const& get(value v) // do not copy
84 | {
85 | Ptr const& p = Wrapped_val(v)->p;
86 | //printf("get %lx : %s\n",(size_t)p.get(),name());
87 | if (NULL == p.get()) caml_invalid_argument(name());
88 | return p;
89 | }
90 |
91 | static void release(value v)
92 | {
93 | Ptr& p = Wrapped_val(v)->p;
94 | //printf("release %lx : %s\n",(size_t)p.get(),name());
95 | if (NULL == p.get()) return;
96 | count_--;
97 | p.reset();
98 | }
99 |
100 | template
101 | static value alloc(TPtr p, size_t tag = 0, mlsize_t wmem = 0, mlsize_t wmax = 1)
102 | {
103 | //printf("alloc %lx : %s\n",(size_t)p.get(),name());
104 | CAMLparam0();
105 | CAMLlocal1(v);
106 |
107 | static struct custom_operations wrapped_ops = {
108 | const_cast(name()),
109 | finalize,
110 | custom_compare_default,
111 | custom_hash_default,
112 | custom_serialize_default,
113 | custom_deserialize_default,
114 | #if defined(custom_compare_ext_default)
115 | custom_compare_ext_default,
116 | #endif
117 | #if defined(custom_fixed_length_default)
118 | custom_fixed_length_default,
119 | #endif
120 | };
121 |
122 | v = caml_alloc_custom(&wrapped_ops, sizeof(ml_wrapped*), wmem, wmax);
123 | Wrapped_val(v) = new ml_wrapped(p, tag);
124 | count_++;
125 |
126 | CAMLreturn(v);
127 | }
128 |
129 | // caml_alloc_custom_mem was added in the same release as fixed_length field
130 | #if defined(custom_fixed_length_default)
131 | template
132 | static value alloc_mem(TPtr p, size_t tag, mlsize_t used)
133 | {
134 | //printf("alloc %lx : %s\n",(size_t)p.get(),name());
135 | CAMLparam0();
136 | CAMLlocal1(v);
137 |
138 | static struct custom_operations wrapped_ops = {
139 | const_cast(name()),
140 | finalize,
141 | custom_compare_default,
142 | custom_hash_default,
143 | custom_serialize_default,
144 | custom_deserialize_default,
145 | custom_compare_ext_default,
146 | custom_fixed_length_default,
147 | };
148 |
149 | v = caml_alloc_custom_mem(&wrapped_ops, sizeof(ml_wrapped*), used);
150 | Wrapped_val(v) = new ml_wrapped(p, tag);
151 | count_++;
152 |
153 | CAMLreturn(v);
154 | }
155 | #endif
156 |
157 | #undef Wrapped_val
158 |
159 | }; //wrapped
160 |
161 | template
162 | size_t wrapped::count_ = 0;
163 |
164 | template
165 | struct raw_ptr
166 | {
167 | #if __cplusplus >= 201103L
168 | typedef std::unique_ptr ptr;
169 | #else
170 | typedef std::auto_ptr ptr;
171 | #endif
172 | };
173 |
174 | template
175 | struct wrapped_ptr : public wrapped::ptr>
176 | {
177 | typedef wrapped::ptr> base;
178 | static T* get(value v)
179 | {
180 | return base::get(v).get();
181 | }
182 | static value alloc(T* p, size_t tag = 0, mlsize_t wmem = 0, mlsize_t wmax = 1)
183 | {
184 | return base::alloc(p,tag,wmem,wmax);
185 | }
186 | #if defined(caml_alloc_custom_mem)
187 | static value alloc_mem(T* p, size_t tag, mlsize_t used)
188 | {
189 | return base::alloc_mem(p,tag,used);
190 | }
191 | #endif
192 | }; // wrapped_ptr
193 |
194 | #if defined(__GNUC__)
195 | #define UNUSED __attribute__((unused))
196 | #else
197 | #define UNUSED
198 | #endif
199 |
200 | static size_t UNUSED wrapped_tag(value x) { return wrapped_ptr::tag(x); }
201 |
202 | class caml_release_runtime // : boost::noncopyable
203 | {
204 | public:
205 | caml_release_runtime() { caml_release_runtime_system(); }
206 | ~caml_release_runtime() { caml_acquire_runtime_system(); }
207 | private:
208 | caml_release_runtime( const caml_release_runtime& );
209 | const caml_release_runtime& operator=( const caml_release_runtime& );
210 | };
211 |
212 | // compatibiliy
213 | typedef caml_release_runtime caml_blocking_section;
214 |
215 | class generational_global_root // : boost::noncopyable
216 | {
217 | public:
218 | generational_global_root(value v)
219 | {
220 | v_ = v;
221 | caml_register_generational_global_root(&v_);
222 | }
223 | ~generational_global_root()
224 | {
225 | caml_remove_generational_global_root(&v_);
226 | v_ = Val_unit;
227 | }
228 | void set(value v)
229 | {
230 | caml_modify_generational_global_root(&v_, v);
231 | }
232 | value get()
233 | {
234 | return v_;
235 | }
236 | private:
237 | generational_global_root(generational_global_root const&);
238 | const generational_global_root& operator=(generational_global_root const&);
239 | private:
240 | value v_;
241 | };
242 |
243 | #if __cplusplus >= 201103L
244 | template
245 | auto without_runtime_lock(Func f, Args && ... args) -> decltype(f(std::forward(args)...))
246 | {
247 | caml_blocking_section lock;
248 | return f(std::forward(args)...);
249 | }
250 | #endif
251 |
--------------------------------------------------------------------------------
/git-branches:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 |
3 | for branch in $(git for-each-ref --format='%(refname:short)' refs/remotes/ refs/heads/ "$@" | grep -v '^origin/HEAD$'); do
4 | echo -e "$(git show --format='%ci %<(24)%cr %<(24)%an' $branch -- | head -n 1) \\t$branch"
5 | done | sort -r | less -XF
6 |
--------------------------------------------------------------------------------
/idn.ml:
--------------------------------------------------------------------------------
1 | (* Punycode and IDN library for OCaml *)
2 | (* License: without restrictions *)
3 | (* Author: dima@caml.ru *)
4 |
5 | (* Fixes by: ygrek and cyberhuman *)
6 | (* Version: 2013/08/29 *)
7 |
8 | exception Bad_input
9 | exception Overflow
10 |
11 | (* Parameters *)
12 |
13 | let base = 36
14 | let tmin = 1
15 | let tmax = 26
16 | let skew = 38
17 | let damp = 700
18 | let initial_bias = 72
19 | let initial_n = 0x80
20 | let delimiter = 0x2D
21 |
22 | (* Encoding *)
23 |
24 | let basic p = p < 0x80
25 |
26 | let encode_digit d =
27 | if d < 26 then
28 | d + Char.code 'a'
29 | else if d < 36 then
30 | d - 26 + Char.code '0'
31 | else
32 | raise Bad_input
33 |
34 | let adapt delta num_points first =
35 | let delta =
36 | if first then delta / damp else (delta lsr 1) in
37 | let delta = ref (delta + (delta / num_points)) in
38 | let k = ref 0 in
39 | let lim = ((base - tmin) * tmax) / 2 in
40 | while (!delta > lim) do
41 | delta := !delta / (base - tmin);
42 | k := !k + base
43 | done;
44 | !k + (((base - tmin + 1) * !delta) / (!delta + skew))
45 |
46 | let encode_data input_data =
47 | let n = ref initial_n in
48 | let delta = ref 0 in
49 | let bias = ref initial_bias in
50 | let basic_count = ref 0 in
51 | let buf = Buffer.create 32 in
52 | let out n =
53 | Buffer.add_char buf (Char.chr n) in
54 |
55 | Array.iter
56 | (fun c ->
57 | if basic c then
58 | begin
59 | out c;
60 | incr basic_count;
61 | end)
62 | input_data;
63 |
64 | if !basic_count > 0 then
65 | Buffer.add_char buf (Char.chr delimiter);
66 |
67 | let handled_count = ref !basic_count in
68 |
69 | while (!handled_count < Array.length input_data) do
70 | let m = ref max_int in
71 | Array.iter
72 | (fun c ->
73 | if c >= !n && c < !m then
74 | m := c)
75 | input_data;
76 |
77 | if !m - !n > (max_int - !delta) / (succ !handled_count) then
78 | raise Overflow;
79 | delta := !delta + (!m - !n) * (succ !handled_count);
80 | n := !m;
81 |
82 | Array.iter
83 | (fun c ->
84 | if c < !n then
85 | begin
86 | incr delta;
87 | if !delta = 0 then
88 | raise Overflow;
89 | end;
90 | if c = !n then
91 | begin
92 | let q = ref !delta in
93 | let k = ref base in
94 | (try
95 | while true do
96 | let t =
97 | if !k <= !bias then tmin
98 | else if !k >= !bias + tmax then tmax
99 | else !k - !bias in
100 | if !q < t then
101 | raise Exit;
102 | out (encode_digit (t + ((!q - t) mod (base - t))));
103 | q := (!q - t) / (base - t);
104 | k := !k + base
105 | done
106 | with Exit -> ());
107 | out (encode_digit !q);
108 | bias := adapt !delta (succ !handled_count) (!handled_count = !basic_count);
109 | delta := 0;
110 | incr handled_count;
111 | end)
112 | input_data;
113 | incr delta;
114 | incr n;
115 | done;
116 | Buffer.contents buf
117 |
118 | (* Decoding *)
119 |
120 | let decode_digit p =
121 | if p < 48 then raise Bad_input else
122 | if p < 58 then p + 26 - 48 else
123 | if p < 65 then raise Bad_input else
124 | if p < 65 + 26 then p - 65 else
125 | if p < 97 then raise Bad_input else
126 | if p < 97 + 26 then p - 97 else
127 | raise Bad_input
128 |
129 | let decode_data input_data =
130 | let buflen = String.length input_data in
131 | let n = ref initial_n in
132 | let i = ref 0 in
133 | let bias = ref initial_bias in
134 | let buf = Array.make buflen 0 in
135 |
136 | let input_length =
137 | String.length input_data in
138 |
139 | let out = ref 0 in
140 | let data_pos =
141 | try
142 | let pos = String.rindex input_data (Char.chr delimiter) in
143 | for i = 0 to pos - 1 do
144 | Array.unsafe_set buf i (Char.code input_data.[i])
145 | done;
146 | out := pos;
147 | pos + 1
148 | with _ -> 0
149 | in
150 |
151 | let j = ref data_pos in
152 | while !j < input_length do
153 | let oldi = ref !i in
154 | let w = ref 1 in
155 | let k = ref base in
156 | (try
157 | while true do
158 | if !j >= input_length then raise Bad_input;
159 | let digit = decode_digit (Char.code input_data.[!j]) in incr j;
160 | if digit > (max_int - !i) / !w then raise Overflow;
161 | i := !i + digit * !w;
162 | let t =
163 | if !k <= !bias then tmin
164 | else if !k >= !bias + tmax then tmax
165 | else !k - !bias
166 | in
167 | if digit < t then
168 | raise Exit;
169 | if !w > max_int / (base - t) then raise Overflow;
170 | w := !w * (base - t);
171 | k := !k + base
172 | done
173 | with Exit -> ());
174 | let next =
175 | succ !out in
176 | bias := adapt (!i - !oldi) next (!oldi = 0);
177 | if !i / next > max_int - !n then raise Overflow;
178 | n := !n + !i / next;
179 | i := !i mod next;
180 | if !out >= buflen then raise Overflow;
181 | if !out > !i then
182 | Array.blit buf !i buf (!i + 1) (!out - !i);
183 | buf.(!i) <- !n;
184 | incr i; incr out;
185 | done;
186 | Array.sub buf 0 !out
187 |
188 | let upoints = Netconversion.uarray_of_ustring `Enc_utf8
189 | let ustring = Netconversion.ustring_of_uarray `Enc_utf8
190 |
191 | (* Punycode API *)
192 |
193 | let encode s = encode_data (upoints s)
194 | let decode s = ustring (decode_data s)
195 |
196 | (* Helpers *)
197 |
198 | let split domain =
199 | let rec make acc rest =
200 | try
201 | let pos = String.index rest '.' in
202 | make ((String.sub rest 0 pos)::acc)
203 | (String.sub rest (succ pos) ((String.length rest) - pos - 1))
204 | with Not_found -> List.rev (rest::acc)
205 | in make [] domain
206 |
207 | let join = String.concat "."
208 |
209 | let need_encoding s =
210 | let l =
211 | String.length s in
212 | try
213 | for i = 0 to pred l do
214 | if not (basic (Char.code (String.unsafe_get s i))) then
215 | raise Exit
216 | done; false
217 | with Exit -> true
218 |
219 | let transcode s =
220 | if need_encoding s then
221 | "xn--" ^ encode s
222 | else s
223 |
224 | let transtext s =
225 | let l = String.length s in
226 | if l > 4 && String.sub s 0 4 = "xn--" then
227 | decode (String.sub s 4 (l - 4))
228 | else s
229 |
230 | (* IDN api *)
231 |
232 | let encode_domain domain =
233 | join (List.map transcode (split domain))
234 |
235 | let decode_domain domain =
236 | join (List.map transtext (split domain))
237 |
238 |
239 | let () =
240 | assert ("他们为什么不说中文" = decode "ihqwcrb4cv8a8dqg056pqjye");
241 | assert ("---禁刊拍賣網址---" = decode "-------5j3ji85am9zsk4ckwjm29b");
242 | assert ("reality44hire-b9a" = encode (decode "reality44hire-b9a"));
243 | begin
244 | try
245 | let (_:string) = decode_domain "xn----7sbksbihemjgbjxflp8bn1jxc.xn--p1aiaudio_orlov_yum" in
246 | assert false
247 | with
248 | | Bad_input -> assert true
249 | | _ -> assert false
250 | end;
251 | ()
252 |
--------------------------------------------------------------------------------
/indente.ml:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env b0caml
2 | #directory "+ocaml/compiler-libs"
3 |
4 | open Printf
5 |
6 | let loc { Parsetree.pexp_loc = loc; _ } = Location.get_pos_info loc.loc_start
7 | let col expr = let (_,_,c) = loc expr in c
8 |
9 | let iter = { Ast_iterator.default_iterator with
10 | expr = fun iter expr ->
11 | begin match expr with
12 | | { pexp_desc = Pexp_sequence ({ pexp_desc = Pexp_ifthenelse (_cond,e_then,e_else); _ } as e_if, next); _ } ->
13 | let last = Option.value ~default:e_then e_else in
14 | if col last = col next && col next <> col e_if then
15 | let (file,line,_) = loc next in
16 | printf "Suspicious indentation of next expression after if at %s %d\n%!" file line
17 | | _ -> ()
18 | end;
19 | Ast_iterator.default_iterator.expr iter expr
20 | }
21 |
22 | let parse file =
23 | iter.structure iter @@ Pparse.parse_implementation ~tool_name:"indetect" file
24 |
25 | let () =
26 | Sys.argv |> Array.iteri begin fun i file ->
27 | if i <> 0 then try parse file with exn -> fprintf stderr "Failed to parse %s : %s\n%!" file (Printexc.to_string exn)
28 | end
29 |
--------------------------------------------------------------------------------
/measure.ml:
--------------------------------------------------------------------------------
1 | (**
2 | Simple module to measure code speed and allocation rate
3 |
4 | http://ygrek.org.ua/p/code/measure.ml
5 | 2011-08-08
6 |
7 | This is free and unencumbered software released into the public domain.
8 | For more information, please refer to
9 | *)
10 |
11 | open Printf
12 | open Gc
13 |
14 | let bytes_f f = (* oh ugly *)
15 | let a = abs_float f in
16 | if a < 1024. then sprintf "%dB" (int_of_float f) else
17 | if a < 1024. *. 1024. then sprintf "%dKB" (int_of_float (f /. 1024.)) else
18 | if a < 1024. *. 1024. *. 1024. then sprintf "%.1fMB" (f /. 1024. /. 1024.) else
19 | sprintf "%.1fGB" (f /. 1024. /. 1024. /. 1024.)
20 |
21 | let bytes x = bytes_f (float_of_int x)
22 | let bytes_64 x = bytes_f (Int64.to_float x)
23 |
24 | let words_f f =
25 | bytes_f (f *. (float_of_int (Sys.word_size / 8)))
26 |
27 | let words x = words_f (float_of_int x)
28 |
29 | let gc_diff st1 st2 =
30 | let allocated st = st.minor_words +. st.major_words -. st.promoted_words in
31 | let a = allocated st2 -. allocated st1 in
32 | let minor = st2.minor_collections - st1.minor_collections in
33 | let major = st2.major_collections - st1.major_collections in
34 | let compact = st2.compactions - st1. compactions in
35 | let heap = st2.heap_words - st1.heap_words in
36 | Printf.sprintf "allocated %7s, heap %7s, collect %2d %4d %5d" (words_f a) (words heap) compact major minor
37 |
38 | let measure f x =
39 | Gc.compact ();
40 | let st = Gc.quick_stat () in
41 | let t1 = Unix.gettimeofday () in
42 | let () = f x in
43 | let t2 = Unix.gettimeofday () in
44 | let st2 = Gc.quick_stat () in
45 | sprintf "%s, elapsed %.3f sec" (gc_diff st st2) (t2 -. t1)
46 |
47 | let show name f x =
48 | printf "%12s : %s\n%!" name (measure f x)
49 |
50 | (**
51 | Example usage:
52 |
53 | let src = "We should forget about small efficiencies, say about 97% of the time: premature optimization is the root of all evil"
54 |
55 | let run f = for i = 1 to 1_000_000 do ignore (f () : string) done
56 | let scanf () = Scanf.sscanf src "%s@:" (fun s -> s)
57 | let sub () = String.sub src 0 (String.index src ':')
58 |
59 | let () =
60 | Measure.show "scanf" run scanf;
61 | Measure.show "sub" run sub;
62 | ()
63 |
64 | Compile:
65 |
66 | ocamlopt unix.cmxa measure.ml bench.ml -o bench
67 |
68 | Result:
69 |
70 | scanf : allocated 2.2GB, heap 0B, collect 0 1419 8888, elapsed 2.072 sec
71 | sub : allocated 76.3MB, heap 0B, collect 0 0 305, elapsed 0.162 sec
72 |
73 | *)
74 |
--------------------------------------------------------------------------------
/memory.pikchr:
--------------------------------------------------------------------------------
1 |
2 | scale *= 1.5
3 | fontscale = 0.8
4 |
5 | B1: box width 3cm height 3cm thin color gray
6 | B2: box same
7 | B3: box same with .ne at previous.sw
8 | B4: box same
9 |
10 | T1: box invisible "Inactive" fit at B1.center
11 | T2: box same "Active" at B2.center
12 | T3: box same "MemFree" at B3.center
13 | T4: box same "Mapped" at B4.center
14 |
15 | move right 0.05 from T1.s
16 | arrow down even with T3.n chop "reclaim" aligned above
17 | move left 0.05 from T3.n
18 | arrow up even with T1.s chop "malloc" aligned above
19 |
20 | //arrow from T2.s+(0.1,0) to T4.n+(0.1,0) chop "reclaim" aligned above
21 | //arrow from T4.n-(0.1,0) to T2.s-(0.1,0) chop "touch" aligned above
22 |
23 | move up 0.05 from T1.e
24 | arrow right even with T2.w chop "touch" aligned above
25 | move down 0.05 from T1.e
26 | arrow same <- chop "MADV_FREE" below
27 |
28 | arrow from T3.e+(0,0.05) right even with T4.w chop "mmap" aligned above
29 | arrow same <- from T3.e-(0,0.05) chop "munmap" below
30 |
31 | arrow from 0.1 left of T3.ne to T2.sw chop "calloc" aligned rjust above
32 | arrow same <- from T3.ne "free" aligned below
33 |
34 | line invisible from B1.sw to B1.nw "allocated" above aligned
35 | line invisible from B3.sw to B3.nw "free" above aligned
36 | line invisible from B3.sw-(0.5cm,0) to B1.nw-(0.5cm,0) "userspace" above aligned
37 | "free" below at B3.s
38 | "mapped" below at B4.s
39 | line invisible from B3.sw to B4.se
40 | "kernel" at 0.5cm below previous
41 |
--------------------------------------------------------------------------------
/memsum:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 | #
3 | # Quick overview of memory usage by process groups
4 | #
5 | # This is free and unencumbered software released into the public domain.
6 | # For more information, please refer to
7 |
8 | ps h -eo pid,ppid,rss,args \
9 | | awk '{
10 | cmd = $4
11 | for (i=5;i<=NF;i++) { cmd = cmd " " $i } # reconstruct cmd
12 | name[$1] = cmd ; # save cmd for pid
13 | total += $3
14 | if ($2>10) { acc[$2] += $3 } # group rss by parent pid
15 | else { acc[cmd] += $3 } # for toplevel processes - group rss by cmd
16 | }
17 | END {
18 | shown = 0
19 | for (i in acc) {
20 | mem = acc[i]/1024/1024;
21 | if (mem > 1) { printf("%.1fG %s\n",mem,(i in name) ? name[i] : i); shown += acc[i]; }
22 | };
23 | printf("%.1fG TOTAL (%.1fG shown)",total/1024/1024, shown/1024/1024)
24 | }' \
25 | | sort -nk1
26 |
--------------------------------------------------------------------------------
/mlvalues.py:
--------------------------------------------------------------------------------
1 | # Inspect OCaml heap and values in GDB
2 | # 2016/12/12
3 | #
4 | # https://github.com/ygrek/scraps/blob/master/mlvalues.py
5 | # (c) 2011 ygrek
6 | # (c) 2016 Amplidata, a HGST company.
7 | #
8 | # Licensed under the terms of BSD 3-clause
9 | #
10 | # Description
11 | # -----------
12 | #
13 | # You will need GDB with python3.
14 | # This code reimplements Std.dump from extlib and is
15 | # limited to information available in runtime representation
16 | # of OCaml values. Not complete, doesn't handle cycles at all,
17 | # tested only on x64.
18 | #
19 | # At GDB prompt input:
20 | # source mlvalues.py
21 | #
22 | # And inspect any OCaml value:
23 | # ml_dump [/r[N]] # address or symbol representing OCaml value
24 | # optional /r flag controls the recursion depth limit when printing (default 1).
25 | # e.g.:
26 | # ml_dump caml_globals
27 | # ml_dump /r &camlList
28 | #
29 | # Or inspect an array of values:
30 | # ml_dump [/r[N]]
31 | # e.g.:
32 | # ml_dump 1 # inspecting single pointer to value
33 | # ml_dump gray_vals 5
34 | #
35 | # Inspect local (stack) GC roots:
36 | # ml_dump [/r[N]] caml_local_roots
37 | #
38 | # Show OCaml heap information:
39 | # ml_heap
40 | #
41 | # Scan OCaml heap region:
42 | # ml_scan [/r[N]] addr bytes
43 | #
44 | # Validate the OCaml heap:
45 | # ml_validate
46 | # This process can take a while and scans the entire heap, trying to
47 | # validate all values (e.g. string sizes, overflows, underflows,
48 | # stray pointers, etc.). Use it to find issues with the garbage
49 | # collector or OCaml run-time.
50 | #
51 | # Show a particular OCaml value, alternative to ml_dump:
52 | # ml_show [/r[] [verbosity]
53 | # Verbosity currently ranges from 0-3, with default being 1.
54 | # 0: Either type of value or error reason
55 | # 1: Type and value for simple types
56 | # 2: Type, value and sub-values for short aggregate/composite types
57 | # (Tries not to blow up on your screen)
58 | # 3: Type, value and sub-values of everything
59 | # OCaml lists are not detected, a higher recursion depth will
60 | # be needed to print it out entirely.
61 | #
62 | # Show memory address space info with OCaml specific-bits:
63 | # ml_target [verbosity]
64 | # This command is similar to "info target" but shows OCaml-specifics.
65 | # Verbosity is only valid for "all", values are 0-2, default is 1:
66 | # 0: OCaml stack, minor & major heap are displayed
67 | # 1: Same as 0 and static data, code and known GC metadata areads
68 | # 2: all memory types are displayed
69 | # e.g.
70 | # ml_target 0x00035208
71 | # ml_target all 3
72 | #
73 | # Find a particular value in memory:
74 | # ml_find
75 | # Similar to the gdb "find" command, but will also display the memory
76 | # type (see ml_target) where the value was found, making it easier to
77 | # find the right occurrence.
78 | #
79 | # Use Python class directly for detailed scrutiny, e.g.:
80 | # python x = OCamlValue(gdb.parse_and_eval("caml_globals")); print x.size_words()
81 | #
82 | # Changelog
83 | # ---------
84 | #
85 | # 2016-07-28
86 | # Add ml_validate, ml_show, ml_target and a host of utility code (by Amplidata)
87 | #
88 | # 2016-01-17
89 | # New command `ml_scan` to show values in OCaml heap
90 | #
91 | # 2015-10-22
92 | # Truncate long strings when printing
93 | #
94 | # 2014-04-29
95 | # Limit recursion depth when printing
96 | #
97 | # 2014-03-31
98 | # Be precise with type when reading runtime variables
99 | #
100 | # 2013-08-25
101 | # Use `cast` instead of `reinterpet_cast` for better compatibility (by ADEpt)
102 | # Better handling of missing debuginfo in OCaml runtime
103 | # Add usage examples
104 | #
105 | # 2013-08-01
106 | # Dump local (stack) GC roots with `ml_dump local_roots`
107 | #
108 | # 2012-08-09
109 | # Fix printing of strings with binary data (use 'latin1' codec)
110 | #
111 | # 2012-04-11
112 | # Dump the array of OCaml values with ml_dump
113 | # Inspect closure blocks
114 | # Tweak formatting
115 | #
116 | # 2012-03-07
117 | # New command `ml_heap` - shows some general information about OCaml heap:
118 | # * GC parameters
119 | # * total heap size
120 | # * allocated chunks layout
121 | #
122 | # 2011-12-27
123 | # Show symbol and name for custom values
124 | # Catch gdb.MemoryError and continue printing
125 | # Correctly lookup types
126 | #
127 | # 2011-12-24
128 | # Initial
129 |
130 | import functools
131 | import traceback
132 | import collections
133 | import bisect
134 | import struct
135 | import ctypes
136 | from enum import Enum
137 |
138 | def TraceMemoryError(f):
139 | """
140 | Attribute that wraps the function in order to display a traceback
141 | when accessing invalid memory
142 | """
143 | def wrapper(*args, **kwargs):
144 | try:
145 | return f(*args, **kwargs)
146 | except gdb.MemoryError as me:
147 | print("Function %s attempted to access invalid memory" % f.__name__)
148 | traceback.print_exc()
149 | raise
150 | return functools.update_wrapper(wrapper, f)
151 |
152 | def TraceAll(f):
153 | """
154 | Attribute that wraps the function in order to display a traceback
155 | """
156 | def wrapper(*args, **kwargs):
157 | try:
158 | return f(*args, **kwargs)
159 | except:
160 | traceback.print_exc()
161 | raise
162 | return functools.update_wrapper(wrapper, f)
163 |
164 | def try_dereference(gdbval):
165 | """
166 | Attempt to dereference a gdb.Value returning None when the access fails.
167 | """
168 | try:
169 | ret = gdbval.dereference()
170 | ret.fetch_lazy()
171 | return ret
172 | except gdb.MemoryError:
173 | return None
174 |
175 | def print_cont(*args, **kwargs):
176 | kwargs["end"] = ''
177 | print(*args, **kwargs)
178 |
179 | # gdb.Type's used often throughout the script
180 | intnat = size_t = charp = doublep = heap_chunk_head_p = caml_contextp = caml_thread_structp = None
181 |
182 | # do not lookup types at class level cause this script may be run before
183 | # the inferior image is loaded and gdb can't know sizeof(long) in advance
184 | def init_types():
185 | global intnat, size_t, charp, doublep, heap_chunk_head_p, caml_contextp, caml_thread_structp
186 |
187 | if doublep is not None:
188 | return
189 |
190 | try:
191 | heap_chunk_head_p = gdb.lookup_type("heap_chunk_head").pointer()
192 | except:
193 | print("Didn't find 'heap_chunk_head'. Major heap walking is unavailable")
194 | pass
195 |
196 | try:
197 | caml_contextp = gdb.lookup_type("struct caml_context").pointer()
198 | except:
199 | print("Didn't find 'struct caml_context'. Stack walking unavailable.")
200 | pass
201 |
202 | try:
203 | caml_thread_structp = gdb.lookup_type("caml_thread_struct").pointer()
204 | except:
205 | print("Didn't find 'caml_thread_struct'. System thread roots unavailable.")
206 | pass
207 |
208 | intnat = gdb.lookup_type("intnat")
209 | size_t = gdb.lookup_type("size_t")
210 | charp = gdb.lookup_type("char").pointer()
211 | doublep = gdb.lookup_type("double").pointer()
212 | # keep this one last
213 |
214 | def get_value_safe(name, type=None):
215 | try:
216 | x = gdb.parse_and_eval(name)
217 | if type is not None:
218 | x = x.cast(type)
219 | return x
220 | except gdb.error:
221 | return None
222 |
223 | class MemoryType(Enum):
224 | """
225 | Various types of memory we're interested in
226 | """
227 | General = 0
228 | Stack = 1
229 | MajorHeap = 2
230 | MinorHeap = 3
231 | Code = 4
232 | StaticData = 5
233 | OtherStatic = 6
234 | Finalisers = 7
235 | GC_Metadata = 8
236 | LinuxSpecial = 9
237 | Unknown = 10
238 |
239 | Address = 100
240 |
241 | @classmethod
242 | def all(cls):
243 | return [ cls.General, cls.Stack, cls.MajorHeap, cls.MinorHeap, cls.Code, cls.StaticData, cls.OtherStatic, cls.Finalisers, cls.GC_Metadata, cls.LinuxSpecial, cls.Unknown]
244 |
245 | @functools.total_ordering
246 | class MemoryRange:
247 | """
248 | A range of memory inside the process' memory address space
249 | """
250 | def __init__(self, address, size, source, description, memtype=None):
251 | """
252 | Args:
253 | address, size: describe memory range
254 | source: where this memory range was defined (e.g. a file, coredump or gdb)
255 | description: describes the contents of this memory range
256 | memtype: a MemoryType parameter if the contents are known
257 | or None otherwise. In this case this class will attempt
258 | to deduce the MemoryType from the description, following
259 | some standard gdb names
260 | """
261 | # We might be passed gdb.Value's, so convert them to native int's to reduce memory footprint
262 | self.startaddr = int(address)
263 | self.size = int(size)
264 | self.source = source
265 | self.description = description
266 | if memtype is None:
267 | self.memtype = MemoryRange._determine_memtype(description)
268 | else:
269 | self.memtype = memtype
270 |
271 | @staticmethod
272 | def _determine_memtype(description):
273 | """
274 | Determine the type of memory from the description
275 | This function knows most of the descriptions returned by 'info target' and 'info proc mappings'
276 | and will deduce the actual memory type based on that.
277 | """
278 | if description.startswith('.'):
279 | t = description.split()[0]
280 | if t in [ ".data", ".bss", ".rodata" ]:
281 | return MemoryType.StaticData
282 | elif t == ".text":
283 | return MemoryType.Code
284 | else:
285 | return MemoryType.OtherStatic
286 | elif description.startswith('['):
287 | if description == "[stack]":
288 | return MemoryType.Stack
289 | elif description in [ "[vdso]", "[vsyscall]", "[vvar]" ]:
290 | return MemoryType.LinuxSpecial
291 | elif description == "[heap]":
292 | return MemoryType.General
293 | else:
294 | return MemoryType.Unknown
295 | elif description.startswith("load"):
296 | return MemoryType.General
297 | elif "libc" in description:
298 | # C library is known to have some odd-named sections
299 | return MemoryType.OtherStatic
300 | elif description == "text_env":
301 | return MemoryType.OtherStatic
302 | else:
303 | return MemoryType.Unknown
304 |
305 | @staticmethod
306 | def from_addr(address):
307 | """Create a single-address MemoryRange"""
308 | return MemoryRange(address, size_t.sizeof, "MemorySpace.from_addr", "address", MemoryType.Address)
309 |
310 | @staticmethod
311 | def part_of(other, start, size):
312 | """Create a MemoryRange that spans a chunk of the provided MemoryRange"""
313 | return MemoryRange(start, size, other.source, other.description, other.memtype)
314 |
315 | @property
316 | def lastaddr(self):
317 | """Last valid address in this range"""
318 | return self.startaddr + self.size - 1
319 |
320 | @property
321 | def endaddr(self):
322 | """Post-end address of this range"""
323 | return self.startaddr + self.size
324 |
325 | def __contains__(self, address):
326 | return self.startaddr <= address < self.endaddr
327 |
328 | def _overlaps(self, other):
329 | return self.startaddr <= other.startaddr < self.endaddr \
330 | or other.startaddr <= self.startaddr < self.endaddr
331 |
332 | def __eq__(self, other):
333 | return isinstance(other, MemoryRange) and self.startaddr == other.startaddr
334 |
335 | def __lt__(self, other):
336 | return isinstance(other, MemoryRange) and self.startaddr < other.startaddr
337 |
338 | def settype(self, memtype, description=None):
339 | """
340 | Override the memory type and, optionally, the description
341 | """
342 | self.memtype = memtype
343 | if description is not None:
344 | self.description = description
345 |
346 | def __str__(self):
347 | return "0x%08X - 0x%08X is %15s|%s" % (self.startaddr, self.startaddr + self.size, self.memtype.name, self.description)
348 |
349 |
350 | class MemorySpace:
351 | """Describes the inferior process' memory address space/layout"""
352 | def __init__(self):
353 | self.have_accurate_info = True
354 | self.populate_ranges()
355 | self.annotate_stacks()
356 | self.annotate_major_heap()
357 | self.annotate_minor_heap()
358 | self.annotate_finalisers()
359 |
360 | def set_inaccurate(self, missing):
361 | print("Some debug information is missing from the binary: %s . Not all functionality is available" % missing)
362 | self.have_accurate_info = False
363 |
364 | def display(self, verbosity):
365 | """
366 | Pretty print the memory address space with varying verbosity levels:
367 | 0: only OCaml Stack and Minor/Major Heap areas are displayed
368 | 1: In addition, static data, code and known GC metadata areas are displayed
369 | 2: all memory types are displayed
370 | """
371 | if verbosity == 0:
372 | interesting = [ MemoryType.Stack, MemoryType.MinorHeap, MemoryType.MajorHeap ]
373 | elif verbosity == 1:
374 | interesting = [ MemoryType.Stack, MemoryType.MinorHeap, MemoryType.MajorHeap,
375 | MemoryType.StaticData, MemoryType.Code, MemoryType.GC_Metadata ]
376 | else:
377 | interesting = MemoryType.all()
378 |
379 | for r in self.ranges:
380 | if r.memtype in interesting:
381 | print("%s" % str(r))
382 |
383 | def get_range(self, address):
384 | """Get the memory range containing the provided address or None otherwise."""
385 | index = bisect.bisect(self.ranges, MemoryRange.from_addr(address))
386 | if index >= len(self.ranges):
387 | return None
388 | memrange = self.ranges[index-1]
389 | if address in memrange:
390 | return memrange
391 | return None
392 |
393 | def is_address_of_type(self, address, *memtypes):
394 | """Return True of the address is contained in a memory range of one of the provided types"""
395 | memrange = self.get_range(address)
396 | return memrange is not None and memrange.memtype in memtypes
397 |
398 | def is_on_stack(self, address):
399 | """Indicate whether the address is on one of the inferior threads' stack"""
400 | return self.is_address_of_type(address, MemoryType.Stack)
401 |
402 | def is_in_heap(self, address):
403 | """Indicate whether the address points to data in the OCaml heap"""
404 | return self.is_address_of_type(address, MemoryType.MajorHeap, MemoryType.MinorHeap)
405 |
406 | # Beware, on some architectures data is sometimes stored intermingled with code in the .text section
407 | # Typically this is after the return instruction from a function
408 | def is_valid_data(self, address):
409 | """Indicate whether the address points to a memory range known to contain data"""
410 | return self.is_address_of_type(address,
411 | MemoryType.MajorHeap, MemoryType.MinorHeap,
412 | MemoryType.StaticData, MemoryType.Stack,
413 | MemoryType.Finalisers)
414 |
415 | def is_code(self, address):
416 | """Indicate whether the address points to a memory range containing code"""
417 | return self.is_address_of_type(address, MemoryType.Code)
418 |
419 | def search_memory_of_types(self, pattern, *memtypes):
420 | """Search all memory of the given types for the provided pattern.
421 | The pattern must adhere to the buffer interface, the simplest
422 | way to create this is probably struct.pack(...)."""
423 | inferior = gdb.selected_inferior()
424 | locations = []
425 | for memrange in self.ranges:
426 | if memrange.memtype not in memtypes:
427 | continue
428 |
429 | loc = ctypes.c_void_p(memrange.startaddr).value
430 | end = ctypes.c_void_p(memrange.endaddr).value
431 | while loc < end:
432 | loc = inferior.search_memory(loc, end - loc, pattern)
433 | if loc is None or loc == 0:
434 | loc = end
435 | else:
436 | locations.append(loc)
437 | loc += size_t.sizeof
438 |
439 | return locations
440 |
441 | # TODO: make this function truncate the existing ranges, rather than delete them
442 | # It will allow annotate_split_range to keep return value over multiple splits
443 | def split_range_at(self, address):
444 | """Split a memory range at the provided address and returns both new ranges."""
445 | index = bisect.bisect(self.ranges, MemoryRange.from_addr(address))
446 | index -= 1
447 | # address is before any existing ranges
448 | if index < 0:
449 | return None, None
450 |
451 | memrange = self.ranges[index]
452 | # address points to the beginning of an existing range
453 | if memrange.startaddr == address:
454 | previous = None if index == 0 else self.ranges[index-1]
455 | return previous, memrange
456 |
457 | # address inside the memory range
458 | if address in memrange:
459 | first = MemoryRange(memrange.startaddr, address - memrange.startaddr, memrange.source, memrange.description, memrange.memtype)
460 | second = MemoryRange(address, memrange.startaddr + memrange.size - address, memrange.source, memrange.description, memrange.memtype)
461 |
462 | del self.ranges[index]
463 | bisect.insort(self.ranges, first)
464 | bisect.insort(self.ranges, second)
465 | return first, second
466 |
467 | # address (right) after the memory range and not contained by another memory range
468 | previous = memrange if address == memrange.endaddr else None
469 | return previous, None
470 |
471 | def tentative_add_range(self, memrange):
472 | """Add a memory range, leaving any existing overlaps untouched, yet filling holes where necessary"""
473 | # optimize the easy case. makes rest of code simpler
474 | if not len(self.ranges):
475 | bisect.insort(self.ranges, memrange)
476 | return
477 |
478 | probeaddr = memrange.startaddr
479 | while probeaddr < memrange.endaddr:
480 | index = bisect.bisect(self.ranges, MemoryRange.from_addr(probeaddr))
481 |
482 | # before first
483 | if index == 0:
484 | nxt = self.ranges[index]
485 | lastaddr = nxt.startaddr
486 | bisect.insort(self.ranges, MemoryRange.part_of(memrange, probeaddr, lastaddr - probeaddr))
487 | probeaddr = nxt.endaddr
488 | continue
489 |
490 | # after last
491 | if index >= len(self.ranges):
492 | prev = self.ranges[index-1]
493 | startaddr = prev.endaddr
494 | if startaddr <= probeaddr:
495 | bisect.insort(self.ranges, MemoryRange.part_of(memrange, probeaddr, memrange.endaddr - probeaddr))
496 | probeaddr = memrange.endaddr
497 | else:
498 | probeaddr = startaddr
499 | continue
500 |
501 | # in between 2
502 | prev = self.ranges[index-1]
503 | if probeaddr in prev:
504 | probeaddr = prev.endaddr
505 | continue
506 |
507 | nxt = self.ranges[index]
508 | if nxt.startaddr in memrange:
509 | bisect.insort(self.ranges, MemoryRange.part_of(memrange, probeaddr, nxt.startaddr - probeaddr))
510 | probeaddr = nxt.endaddr
511 | else:
512 | bisect.insort(self.ranges, MemoryRange.part_of(memrange, probeaddr, memrange.endaddr - probeaddr))
513 | probeaddr = memrange.endaddr
514 |
515 | def annotate_split_range(self, address, size, memtype, description):
516 | """Annotate an existing range (or part thereof) as the specified MemoryType, splitting the range where necessary"""
517 | _, _ = self.split_range_at(address) # do not keep return values, following call may delete it from self.ranges
518 | end, _ = self.split_range_at(address+size)
519 | begin = self.get_range(address)
520 |
521 | begin.settype(memtype, description)
522 | if end != begin:
523 | print("Annotating '%s' over 2 separate ranges: %s and %s" % (description, str(begin), str(end)))
524 | # TODO: merge the two
525 | end.settype(memtype, description)
526 |
527 | def populate_ranges(self,):
528 | """Populate the memory ranges from coredump info or live gdb information"""
529 | self.ranges = list()
530 | # coredump: info target shows all sections in full detail
531 | # live debug: only file-backed sections are shown
532 | targetinfo = gdb.execute("info target", False, True)
533 | for line in targetinfo.splitlines():
534 | line = line.strip()
535 | if line.startswith('`'):
536 | line = line.split("'")[1]
537 | source = line[1:]
538 | continue
539 | if not line.startswith("0x"):
540 | continue
541 |
542 | start, dash, end, str_is, memtype = line.split(maxsplit=4)
543 | assert(dash == '-' and str_is == 'is')
544 | start = int(start, 16)
545 | end = int(end, 16)
546 | new_range = MemoryRange(start, end-start, source, memtype)
547 | startoverlap = self.get_range(start)
548 | endoverlap = self.get_range(end)
549 |
550 | if endoverlap == startoverlap:
551 | endoverlap = None
552 |
553 | #TODO: splitup and punch holes/replace
554 | if memtype.startswith('.'):
555 | # gdb reports loadXXX sections on top of file-backed sections of the binary
556 | # probably because the kernel maps writeable pages on top of them
557 | # Therefore, keep the more accurate description from the file-backed section
558 | if startoverlap is not None and startoverlap.memtype == MemoryType.General:
559 | previous, current = self.split_range_at(start)
560 | self.ranges.remove(current)
561 | startoverlap = None
562 | if endoverlap is not None and endoverlap.memtype == MemoryType.General:
563 | current, end = self.split_range_at(end)
564 | self.ranges.remove(current)
565 | endoverlap = None
566 |
567 | if startoverlap is not None and endoverlap is not None:
568 | print("Overlapping memory ranges: %s in %s -> %s" %
569 | (new_range, str(startoverlap), str(endoverlap)))
570 | bisect.insort(self.ranges, new_range)
571 |
572 | # live target: run-time allocated memory and some file-backed sections
573 | # There typically is overlap with the 'info target' output, so give precedence
574 | # to the previously added ranges
575 | mappinginfo = gdb.execute("info proc mappings", False, True)
576 | for line in mappinginfo.splitlines():
577 | line = line.strip()
578 | if not line.startswith("0x"):
579 | continue
580 |
581 | items = line.split()
582 | if len(items) == 4:
583 | start, end, size, offset = items
584 | source = "unknown"
585 | elif len(items) == 5:
586 | start, end, size, offset, source = items
587 | else:
588 | print("Unexpected line when parsing 'info proc mappings': %s" % line)
589 | continue
590 |
591 | start = int(start, 16)
592 | size = int(size, 16)
593 | end = int(end, 16)
594 |
595 | new_range = MemoryRange(start, size, source, source)
596 | self.tentative_add_range(new_range)
597 |
598 | def annotate_stacks(self):
599 | """
600 | Mark all memoryranges containing thread stacks as such.
601 | We do this by taking the stack pointer of each thread
602 | and marking the target address' memory range.
603 | There typically are guard pages around stacks, they will
604 | not be marked as stack.
605 | """
606 | curthread = gdb.selected_thread()
607 | try:
608 | for thread in gdb.selected_inferior().threads():
609 | thread.switch()
610 |
611 | # This is different depending on gdb version
612 | try:
613 | frame = gdb.newest_frame()
614 | stackpointer = frame.read_register("sp")
615 | except:
616 | regname, as_hex, as_int = gdb.execute("info register sp", False, True).split()
617 | stackpointer = int(as_hex, 16)
618 | memrange = self.get_range(stackpointer)
619 | tid = thread.ptid[1] if thread.ptid[1] else thread.ptid[2]
620 | if memrange is None:
621 | print("Did not find stack of thread %d" % tid)
622 | continue
623 | memrange.settype(MemoryType.Stack, "Stack of thread %d(TID %d)" % (thread.num, tid))
624 | finally:
625 | curthread.switch()
626 |
627 | def annotate_major_heap(self):
628 | """
629 | Mark all memory ranges containing OCaml stack as such.
630 | Memory ranges are split when needed to avoid marking padding as actual heap.
631 | """
632 | # TODO: we could provide a fallback path by manually taking the proper bytes as
633 | # the ml_heap command does
634 | if heap_chunk_head_p is None:
635 | self.set_inaccurate("major heap info")
636 | return
637 |
638 | heap_chunk_ptr = get_value_safe("caml_heap_start", heap_chunk_head_p)
639 | try:
640 | while heap_chunk_ptr is not None and heap_chunk_ptr != 0:
641 | heap_chunk_head_ptr = heap_chunk_ptr - 1
642 | heap_chunk_head = heap_chunk_head_ptr.dereference()
643 |
644 | block = heap_chunk_head["block"]
645 | size = heap_chunk_head["size"]
646 |
647 | memrange = self.get_range(heap_chunk_head_ptr)
648 | if memrange is not None:
649 | self.annotate_split_range(heap_chunk_ptr.cast(size_t), size, MemoryType.MajorHeap, "Major heap")
650 | else:
651 | new_range = MemoryRange(heap_chunk_ptr.cast(size_t), size, "gdb", "Major Heap", MemoryType.MajorHeap)
652 | assert(false) # This shouldn't happen
653 | self.tentative_add_range(new_range)
654 |
655 | heap_chunk_ptr = heap_chunk_head["next"].cast(heap_chunk_head_p)
656 | except gdb.MemoryError:
657 | print("OCaml major heap linked list is corrupt: last entry = 0x%08X" % (int(heap_chunk_ptr.cast(size_t))))
658 |
659 | gray_vals = get_value_safe("gray_vals", size_t)
660 | gray_vals_cur = get_value_safe("gray_vals_cur", size_t)
661 | gray_vals_size = get_value_safe("gray_vals_size", size_t)
662 | gray_vals_end = get_value_safe("gray_vals_end", size_t)
663 | if gray_vals is not None and gray_vals_size is not None:
664 | self.annotate_split_range(gray_vals, gray_vals_size, MemoryType.GC_Metadata, "major GC's gray values")
665 | if gray_vals_cur is not None and gray_vals_end is not None:
666 | self.annotate_split_range(gray_vals_cur, gray_vals_end - gray_vals_cur, MemoryType.GC_Metadata, "major GC's current gray values")
667 |
668 | def annotate_minor_heap(self):
669 | """
670 | Mark the minor heap memory range as such.
671 | """
672 | minor_start = get_value_safe("caml_young_base", size_t)
673 | minor_end = get_value_safe("caml_young_end", size_t)
674 | if minor_start is None or minor_end is None:
675 | return
676 | minor_size = minor_end - minor_start
677 |
678 | memrange = self.get_range(minor_start)
679 | if memrange is not None:
680 | self.annotate_split_range(minor_start, minor_size, MemoryType.MinorHeap, "Minor heap")
681 | else:
682 | new_range = MemoryRange(minor_start, minor_size, "gdb", "Minor Heap", MemoryType.MinorHeap)
683 | self.set_inaccurate("minor heap memory map info")
684 | bisect.insort(self.ranges, new_range)
685 |
686 | def annotate_finalisers(self):
687 | """
688 | Mark the table of finalisers as such.
689 | """
690 | table = get_value_safe("final_table", size_t)
691 | size = get_value_safe("'finalise.d.c'::size", size_t)
692 | if size is None:
693 | size = get_value_safe("'finalise.c'::size", size_t)
694 |
695 | if table is None or size is None:
696 | self.set_inaccurate("finalisers")
697 | return
698 |
699 | if table != 0 and size != 0:
700 | self.annotate_split_range(table, size, MemoryType.Finalisers, "Finalisers table")
701 |
702 | memoryspace = None
703 |
704 | def init_memoryspace(reload=False):
705 | """Load memory space information from the inferior process."""
706 | global memoryspace
707 | if memoryspace is not None and not reload:
708 | return
709 |
710 | try:
711 | memoryspace = MemorySpace()
712 | except:
713 | traceback.print_exc()
714 | raise
715 |
716 | def resolve(address):
717 | """Resolve an address to a symbol (function/variable name)."""
718 | symbol = gdb.execute("info symbol 0x%08X" % int(address.cast(size_t)), False, True).split(" ",1)[0]
719 | if symbol == "No": # FIXME "No symbol matches"
720 | return "0x%08X" % int(address.cast(size_t))
721 | else:
722 | return "%s" % symbol
723 |
724 | # This class represents gdb.Value as OCaml value.
725 | # Analogue to stdlib Obj module.
726 | #
727 | # It probably contains more casts than strictly necessary,
728 | # but after all who am I to understand python type system?
729 | # Just a mere OCaml coder. And this is my first python program.
730 | class OCamlValue:
731 |
732 | VALUE_TAG = 0
733 | LAZY_TAG = 246
734 | CLOSURE_TAG = 247
735 | OBJECT_TAG = 248
736 | INFIX_TAG = 249
737 | FORWARD_TAG = 250
738 | NO_SCAN_TAG = 251
739 | ABSTRACT_TAG = 251
740 | STRING_TAG = 252
741 | DOUBLE_TAG = 253
742 | DOUBLE_ARRAY_TAG = 254
743 | CUSTOM_TAG = 255
744 | FINAL_TAG = 255
745 | INT_TAG = 1000
746 | OUT_OF_HEAP_TAG = 1001
747 | UNALIGNED_TAG = 1002
748 |
749 | VALID_TAGS = (VALUE_TAG, LAZY_TAG, CLOSURE_TAG, OBJECT_TAG, INFIX_TAG, FORWARD_TAG, NO_SCAN_TAG, ABSTRACT_TAG, STRING_TAG,
750 | DOUBLE_TAG, DOUBLE_ARRAY_TAG, CUSTOM_TAG)
751 |
752 | def __init__(self,v, parent=None, parentindex=None):
753 | if isinstance(v, OCamlValue):
754 | self.v = v.v
755 | elif not isinstance(v, gdb.Value):
756 | self.v = gdb.Value(v).cast(intnat)
757 | else:
758 | self.v = v.cast(intnat)
759 | self.parent = parent
760 | self.parentindex = parentindex
761 | if parent is not None and parentindex is None:
762 | raise Exception("If a parent is specified, the parentindex is also expected")
763 |
764 | def is_int(self):
765 | """Indicate whether the OCamlValue is an integer."""
766 | return self.v & 1 != 0
767 |
768 | def is_block(self):
769 | """Indicate whether the OCamlValue is a pointer to an OCaml block."""
770 | return self.v & 1 == 0
771 |
772 | @staticmethod
773 | def of_int(x):
774 | return OCamlValue(gdb.Value((x<<1) + 1))
775 |
776 | @staticmethod
777 | def of_val(x):
778 | assert(x & 1 == 0)
779 | return OCamlValue(gdb.Value(x))
780 |
781 | @staticmethod
782 | def of_bool(x):
783 | assert(x & (~1) == 0)
784 | return OCamlValue.of_int(x != 0)
785 |
786 | def int(self):
787 | """Get the integer value of this instance. Must only be called if it is an int."""
788 | assert(self.is_int())
789 | return self.v >> 1
790 |
791 | def val(self):
792 | """Get the gdb.Value of this instance."""
793 | return self.v
794 |
795 | def _string(self,enc='latin1'):
796 | assert self.tag() == OCamlValue.STRING_TAG
797 |
798 | byte_size = self.size_bytes()
799 | if byte_size is None:
800 | return "Invalid string: could not determine string size: value outside the heap: value = 0x%X" % self.v
801 |
802 | padsize_byte = (self.v + (byte_size - 1)).cast(charp)
803 | padsize = try_dereference(padsize_byte)
804 | if padsize is None:
805 | return "Invalid string: pad byte not in valid memory. value=0x%X, size in bytes=%d, pad byte: 0x%X" % (self.v, byte_size, padsize_byte)
806 |
807 | slen = byte_size - 1 - padsize
808 | trailing_nul = try_dereference((self.v + slen).cast(charp))
809 | assert(trailing_nul is not None) # we shouldn't get here if we could dereference padsize above
810 | if trailing_nul != 0:
811 | return "Invalid string: no NUL-byte at end of string. value=0x%X, size in bytes=%d, last byte=%d" % (self.v, byte_size, trailing_nul)
812 |
813 | if slen <= 1024:
814 | s = self.v.cast(charp).string(enc, 'ignore', slen)
815 | return s.__repr__()
816 | else:
817 | s = self.v.cast(charp).string(enc, 'ignore', 256)
818 | return "%s..<%d bytes total>" % (s.__repr__(), slen)
819 |
820 | def _float(self):
821 | assert self.tag() == OCamlValue.DOUBLE_TAG
822 | words = self.size_words()
823 | if words != doublep.target().sizeof: # Don't check for None, the assert already implies it
824 | return "Invalid float: size=%d" % words
825 | f = try_dereference(self.v.cast(doublep))
826 | assert(f is not None) # This is quite unlikely, unless v is outside the heap, while the header is inside
827 | return "%f" % f
828 |
829 | def __cmp__(self,other):
830 | if self.v == other.v:
831 | return 0
832 | else:
833 | if self.v > other.v:
834 | return 1
835 | else:
836 | return -1
837 |
838 | def __str__(self):
839 | if self.is_int():
840 | return ("%d" % self.int())
841 | else:
842 | return ("0x%X" % self.val())
843 |
844 | def __repr__(self):
845 | return "OCamlValue(%s)" % self.__str__()
846 |
847 | def hd(self):
848 | """Get the header value or None if inaccessible. Must only be called on a block."""
849 | header = (self.v - intnat.sizeof).cast(size_t.pointer())
850 | return try_dereference(header)
851 |
852 | def tag(self):
853 | """Get the block tag or None if inaccessible. Must only be called on a block."""
854 | if self.is_int():
855 | return OCamlValue.INT_TAG
856 | else:
857 | hd = self.hd()
858 | if hd is None:
859 | return OCamlValue.OUT_OF_HEAP_TAG
860 | return hd & 0xFF
861 |
862 | def _unsafe_field(self,i):
863 | """
864 | Get the contents of the indicated field or None if inaccessible.
865 | Does not check boundaries nor validate this is a block.
866 | """
867 | x = try_dereference( (self.v + (i * intnat.sizeof)).cast(intnat.pointer()) )
868 | if x is None:
869 | return None
870 | return OCamlValue(x, parent=self, parentindex = i)
871 |
872 | def field(self,i):
873 | """
874 | Get the contents of the indicated field or None if inaccessible.
875 | Must only be called on a block, cannot obtain a double from a double array.
876 | """
877 | assert self.is_block()
878 | assert self.tag () != OCamlValue.DOUBLE_ARRAY_TAG # FIXME not implemented
879 | n = self.size_words()
880 | if n is None:
881 | return None
882 | if i < 0 or i >= n:
883 | raise IndexError("field %d size %d" % (i,n))
884 | return self._unsafe_field(i)
885 | #t = intnat.array(n).pointer()
886 | #return OCamlValue(self.v.cast(t).dereference()[i])
887 |
888 | def fields(self):
889 | """
890 | Get a list of all fields of this block.
891 | Must only be called on a block, cannot obtain fields of a double array.
892 | When any access goes out of bounds, a single None value is appended to
893 | the list.
894 | """
895 | assert self.is_block()
896 | assert self.tag () != OCamlValue.DOUBLE_ARRAY_TAG # FIXME not implemented
897 |
898 | words = self.size_words()
899 | if words is None:
900 | return [None]
901 |
902 | a = []
903 | for i in range(int(words)):
904 | field = self._unsafe_field(i)
905 | a.append(field)
906 | if field is None:
907 | break # Append a single invalid value to indicate out-of-bounds to the user
908 | return a
909 |
910 | def size_words(self):
911 | """
912 | Return the size of this block in number of words or None if inaccessible
913 | Must only be called on a block.
914 | """
915 | assert self.is_block()
916 | hd = self.hd()
917 | if hd is None:
918 | return None
919 | return hd >> 10
920 |
921 | def size_bytes(self):
922 | """
923 | Return the size of this block in number of bytes or None if inaccessible
924 | Must only be called on a block.
925 | """
926 | size_words = self.size_words()
927 | if size_words is None:
928 | return None
929 | return size_words * intnat.sizeof
930 |
931 | def _is_list(self):
932 | """Indicate if this block describes a list."""
933 | # TODO
934 | if self.is_int():
935 | return self.int() == 0
936 | else:
937 | return self.size_words() == 2 and self.tag() == 0 and self.field(1)._is_list()
938 |
939 | def get_list(self):
940 | """Parse an OCaml list into a python list."""
941 | a = []
942 | l = self
943 | while l.is_block():
944 | a.append(l.field(0))
945 | l = l.field(1)
946 | return a
947 |
948 | def get_list_length(self):
949 | """Get the length of an OCaml list"""
950 | n = 0
951 | l = self
952 | while l.is_block():
953 | n+=1
954 | l = l.field(1)
955 | return n
956 |
957 | def resolve(self):
958 | """Resolve the block pointer contained in this OCamlValue."""
959 | return resolve(self.val())
960 |
961 | def show_opaque(self,s):
962 | print_cont("<%s at 0x%x>" % (s,int(self.val())))
963 |
964 | def show_seq(self,seq,delim,recurse,raw=False):
965 | for i, x in enumerate(seq):
966 | if i:
967 | print_cont(delim)
968 | if raw:
969 | print_cont(x.resolve())
970 | else:
971 | x.show(recurse)
972 |
973 | # Verbosity:
974 | # Error information is always shown
975 | # 0: Print type of the OCamlValue
976 | # 1: Print type and value of the OCamlValue, only displays number of items for sequence types
977 | # 2: Print type and value of the OCamlValue, display full contents of not to long,
978 | # otherwise same as 1
979 | # Each of the following functions interprets the OCamlValue per the function name
980 | # and displays the value according to the given verbosity.
981 | #
982 | # The functions get all of their input through function arguments
983 | # to avoid excessive fetching through gdb.Value and duplicating parsing logic.
984 |
985 | def _stringify_int(self, value, verbosity):
986 | if verbosity == 0:
987 | return "Integer"
988 | else:
989 | return "Integer(%d/0x%08X)" % (value, value)
990 |
991 | def _stringify_invalid_block(self, pointer, reason, verbosity):
992 | if verbosity == 0:
993 | return "Invalid block(%s)" % reason
994 | else:
995 | return "Invalid block(%s, v=0x%08X)" % (reason, pointer)
996 |
997 | def _stringify_invalid_size(self, pointer, size, item, verbosity):
998 | if verbosity == 0:
999 | return "Invalid size %s"
1000 | else:
1001 | return "Invalid size %s(v=0x%08X, size=%d)" % (item, self.v, self.size_words())
1002 |
1003 | def _stringify_generic(self, prefix, fields, verbosity):
1004 | size = len(fields)
1005 | if verbosity == 0:
1006 | return prefix
1007 | elif verbosity >= 2 and size <= 8:
1008 | return "%s [%s]" % (prefix, ', '.join(["0x%08X"%f.v for f in fields]))
1009 | else:
1010 | return "%s (%d items)" % (prefix, size)
1011 |
1012 | def _stringify_value(self, fields, verbosity):
1013 | return self._stringify_generic("Array/Tuple/Record/List entry", fields, verbosity)
1014 |
1015 | def _stringify_lazy_value(self, pointer, verbosity):
1016 | if verbosity == 0:
1017 | return "Lazy value"
1018 | else:
1019 | return "Lazy value (0x%08X)" % pointer
1020 |
1021 | def _stringify_lazy_result(self, fields, verbosity):
1022 | return self._stringify_generic("Lazy result", fields, verbosity)
1023 |
1024 | def _stringify_invalid_object(self, classval, objectid, reason, verbosity):
1025 | if verbosity == 0:
1026 | return "Object with %s" % reason
1027 | else:
1028 | return "Object with %s (cls=0x%08X, oid=0x%08X)" % (reason, classval, objectid)
1029 |
1030 | def _stringify_object(self, classval, objectid, fields, verbosity):
1031 | size = len(fields)
1032 | if verbosity == 0:
1033 | return "Object"
1034 | elif verbosity >= 2 and size <= 8:
1035 | return "Object (cls=0x%08X, oid=%d) [%s]" % (classval, objectid,
1036 | ', '.join(["0x%08X"%f.v for f in self.fields()[2:]]))
1037 | else:
1038 | return "Object (cls=0x%08X, oid=%d) [%d members]" % (classval, objectid, size)
1039 |
1040 | def _stringify_empty_closure(self, fields, verbosity):
1041 | if verbosity == 0:
1042 | return "Empty closure"
1043 |
1044 | size = len(fields)
1045 | if verbosity > 2 or (verbosity == 2 and size <= 8):
1046 | return "Empty closure (%s)" % ', '.join(["0x%08X"%f.v for f in fields])
1047 | else:
1048 | return "Empty closure (%d items)" % size
1049 |
1050 | def _stringify_closure(self, functions, fields, verbosity):
1051 | prefix = "Infixed closure" if len(functions) > 1 else "Closure"
1052 |
1053 | if verbosity == 0:
1054 | return prefix
1055 |
1056 | funcnames = []
1057 | for (function, real_function) in functions:
1058 | if real_function != function:
1059 | funcnames.append("%s via %s" % (real_function, function))
1060 | else:
1061 | funcnames.append(function)
1062 |
1063 | targets = ", ".join(funcnames)
1064 |
1065 | size = len(fields)
1066 | if verbosity > 2 or (verbosity == 2 and size <= 8):
1067 | return "%s to %s(%s)" % (prefix, targets,
1068 | ', '.join(["0x%08X"%f.v for f in fields]))
1069 | else:
1070 | return "%s to %s(%d)" % (prefix, targets, size)
1071 |
1072 | def _stringify_closure_arity_mismatch(self, function, arity, size, reason, verbosity):
1073 | if verbosity == 0:
1074 | return "Closure with %s" % reason
1075 | else:
1076 | return "Closure to %s with %s(arity=0x%08X, size=%d)" % (function, reason, arity, size)
1077 |
1078 | def _stringify_string(self, string, verbosity):
1079 | size = len(string)
1080 | if verbosity == 0:
1081 | return "String"
1082 | elif (verbosity == 2 and size < 64) or verbosity > 2:
1083 | return "String '%s'" % string
1084 | else:
1085 | return "String '%s...%d total'" % (string[:48], size)
1086 |
1087 | def _stringify_double(self, value, verbosity):
1088 | if verbosity == 0:
1089 | return "Double"
1090 | else:
1091 | return "Double: %f" % value
1092 |
1093 | def _stringify_structured_block(self, fields, verbosity):
1094 | size = len(fields)
1095 | if verbosity == 0:
1096 | return "Structured block"
1097 | elif verbosity >= 2 and size < 8:
1098 | return "Structured block [%s]" % ', '.join(["0x%08X"%f.v for f in fields])
1099 | else:
1100 | return "Structured block [%d total]" % size
1101 |
1102 | @TraceMemoryError
1103 | def try_parse(self, verbosity=0):
1104 | #print("Trying to validate: 0x%X" % self.v)
1105 | if self.v == 0:
1106 | # TODO: within some sections NULL pointers are expected, e.g. global_data
1107 | return False, "NULL pointer", []
1108 | if self.is_int():
1109 | return True, self._stringify_int(self.int(), verbosity), []
1110 |
1111 | # It's a block...
1112 |
1113 | if (self.v & (intnat.sizeof-1)) != 0:
1114 | return False, self._stringify_invalid_block(int(self.v), "Unaligned pointer", verbosity), []
1115 |
1116 | ptr = self.v.cast(intnat.pointer())
1117 | hd = self.hd()
1118 | if hd is None:
1119 | return False, self._stringify_invalid_block(int(self.v), "Out-of-bounds header", verbosity), []
1120 | if try_dereference(ptr) is None:
1121 | return False, self._stringify_invalid_block(int(self.v), "Out-of-bounds pointer", verbosity), []
1122 |
1123 | if memoryspace.have_accurate_info and not memoryspace.is_valid_data(self.v):
1124 | memrange = memoryspace.get_range(self.v)
1125 | return False, "Value (0x%08X) not in data memory: %s (%s)" % (self.v, str(memrange), self.resolve()), []
1126 |
1127 | word_size = self.size_words()
1128 | if self._unsafe_field(word_size - 1) is None:
1129 | return False, self._stringify_invalid_size(int(self.v), word_size, "of unknown type", verbosity), []
1130 |
1131 | # TODO: there's a limit to block sizes allocated on the minor heap
1132 | if verbosity >= 2 and word_size > 1024*1024:
1133 | print("Warning: v=0x%08X is greater than 1MiB: %d"%(self.v, word_size))
1134 |
1135 | # Pointers and size look acceptable
1136 |
1137 | tag = self.tag()
1138 | # These if/elif conditions are ordered according to expected prevalence for performance
1139 | # TODO: recognize and print lists properly. Now they will be printed as nested structures.
1140 | if tag == OCamlValue.VALUE_TAG:
1141 | fields = self.fields()
1142 | return True, self._stringify_value(fields, verbosity), fields
1143 |
1144 | elif tag == OCamlValue.STRING_TAG:
1145 | byte_size = self.size_bytes()
1146 | padsize_byte = (self.v + byte_size - 1).cast(charp)
1147 | padsize = padsize_byte.dereference()
1148 | if padsize > intnat.sizeof:
1149 | return False, "String with invalid padding byte: %d" % padsize, []
1150 |
1151 | real_len = byte_size - 1 - padsize
1152 | trailing_nul = (self.v + real_len).cast(charp).dereference()
1153 | if trailing_nul != 0:
1154 | return False, "String without trailing NUL: %d" % trailing_nul, []
1155 |
1156 | string = self.v.cast(charp).string('latin1', 'ignore', real_len)
1157 | return True, self._stringify_string(string, verbosity), []
1158 |
1159 | elif tag == OCamlValue.DOUBLE_TAG:
1160 | if self.size_bytes() != doublep.target().sizeof:
1161 | return False, self._stringify_invalid_size(int(self.v), size, "double", verbosity), []
1162 |
1163 | value = self.v.cast(doublep).dereference()
1164 | return True, self._stringify_double(value, verbosity), []
1165 |
1166 | elif tag == OCamlValue.OBJECT_TAG:
1167 | if word_size < 2:
1168 | return False, self._stringify_invalid_size(int(self.v), word_size, "of object", verbosity), []
1169 |
1170 | classval = self.field(0)
1171 | objectid = self.field(1)
1172 | if classval.is_int():
1173 | return False, self._stringify_invalid_object(classval.val(), objectid.val(), "invalid class id"), []
1174 |
1175 | if not objectid.is_int():
1176 | return False, self._stringify_invalid_object(classval.val(), objectid.val(), "invalid object id"), []
1177 |
1178 | # Beware: objectid was passed raw above, but is passed as integer below
1179 | fields = self.fields()[2:]
1180 | return True, self._stringify_object(classval.val(), objectid.int(), fields, verbosity), fields
1181 |
1182 | elif tag == OCamlValue.CLOSURE_TAG:
1183 | # The simplest closure contains 2 words:
1184 | # function pointer | arity of the function
1185 | # If data from the environment is added, it just comes after the arity.
1186 | #
1187 | # Some closures are special:
1188 | # caml_curryX (partial function application) and caml_tuplifyX (let add (a,b) = ...)
1189 | # These functions are generated by the compiler when needed and closures look like:
1190 | # function pointer of caml_curryX | arity | function pointer of target function
1191 | # function pointer of caml_tuplifyX | -arity | function pointer of target function
1192 | # According to some documentation, there is also caml_apply and other forms of
1193 | # caml_curryX_app that haven't been properly handled here yet.
1194 | #
1195 | # (See also documentation of the tag == OCamlValue.INFIX_TAG below)
1196 | # When a closure contains infix tags, it's pretty hard to distinguish between that
1197 | # and a standard closure containing an integer value that looks like an infix tag.
1198 | # Therefore we use the simple heuristic that if we find something that looks like
1199 | # an infix tag followed by a pointer to a code section, we treat it as an infix tag
1200 | # and continue parsing the closure.
1201 | if word_size < 2:
1202 | return False, self._stringify_invalid_size(int(self.v), word_size, "closure", verbosity), []
1203 |
1204 | offset = 0
1205 | fields = self.fields()
1206 | functions = []
1207 | have_more = True
1208 | tempfunc = fields[0].val()
1209 | while have_more:
1210 | function = fields[0].resolve()
1211 | arity = fields[1]
1212 | if not arity.is_int():
1213 | return False, self._stringify_closure_arity_mismatch(function, int(arity.v), word_size, "non-integer arity", verbosity), []
1214 | arity = arity.int()
1215 |
1216 | if abs(arity) < 1:
1217 | return False, self._stringify_closure_arity_mismatch(function, arity, word_size, "arity to small", verbosity), []
1218 |
1219 | if function.startswith("caml_curry"):
1220 | skip_fields = 3
1221 | real_function = fields[2].resolve()
1222 |
1223 | elif function.startswith("caml_tuplify"):
1224 | arity = -arity
1225 | real_function = fields[2].resolve()
1226 | skip_fields = 3
1227 |
1228 | else:
1229 | skip_fields = 2
1230 | real_function = function
1231 |
1232 | functions.append( (function, real_function) )
1233 |
1234 | if len(fields) >= skip_fields + 2 \
1235 | and (fields[skip_fields].val() & 0xFF) == OCamlValue.INFIX_TAG \
1236 | and memoryspace.is_code(fields[skip_fields + 1].val()):
1237 |
1238 | infix_offset = (fields[skip_fields].val() >> 10)
1239 | if infix_offset != offset + skip_fields + 1:
1240 | return False, "Closure with incorrect infix size", []
1241 | skip_fields += 1
1242 | else:
1243 | have_more = False
1244 |
1245 | offset += skip_fields
1246 | fields = fields[skip_fields:]
1247 |
1248 | if len(functions) == 0:
1249 | return False, self._stringify_empty_closure(fields, verbosity), []
1250 | return True, self._stringify_closure(functions, fields, verbosity), fields
1251 |
1252 | elif tag == OCamlValue.LAZY_TAG:
1253 | # This is the actual lazy value. When it is evaluated, the tag changes to either whatever
1254 | # fit the result (probably if it fits in this OCamlValue) or into a FORWARD_TAG otherwise
1255 | # with the field pointing to block containing the result.
1256 | if word_size != 1:
1257 | return False, self._stringify_invalid_size(int(self.v), word_size, "lazy value", verbosity), []
1258 |
1259 | # TODO: fields of the lazy value?
1260 | return True, self._stringify_lazy_value(int(self.v), verbosity), self.fields()
1261 |
1262 | elif tag == OCamlValue.FORWARD_TAG:
1263 | # This is used for forwarding to the OCamlValue containing the result of a lazy evaluation
1264 | if word_size == 0:
1265 | return False, self._stringify_invalid_size(int(self.v), word_size, "lazy result", verbosity), []
1266 |
1267 | fields = self.fields()
1268 | return True, self._stringify_lazy_result(fields, verbosity), fields
1269 |
1270 | elif tag == OCamlValue.INFIX_TAG:
1271 | # "let rec a x = ... and b y = ..." creates a closure with infixes:
1272 | # closure header | (a) closure data (1 or more words) | infix tag | (b) closure data (1 or more words)
1273 | # OCamlValue for a points to (a) and b points to (b)
1274 | # The size of the infix tag is the offset in words of (b) with respect to (a)
1275 | # For parsing, we just forward to the encapsulating closure
1276 | closure_offset = self.size_words()
1277 | return OCamlValue(self.v - (closure_offset*size_t.sizeof)).try_parse(verbosity)
1278 |
1279 | elif tag == OCamlValue.ABSTRACT_TAG:
1280 | # TODO: validate more?
1281 | return True, "Abstract %d" % word_size, []
1282 |
1283 | elif tag == OCamlValue.CUSTOM_TAG:
1284 | # Custom values are used for things like Int64, Int32 and more. They haven't had
1285 | # special treatment here yet. See the show() method for some more info.
1286 | # TODO: validate more
1287 | return True, "Custom %d" % word_size, []
1288 |
1289 | elif tag == OCamlValue.DOUBLE_ARRAY_TAG:
1290 | if (size_bytes % (doublep.target().sizeof)) != 0:
1291 | return False, self._stringify_invalid_size(int(self.v), size, "double array"), []
1292 | # TODO: print actual values
1293 | return True, "Double array", []
1294 |
1295 | else:
1296 | fields = self.fields()
1297 | return True, self._stringify_structured_block(fields, verbosity), fields
1298 |
1299 | @TraceMemoryError
1300 | def show(self,recurse):
1301 | if self.v == 0:
1302 | print_cont("NULL") # not a value
1303 | elif self.is_int():
1304 | print_cont("%d" % self.int())
1305 | elif self._is_list():
1306 | print_cont("[")
1307 | if recurse > 0:
1308 | self.show_seq(self.get_list(), ';', recurse-1)
1309 | else:
1310 | print_cont("%d values" % self.get_list_length())
1311 | print_cont("]")
1312 | else:
1313 | t = self.tag()
1314 | if t == 0:
1315 | print_cont("(")
1316 | if recurse > 0:
1317 | self.show_seq(self.fields(), ',', recurse-1)
1318 | else:
1319 | print_cont("%d fields" % self.size_words())
1320 | print_cont(")")
1321 | elif t == OCamlValue.LAZY_TAG:
1322 | self.show_opaque("lazy")
1323 | elif t == OCamlValue.CLOSURE_TAG:
1324 | print_cont("Closure(")
1325 | if recurse > 0:
1326 | self.show_seq(self.fields(), ',', recurse-1, raw=True)
1327 | else:
1328 | print_cont("%d fields" % self.size_words())
1329 | print_cont(")")
1330 | elif t == OCamlValue.OBJECT_TAG:
1331 | # | x when x = Obj.object_tag ->
1332 | # let fields = get_fields [] s in
1333 | # let clasz, id, slots =
1334 | # match fields with
1335 | # | h::h'::t -> h, h', t
1336 | # | _ -> assert false
1337 | # in
1338 | # (* No information on decoding the class (first field). So just print
1339 | # * out the ID and the slots. *)
1340 | # "Object #" ^ dump id ^ " (" ^ String.concat ", " (List.map dump slots) ^ ")"
1341 | # FIXME todo
1342 | self.show_opaque("object")
1343 | elif t == OCamlValue.INFIX_TAG:
1344 | self.show_opaque("infix")
1345 | elif t == OCamlValue.FORWARD_TAG:
1346 | self.show_opaque("forward")
1347 | elif t < OCamlValue.NO_SCAN_TAG:
1348 | print_cont("Tag%d(" % t)
1349 | if recurse > 0:
1350 | self.show_seq(self.fields(), ',', recurse-1)
1351 | else:
1352 | print_cont("%d fields" % self.size_words())
1353 | print_cont(")")
1354 | elif t == OCamlValue.STRING_TAG:
1355 | print_cont("%s" % self._string())
1356 | elif t == OCamlValue.DOUBLE_TAG:
1357 | print_cont("%s" % self._float())
1358 | elif t == OCamlValue.ABSTRACT_TAG:
1359 | self.show_opaque("abstract")
1360 | elif t == OCamlValue.CUSTOM_TAG:
1361 | # FIXME better handle builtin caml custom values : int32, int64, etc
1362 | try:
1363 | sym = self.field(0).resolve()
1364 | except:
1365 | sym = '?'
1366 | try:
1367 | name = self.field(0).val().cast(charp.pointer()).dereference().string()
1368 | except:
1369 | name = ''
1370 | raise
1371 | self.show_opaque("custom " + sym + ' "' + name + '"')
1372 | elif t == OCamlValue.FINAL_TAG:
1373 | self.show_opaque("final")
1374 | elif t == OCamlValue.DOUBLE_ARRAY_TAG:
1375 | print_cont("")
1376 | # return "[|%s|]" % "; ".join([x.dump() for x in self.fields()])
1377 | else:
1378 | self.show_opaque("unknown hd=0x%X" % int(self.hd()))
1379 |
1380 | # nil = OCamlValue.of_int(0)
1381 | # true = OCamlValue.of_int(1)
1382 | # false = OCamlValue.of_int(0)
1383 |
1384 | class DumpOCamlValue(gdb.Command):
1385 | """Recursively dumps runtime representation of OCaml value
1386 |
1387 | Dump value: ml_dump [/r[N]]
1388 | Dump the array of values: ml_dump [/r[N]]
1389 | Dump the pointer to value: ml_dump [/r[N]] 1
1390 | Dump local (stack) GC roots: ml_dump [/r[N]] local_roots
1391 |
1392 | Optional /r flag controls the recursion depth limit."""
1393 |
1394 | def __init__(self):
1395 | gdb.Command.__init__(self, "ml_dump", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False)
1396 |
1397 | def parse_as_addr(self,addr):
1398 | x = gdb.parse_and_eval(addr)
1399 | if x.address == None:
1400 | return x.cast(size_t.pointer())
1401 | else: # l-value, prevent short read when no debugging info
1402 | return gdb.parse_and_eval("*((size_t*)&"+addr+")").cast(size_t.pointer())
1403 |
1404 | def show_ptr(self, addr, recurse):
1405 | print_cont("*0x%x:" % int(addr.cast(size_t)))
1406 | OCamlValue(addr.dereference()).show(recurse)
1407 | print("")
1408 |
1409 | # ocaml runtime may be compiled without debug info so we have to be specific with types
1410 | # otherwise gdb may default to 32-bit int even on 64-bit arch and inspection goes loose
1411 | # NB values can be given by name or by address
1412 | @TraceAll
1413 | def invoke(self, arg, from_tty):
1414 | init_types()
1415 | init_memoryspace()
1416 | args = gdb.string_to_argv(arg)
1417 | recurse = 1
1418 | if len(args) > 0 and args[0].startswith("/r"):
1419 | s = args[0][2:]
1420 | if s == "":
1421 | recurse = float('inf')
1422 | else:
1423 | recurse = int(s)
1424 | args = args[1:]
1425 | if len(args) < 1 or len(args) > 2:
1426 | print("Wrong usage, see \"help ml_dump\"")
1427 | return
1428 | if len(args) == 2:
1429 | addr = self.parse_as_addr(args[0])
1430 | for i in range(int(args[1])):
1431 | self.show_ptr(addr + i, recurse)
1432 | else:
1433 | if args[0] == "local_roots":
1434 | p = gdb.parse_and_eval("*(struct caml__roots_block**)&caml_local_roots")
1435 | while p != 0:
1436 | print("caml_frame 0x%x" % int(p.cast(size_t)))
1437 | for i in range(int(p['nitems'])):
1438 | self.show_ptr(p['tables'][i], recurse)
1439 | p = p['next']
1440 | else:
1441 | addr = self.parse_as_addr(args[0])
1442 | OCamlValue(addr).show(recurse)
1443 | print("")
1444 |
1445 | DumpOCamlValue()
1446 |
1447 | class ShowOCamlHeap(gdb.Command):
1448 | """Show some facts about OCaml heap: ml_heap [units]
1449 |
1450 | Specify "w" or "words" for `units` to use OCaml words rather than bytes"""
1451 |
1452 | def __init__(self):
1453 | gdb.Command.__init__(self, "ml_heap", gdb.COMMAND_NONE, gdb.COMPLETE_NONE, False)
1454 |
1455 | def e(self,x,t):
1456 | return gdb.parse_and_eval("*(("+str(t)+"*)&"+x+")")
1457 |
1458 | def malloced_size(self,x):
1459 | # see caml_aligned_malloc, FIXME Page_size = 4K assumption
1460 | return x + 4*size_t.sizeof + 4*1024
1461 |
1462 | @TraceAll
1463 | def invoke(self, arg, from_tty):
1464 | init_types()
1465 | init_memoryspace()
1466 | args = gdb.string_to_argv(arg)
1467 | units = "bytes"
1468 | unit = 1
1469 | if len(args) > 0 and (args[0] == "words" or args[0] == "w"):
1470 | unit = size_t.sizeof
1471 | units = "words"
1472 |
1473 | print(" major heap size = %d %s" % (self.e("caml_stat_heap_size","intnat") / unit, units))
1474 | print(" major heap top size = %d %s" % (self.e("caml_stat_top_heap_size","intnat") / unit, units))
1475 | print(" total heap chunks =", self.e("caml_stat_heap_chunks","intnat"))
1476 | print(" gray values = %d %s" % (self.e("gray_vals_size","size_t") * size_t.sizeof / unit, units))
1477 | print("extra heap resources =", self.e("caml_extra_heap_resources","double"))
1478 | print()
1479 | print("minor heap :")
1480 | minor_size = self.e("caml_minor_heap_size","size_t")
1481 | minor_base = self.e("caml_young_base","size_t")
1482 | y_start = self.e("caml_young_start","size_t")
1483 | y_ptr = self.e("caml_young_ptr","size_t")
1484 | y_end = self.e("caml_young_end","size_t")
1485 | print("0x%x .. 0x%x - 0x%x (total %d used %d %s) malloc: 0x%x - 0x%x" % \
1486 | (y_start, y_ptr, y_end, minor_size/unit, (y_end - y_ptr)/unit, units, minor_base, minor_base + self.malloced_size(minor_size)))
1487 | print()
1488 | print("major heap :")
1489 | # the following casting is needed, otherwise gdb may sign-extend values without debug info
1490 | v = self.e("caml_heap_start","size_t")
1491 | i = 0
1492 | while v != 0:
1493 | p = gdb.Value(v - 4 * size_t.sizeof)
1494 | # typedef struct {
1495 | # void *block; /* address of the malloced block this chunk live in */
1496 | # asize_t alloc; /* in bytes, used for compaction */
1497 | # asize_t size; /* in bytes */
1498 | # char *next;
1499 | # } heap_chunk_head;
1500 | p = p.cast(size_t.pointer())
1501 | block = p.dereference()
1502 | size = (p + 2).dereference()
1503 | print("%2d) chunk 0x%x - 0x%x (%d %s) malloc: 0x%x - 0x%x" % (i, v, v+size, size/unit, units, block, block+self.malloced_size(size)))
1504 | i = i + 1
1505 | v = (p + 3).dereference()
1506 |
1507 | ShowOCamlHeap()
1508 |
1509 | class ScanOCamlValue(gdb.Command):
1510 | """Scan region of OCaml heap and show all values in it
1511 |
1512 | ml_scan [/r[N]] addr_start [bytes]
1513 |
1514 | Optional /r flag controls the recursion depth limit."""
1515 |
1516 | def __init__(self):
1517 | gdb.Command.__init__(self, "ml_scan", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False)
1518 |
1519 | def parse_as_addr(self,addr):
1520 | x = gdb.parse_and_eval(addr)
1521 | if x.address == None:
1522 | return x.cast(size_t.pointer())
1523 | else: # l-value, prevent short read when no debugging info
1524 | return gdb.parse_and_eval("*((size_t*)&"+addr+")").cast(size_t.pointer())
1525 |
1526 | def show_val(self, addr, recurse):
1527 | print_cont("0x%x = " % int(addr.cast(size_t)))
1528 | OCamlValue(addr).show(recurse)
1529 | print("")
1530 |
1531 | @TraceAll
1532 | def invoke(self, arg, from_tty):
1533 | init_types()
1534 | init_memoryspace()
1535 | args = gdb.string_to_argv(arg)
1536 | recurse = 1
1537 | if len(args) > 0 and args[0].startswith("/r"):
1538 | s = args[0][2:]
1539 | if s == "":
1540 | recurse = float('inf')
1541 | else:
1542 | recurse = int(s)
1543 | args = args[1:]
1544 | if len(args) < 1 or len(args) > 2:
1545 | print("Wrong usage, see \"help ml_scan\"")
1546 | return
1547 | addr = self.parse_as_addr(args[0])
1548 | if len(args) == 2:
1549 | addr_end = addr + int(args[1]) / size_t.sizeof
1550 | else:
1551 | addr_end = addr + 64
1552 | while addr < addr_end:
1553 | self.show_val(addr,recurse)
1554 | x = OCamlValue(addr)
1555 | addr = addr + x.size_words() + 1
1556 |
1557 | ScanOCamlValue()
1558 |
1559 | # The functions below mimic the behavior of the OCaml run-time when performing GC.
1560 | # In C programs, memory must be handled explicitly by the developer.
1561 | # In C++ programs, tools like shared_pointer, unique_pointer, ... make memory management
1562 | # easier, yet still explicit. Memory is lost when no longer referenced.
1563 | # In languages with GC, memory management is not explicit, but handled by the run-time.
1564 | # Referenced memory is discoverable through "roots", leaving the GC with the knowledge
1565 | # that all other allocated memory is unreferenced and can be reclaimed/reused.
1566 | # In OCaml there are multiple roots, each of which is described near the function
1567 | # that discovers it below.
1568 |
1569 | def get_value(name, type):
1570 | return gdb.parse_and_eval("""*( (%s*)(&%s) )""" % (type, name))
1571 |
1572 | # Global roots are global values from C-code that are registered into the GC.
1573 | def get_global_roots(roots_list_name):
1574 | """
1575 | Traverse the linked list of the provided global roots list and return a list of root addresses.
1576 | """
1577 | roots_list = get_value_safe(roots_list_name)
1578 | ret = []
1579 | if roots_list is None or roots_list == 0:
1580 | return ret
1581 |
1582 | global_root = roots_list['forward'].dereference()
1583 | while global_root != 0:
1584 | root = global_root.dereference()['root'].dereference()
1585 | ret.append((root, root+1, roots_list_name))
1586 | global_root = global_root.dereference()['forward'].dereference()
1587 | return ret
1588 |
1589 | # Local roots are roots in the stack of C-code.
1590 | # They are automatically generated and registered from the CAMLxparam, CAMLlocal, ... macro's
1591 | # and linked together as a linked list.
1592 | def get_local_roots(roots, name):
1593 | ret = []
1594 | while roots != 0:
1595 | root_struct = roots.dereference()
1596 | for i in range(int(root_struct['ntables'])):
1597 | for j in range(int(root_struct['nitems'])):
1598 | value = root_struct['tables'][i][j]
1599 | ret.append((value, value + size_t.sizeof, name))
1600 | roots = root_struct['next']
1601 | return ret
1602 |
1603 | # See byterun/finalise.c:caml_final_do_strong_roots
1604 | # Finalisers can be registered with the GC. They are functions
1605 | # that are called when the GC determines that the value is no
1606 | # longer used and the GC is about to reclaim the memory.
1607 | # These finalisers therefore contain both a closure to a function
1608 | # and an OCamlValue to the relevant block.
1609 | def get_final_roots():
1610 | ret = []
1611 |
1612 | young = get_value_safe("'finalise.d.c'::young", size_t) # avoid ambiguity
1613 | if young is None:
1614 | young = get_value_safe("'finalise.c'::young", size_t)
1615 | if young is None:
1616 | print("Didn't find 'finalise.c::young'. Young finaliser information is missing.")
1617 | return ret
1618 |
1619 | for i in range(int(young)):
1620 | final_struct = get_value_safe("final_table[%d]" % i)
1621 | if final_struct is None:
1622 | break
1623 | func = final_struct['fun']
1624 | val = final_struct['val']
1625 | ret.append((func, func + size_t.sizeof, "final_table"))
1626 | ret.append((val, val + size_t.sizeof, "final_table"))
1627 |
1628 | # TODO
1629 | to_do_ptr = get_value_safe("to_do_hd")
1630 | while to_do_ptr is not None and to_do_ptr.cast(size_t) != 0:
1631 | to_do_struct = to_do_ptr.dereference()
1632 | size = int(to_do_struct["size"].cast(size_t))
1633 | items = do_to_struct["items"]
1634 | for i in range(size):
1635 | item_struct = (items + i).dereference()
1636 | func = item_struct['fun']
1637 | val = item_struct['val']
1638 | ret.append((func, func + size_t.sizeof, "final: to_do"))
1639 | ret.append((val, val + size_t.sizeof, "final: to_do"))
1640 |
1641 | return ret
1642 |
1643 | # Dynamic global roots are global variables from dynamically linked libraries.
1644 | # They are added to a linked list of roots.
1645 | def get_dyn_globals():
1646 | ret = []
1647 | dyn_globals = get_value_safe("caml_dyn_globals")
1648 | while dyn_globals is not None and dyn_globals != 0:
1649 | dyn_globals_struct = dyn_globals.dereference()
1650 | v = dyn_globals_struct['data'].cast(size_t)
1651 | ret.append((v, v + size_t.sizeof, "dyn_globals"))
1652 | dyn_globals = dyn_globals_struct['next']
1653 | return ret
1654 |
1655 | # See walk_ocaml_stack() documentation.
1656 | # This function will walk the stack of the active thread first,
1657 | # followed by any other systhreads threads. All threads' information
1658 | # is kept in a linked list of thread information blocks.
1659 | # For the active thread, there are some global variables for quick
1660 | # access. Whenever acquiring or releasing the OCaml run-time lock,
1661 | # calling the GC or a C-function these global variables are updated.
1662 | # When releasing or acquiring the global run-time lock, these global
1663 | # variables are sync'd with the values from the linked list.
1664 | def walk_ocaml_stacks():
1665 | ret = []
1666 |
1667 | # scanning for currently active thread
1668 | sp = get_value_safe("caml_bottom_of_stack")
1669 | retaddr = get_value_safe("caml_last_return_address")
1670 | gc_regs = get_value_safe("caml_gc_regs")
1671 | roots = get_value_safe("caml_local_roots")
1672 |
1673 | if sp is not None and retaddr is not None and gc_regs is not None:
1674 | ret.extend(walk_ocaml_stack(sp, retaddr, gc_regs))
1675 | if roots is not None:
1676 | ret.extend(get_local_roots(roots, "caml_local_roots"))
1677 |
1678 | # scanning for inactive threads
1679 | # otherlibs/systhreads/st_stubs.c:caml_thread_scan_roots()
1680 | active_thread = thread = get_value_safe("curr_thread")
1681 | if active_thread is None:
1682 | return
1683 |
1684 | while caml_thread_structp is not None:
1685 | thread_struct = thread.dereference()
1686 |
1687 | memrange = memoryspace.get_range(thread_struct["bottom_of_stack"])
1688 | description = "Error: unknown thread" if memrange is None else memrange.description
1689 |
1690 | descriptor = thread_struct["descr"]
1691 | ret.append( (descriptor, descriptor + size_t.sizeof, "%s descr" % description) )
1692 | backtrace_last_exn = thread_struct["caml_backtrace_last_exn"] # there's probably some macro at play here...
1693 | ret.append( (backtrace_last_exn, backtrace_last_exn + size_t.sizeof, "%s backtrace_last_exn" % description) )
1694 |
1695 | if thread.cast(size_t) != active_thread.cast(size_t):
1696 | ret.extend(walk_ocaml_stack(thread_struct["bottom_of_stack"], thread_struct["last_retaddr"], thread_struct["gc_regs"], description))
1697 | ret.extend(get_local_roots(thread_struct["caml_local_roots"], "%s local roots" % description))
1698 |
1699 | thread = thread_struct["next"]
1700 | if thread.cast(size_t) == active_thread.cast(size_t):
1701 | break
1702 |
1703 | return ret
1704 |
1705 | # This is a freeform translation of asmrun/roots.c:do_local_roots()
1706 | # We basically walk the stack in search of OCamlValues.
1707 | # C-code should never call the garbage collector directly. Instead
1708 | # Ocaml dynamically inserts calls to the GC wherever memory is allocated.
1709 | # The GC is then called when the minor heap runs out of space to satisfy
1710 | # the current request. Because the compiler is the only one to insert
1711 | # calls to the GC, it knows the state of the stack and CPU registers
1712 | # wrt them contains GC roots.
1713 | # It emits this information in a structure called the caml_frametable.
1714 | # This structure is walked at run-time and a hash-table is created where
1715 | # the key is the return address of the caml_call_gc() function.
1716 | # When traversing the stack, the run-time uses this information to
1717 | # locate the roots on the stack or inside registers.
1718 | def walk_ocaml_stack(sp, retaddr, gc_regs, description="stack"):
1719 | if caml_contextp is None:
1720 | return []
1721 | fd_mask = get_value_safe("caml_frame_descriptors_mask")
1722 | if fd_mask is None:
1723 | return []
1724 | if get_value_safe("caml_frame_descriptors") is None:
1725 | return []
1726 |
1727 | def hash_retaddr(addr):
1728 | return (addr.cast(size_t) >> 3) & fd_mask
1729 |
1730 | ret = []
1731 | if sp == 0:
1732 | return ret
1733 |
1734 | reg_names = { 0: "rax", 1: "rbx", 2: "rdi", 3: "rsi", 4: "rdx", 5: "rcx", 6: "r8", 7: "r9",
1735 | 8: "r12", 9: "r13", 10: "r10", 11: "r11", 12: "rbp" }
1736 | frame = resolve(retaddr)
1737 |
1738 | while True:
1739 | h = hash_retaddr(retaddr)
1740 | while True:
1741 | d = get_value_safe("caml_frame_descriptors[%d]"%h)
1742 | d_struct = d.dereference()
1743 | if d_struct["retaddr"].cast(size_t) == retaddr.cast(size_t):
1744 | break
1745 | h = (h+1) & fd_mask
1746 |
1747 | if d_struct["frame_size"] != 0xFFFF:
1748 | for n in range(int(d_struct["num_live"])):
1749 | ofs = get_value_safe("caml_frame_descriptors[%d]->live_ofs[%d]" % (h, n), size_t)
1750 | if ofs & 1:
1751 | location = "[%s]" % reg_names.get(int(ofs>>1), "unknown_reg")
1752 | root = (gc_regs.cast(size_t) + ((ofs >> 1) * size_t.sizeof)).cast(size_t.pointer()).dereference()
1753 | else:
1754 | location = "[sp+0x%X]" % ofs
1755 | root = sp.cast(size_t) + ofs
1756 | root = root.cast(size_t.pointer())
1757 | root = root.dereference().cast(size_t)
1758 | ret.append((root, root+size_t.sizeof, "%s(%s%s)" % (description, frame, location)))
1759 |
1760 | sp = (sp.cast(size_t) + (d_struct["frame_size"] & 0xFFFC)).cast(charp)
1761 | retaddr = (sp.cast(size_t) - size_t.sizeof).cast(size_t.pointer()).dereference()
1762 | else:
1763 | next_context_p = (sp.cast(size_t) + (2 * size_t.sizeof)).cast(caml_contextp)
1764 | next_context_struct = next_context_p.dereference()
1765 | sp = next_context_struct["bottom_of_stack"]
1766 | retaddr = next_context_struct["last_retaddr"]
1767 | regs = next_context_struct["gc_regs"]
1768 | if sp == 0:
1769 | break
1770 | return ret
1771 |
1772 | # Compile-time global values.
1773 | # This is an array of OcamlValues to OCaml blocks. There is 1 pointer per module linked in.
1774 | # The last value is a NULL sentinel. caml_globals is part of the .data section.
1775 | def get_globals():
1776 | ret = []
1777 | global_data_ptr = get_value_safe("caml_globals", size_t.pointer())
1778 | if global_data_ptr is None:
1779 | return ret
1780 |
1781 | global_data = global_data_ptr.dereference()
1782 |
1783 | index = 0
1784 | while global_data != 0:
1785 | ret.append((global_data, global_data + size_t.sizeof, "global_data[%d]" % index))
1786 | global_data_ptr = (global_data_ptr.cast(size_t) + size_t.sizeof).cast(size_t.pointer())
1787 | global_data = global_data_ptr.dereference()
1788 | index += 1
1789 |
1790 | return ret
1791 |
1792 | # OCaml contains a method to dynamically add detection of more roots at run-time
1793 | # through the use of a callback function. Each user must store the previous pointer
1794 | # value, and replace the hook with its own function. Therefore, for each possible
1795 | # hook/symbol, we need to know what symbol is used to store the previous value.
1796 | # The actual discovery of the roots, however is already done in one of the above
1797 | # functions, this is merely a check that we didn't miss any roots we didn't know about.
1798 | def traverse_scan_roots_hook():
1799 | known_hooks = {
1800 | "caml_thread_scan_roots": "prev_scan_roots_hook", # handled in walk_ocaml_stacks()
1801 | # more here, make sure to describe where it's handled
1802 | }
1803 |
1804 | scan_roots_hook_ptr = get_value_safe("caml_scan_roots_hook")
1805 | while scan_roots_hook_ptr is not None and scan_roots_hook_ptr != 0:
1806 | scan_roots_hook = resolve(scan_roots_hook_ptr)
1807 | next_hook = known_hooks.get(scan_roots_hook, None)
1808 | if next_hook is None:
1809 | print("Unhandled root scanning function: %s" % scan_roots_hook)
1810 | return
1811 | scan_roots_hook_ptr = get_value_safe(next_hook)
1812 |
1813 | # See asmrun/roots.c:caml_do_roots for guidance
1814 | # This function walks over all data structures known to contain roots.
1815 | # Following this, you can walk over all OCaml values the GC knows.
1816 | # This is similar to the way the GC performs the "mark" phase.
1817 | # Another function could be written that walks over the heap in a
1818 | # similar fashion to the "sweep" phase.
1819 | # TODO: caml_globals can contain many NULL values yielding warnings/errors
1820 | def get_entry_points():
1821 | """
1822 | Returns a list of entry-points (aka roots) from where all live values
1823 | can be discovered.
1824 | The list contains tuples (start address, post-end address, source)
1825 | with source a string identifying where the addresses were discovered.
1826 | """
1827 | ret = []
1828 |
1829 | # global roots
1830 | ret.extend(get_globals())
1831 | # dynamic global roots
1832 | ret.extend(get_dyn_globals())
1833 | # stacks and local roots
1834 | ret.extend(walk_ocaml_stacks())
1835 |
1836 | # global C roots
1837 | ret.extend(get_global_roots("caml_global_roots"))
1838 | ret.extend(get_global_roots("caml_global_roots_young"))
1839 | ret.extend(get_global_roots("caml_global_roots_old"))
1840 |
1841 | # finalised values
1842 | ret.extend(get_final_roots())
1843 |
1844 | # scan_roots_hook
1845 | traverse_scan_roots_hook()
1846 | return ret
1847 |
1848 | class ValidateHeap(gdb.Command):
1849 | """Validates the OCaml heap
1850 | ml_validate
1851 | """
1852 | def __init__(self):
1853 | gdb.Command.__init__(self, "ml_validate", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False)
1854 |
1855 | def parse_as_addr(self,addr):
1856 | x = gdb.parse_and_eval(addr)
1857 | if x.address == None:
1858 | return x.cast(size_t.pointer())
1859 | else: # l-value, prevent short read when no debugging info
1860 | return gdb.parse_and_eval("*((size_t*)&"+addr+")").cast(size_t.pointer())
1861 |
1862 | def _scan_range(self, todo, seen):
1863 | values = 0
1864 | bytes = 0
1865 | skipped = 0
1866 |
1867 | try:
1868 | while len(todo):
1869 | value = todo.pop()
1870 |
1871 | if value in seen:
1872 | skipped += 1
1873 | continue
1874 | seen.add(int(value.val()))
1875 |
1876 |
1877 | valid, what, children = value.try_parse(3)
1878 | if not valid:
1879 | print("Invalid value at %s: %s" % (str(value), what))
1880 | p = value.parent
1881 | pindex = value.parentindex
1882 | while p is not None:
1883 | if isinstance(p, OCamlValue):
1884 | _, p_what, _ = p.try_parse(3)
1885 | print("from: 0x%08X[%d] - %s" % (p.val(), pindex, p_what))
1886 | pindex = p.parentindex
1887 | p = p.parent
1888 | else:
1889 | print("from: %s[%d]" % (str(p), pindex))
1890 | p = None
1891 | pindex = None
1892 | elif len(children):
1893 | todo.extend([child for child in children if int(child.val()) not in seen])
1894 |
1895 | if valid:
1896 | values += 1
1897 | bytes += 0 if value.is_int() else value.size_bytes()
1898 | except:
1899 | traceback.print_exc()
1900 | raise
1901 |
1902 | return (values, skipped, bytes)
1903 |
1904 | @TraceAll
1905 | def invoke(self, arg, from_tty):
1906 | init_types()
1907 | init_memoryspace()
1908 | args = gdb.string_to_argv(arg)
1909 |
1910 | if len(args) > 0:
1911 | print("Wrong usage, see \"help ml_validate\"")
1912 | return
1913 |
1914 | values = skipped = bytes = 0
1915 |
1916 | try:
1917 | todo = collections.deque()
1918 | seen = set()
1919 |
1920 | for (begin, end, source) in get_entry_points():
1921 | #print("Scanning %s - %d values" % (source, (end - begin)/size_t.sizeof))
1922 |
1923 | address = begin
1924 | while address < end:
1925 | todo.append(OCamlValue(address, parent=source, parentindex=(address - begin)/size_t.sizeof))
1926 | address = address + size_t.sizeof
1927 |
1928 | cur_values, cur_skipped, cur_bytes = self._scan_range(todo, seen)
1929 | #print("Scanned %d values, skipped %d values, total of %5dB" % (cur_values, cur_skipped, cur_bytes))
1930 | values += cur_values
1931 | skipped += cur_skipped
1932 | bytes += cur_bytes
1933 |
1934 | except:
1935 | traceback.print_exc()
1936 | raise
1937 |
1938 | print("Totals: scanned %d values, skipped %d values, total of %5dB" % (values, skipped, bytes))
1939 |
1940 | ValidateHeap()
1941 |
1942 | class ShowValue(gdb.Command):
1943 | """
1944 | Display an OCaml value (recursively)
1945 | ml_show [/r] [verbosity]
1946 | """
1947 | def __init__(self):
1948 | gdb.Command.__init__(self, "ml_show", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False)
1949 |
1950 | def parse_as_addr(self,addr):
1951 | x = gdb.parse_and_eval(addr)
1952 | if x.address == None:
1953 | return x.cast(size_t.pointer())
1954 | else: # l-value, prevent short read when no debugging info
1955 | return gdb.parse_and_eval("*((size_t*)&"+addr+")").cast(size_t.pointer())
1956 |
1957 | def show(self, value, depth, recurse, verbosity):
1958 | valid, what, children = value.try_parse(verbosity)
1959 | what = what.replace('\0', "\\0")
1960 | prefix = " " * depth
1961 | print("%s%s %s" % (prefix, what, '= [' if len(children) and recurse else ''))
1962 | if recurse:
1963 | for child in children:
1964 | self.show(child, depth + 1, recurse - 1, verbosity)
1965 |
1966 | if len(children) and recurse:
1967 | print("%s%s" % (prefix, ']'))
1968 |
1969 | @TraceAll
1970 | def invoke(self, arg, from_tty):
1971 | init_types()
1972 | init_memoryspace()
1973 | args = gdb.string_to_argv(arg)
1974 | recurse = 1
1975 | verbosity = 1
1976 | if len(args) < 1 or len(args) > 3:
1977 | print("Wrong usage, see \"help ml_show\"")
1978 | return
1979 |
1980 | if len(args) == 1:
1981 | addr = self.parse_as_addr(args[0])
1982 | elif len(args) == 3:
1983 | if not args[0].startswith("/r"):
1984 | print("recursivity must be specified as /rN")
1985 | return
1986 | recurse = int(args[0][2:])
1987 | addr = self.parse_as_addr(args[1])
1988 | verbosity = int(args[2])
1989 | else:
1990 | if args[0].startswith('/r'):
1991 | recurse = int(args[0][2:])
1992 | addr = self.parse_as_addr(args[1])
1993 | else:
1994 | addr = self.parse_as_addr(args[0])
1995 | verbosity = int(args[1])
1996 |
1997 | value = OCamlValue(addr)
1998 | self.show(value, 0, recurse, verbosity)
1999 |
2000 | ShowValue()
2001 |
2002 | class ShowMemory(gdb.Command):
2003 | """
2004 | Shows memory space and its disposition
2005 | ml_target [verbosity]
2006 | """
2007 |
2008 | def __init__(self):
2009 | gdb.Command.__init__(self, "ml_target", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False)
2010 |
2011 | def parse_as_addr(self,addr):
2012 | x = gdb.parse_and_eval(addr)
2013 | if x.address == None:
2014 | return x.cast(size_t.pointer())
2015 | else: # l-value, prevent short read when no debugging info
2016 | return gdb.parse_and_eval("*((size_t*)&"+addr+")").cast(size_t.pointer())
2017 |
2018 | @TraceAll
2019 | def invoke(self, arg, from_tty):
2020 | init_types()
2021 | init_memoryspace()
2022 | args = gdb.string_to_argv(arg)
2023 |
2024 | if len(args) == 2 and args[0] == 'all':
2025 | verbosity = int(args[1])
2026 | address = None
2027 | elif len(args) == 1:
2028 | if args[0] == 'all':
2029 | address = None
2030 | verbosity = 0
2031 | else:
2032 | address = self.parse_as_addr(args[0])
2033 | else:
2034 | print("Wrong usage, see \"help ml_target\"")
2035 | return
2036 |
2037 | try:
2038 | if address is None:
2039 | memoryspace.display(verbosity)
2040 | else:
2041 | memrange = memoryspace.get_range(address)
2042 | if memrange is None:
2043 | print("Address 0x%08X is invalid" % address)
2044 | else:
2045 | print("Address 0x%08X is part of: %s" % (address, str(memrange)))
2046 | except:
2047 | traceback.print_exc()
2048 |
2049 |
2050 | ShowMemory()
2051 |
2052 | class FindPointersTo(gdb.Command):
2053 | """Finds memory locations that point to a specified value:
2054 | ml_find
2055 | """
2056 | def __init__(self):
2057 | gdb.Command.__init__(self, "ml_find", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, False)
2058 |
2059 | @TraceAll
2060 | def invoke(self, arg, from_tty):
2061 | init_types()
2062 | init_memoryspace()
2063 | args = gdb.string_to_argv(arg)
2064 |
2065 | if len(args) != 1:
2066 | print("Wrong usage, see \"help ml_find\"")
2067 | return
2068 |
2069 | value = int(args[0])
2070 | pattern = struct.pack("L", value)
2071 | locations = memoryspace.search_memory_of_types(pattern, *MemoryType.all())
2072 | for location in locations:
2073 | memrange = memoryspace.get_range(location)
2074 | print("Found at 0x%08X in %s" % (location, memrange.description))
2075 |
2076 | FindPointersTo()
2077 |
--------------------------------------------------------------------------------
/next.ml:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env ocamlscript
2 |
3 | let () =
4 | match Sys.argv with
5 | | [|_;k|] ->
6 | let rec loop acc =
7 | match input_line stdin with
8 | | exception End_of_file -> List.rev acc
9 | | s -> loop (s::acc)
10 | in
11 | begin match loop [] with
12 | | [] -> exit 1
13 | | first::_ as l ->
14 | let rec next = function
15 | | x::xs when x = k -> (match xs with [] -> first | y::_ -> y)
16 | | _::xs -> (* Printf.eprintf "skipping %S <> %S" y k; *) next xs
17 | | [] -> first
18 | in
19 | print_endline @@ next l
20 | end
21 | | _ ->
22 | prerr_endline "next.ml.exe ";
23 | prerr_endline "Outputs the next line from stdin after the , first one if is the last."
24 |
--------------------------------------------------------------------------------
/oasis-query.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 |
3 | # OASIS query helper
4 | # https://github.com/ygrek/scraps/blob/master/oasis-query.sh
5 |
6 | set -eu
7 |
8 | export LC_ALL=C
9 |
10 | code_sections() {
11 | oasis query ListSections | egrep '^(Library|Executable|Object)'
12 | }
13 |
14 | # query oasis for all BuildDepends and exclude internal Library names
15 | show_deps() {
16 | join -v 2 <(oasis query ListSections | grep Library | sed 's/Library(\(.*\))/\1/' | sort -u) <(oasis query $(code_sections | sed s/$/.BuildDepends/ ) | tr ',' '\n' | awk '($1!=""){print $1}' | sort -u)
17 | }
18 |
19 | show_source_dirs() {
20 | oasis query $(code_sections | sed s/$/.Path/ ) | sort -u
21 | }
22 |
23 | show_build_dirs() {
24 | show_source_dirs | sed 's@^@_build/@'
25 | }
26 |
27 | show_library_path() {
28 | echo $(show_build_dirs) | tr ' ' ':'
29 | }
30 |
31 | env_library_path() {
32 | printf "%s:$CAML_LD_LIBRARY_PATH" "$(show_library_path)"
33 | }
34 |
35 | show_include_dirs() {
36 | ocamlfind query -r -i-format $(show_deps)
37 | show_build_dirs | sed 's/^/-I /'
38 | }
39 |
40 | generate_merlin() {
41 | show_source_dirs | sed 's/^/S /'
42 | show_build_dirs | sed 's/^/B /'
43 | show_deps | sed 's/^/PKG /'
44 | }
45 |
46 | case "${1:-}" in
47 | "deps") show_deps ;;
48 | "build-dirs") show_build_dirs ;;
49 | "source-dirs") show_source_dirs ;;
50 | "library-path") show_library_path ;;
51 | "env-library-path") env_library_path ;;
52 | "include-dirs") show_include_dirs ;;
53 | "merlin") generate_merlin ;;
54 | *)
55 | echo "whoa?" >&2
56 | echo "Supported commands : deps build-dirs source-dirs include-dirs merlin library-path env-library-path" >&2
57 | exit 1
58 | esac
59 |
--------------------------------------------------------------------------------
/opam-url:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -eu
4 |
5 | if [ $# -eq 0 ]; then
6 | echo 'usage: opam-url [user]'
7 | exit 2
8 | fi
9 |
10 | ext=tar.gz
11 | name=$1
12 | version=$2
13 | org=${3:-ygrek}
14 | if [ "$name" = "ocaml-extlib" ]; then
15 | fname=extlib
16 | else
17 | fname=$name
18 | fi
19 |
20 | if [ "$name" = "ocaml-extunix" ]; then
21 | gname=extunix
22 | else
23 | gname=$name
24 | fi
25 |
26 | if [ "$name" = "mldonkey" ]; then
27 | tag=release-$(echo $version | tr '.' '-')
28 | else
29 | tag=$version
30 | fi
31 |
32 | if [ "$name" = "devkit" ]; then
33 | org=ahrefs
34 | ext=tbz
35 | fi
36 |
37 | if [ "$name" = "ocaml-murmur3" ]; then
38 | org=ahrefs
39 | fi
40 |
41 | if [ "$name" = "ocaml-mariadb" ]; then
42 | org=andrenth
43 | fi
44 |
45 | release=~/p/release/$name/$fname-$version.$ext
46 | if [ ! -f "$release" ]; then
47 | ext=tar.bz2
48 | release=~/p/release/$name/$fname-$version.$ext
49 | if [ ! -f "$release" ]; then
50 | echo "E: $release doesn't exist"
51 | exit 3
52 | fi
53 | fi
54 | src="https://ygrek.org/p/release/$name/$fname-$version.$ext"
55 | mirror1="https://github.com/$org/$gname/releases/download/v$tag/$fname-$version.$ext"
56 | mirror2="https://github.com/$org/$gname/releases/download/$tag/$fname-$version.$ext"
57 |
58 | function cuts0 {
59 | awk '{print $1}'
60 | }
61 |
62 | if [ "$(curl -s -L "$src" | sha256sum - | cuts0)" != "$(sha256sum "$release" | cuts0)" ]; then
63 | echo "W: $src bad"
64 | unset src
65 | fi
66 | mirror=$mirror1
67 | if [ "$(curl -s -L "$mirror" | sha256sum - | cuts0)" != "$(sha256sum "$release" | cuts0)" ]; then
68 | mirror=$mirror2
69 | [ "$(curl -s -L "$mirror" | sha256sum - | cuts0)" == "$(sha256sum "$release" | cuts0)" ] || (echo "E:$mirror bad"; exit 2)
70 | fi
71 |
72 | echo "url {"
73 | if [ -v src ]; then
74 | echo " src: \"$src\""
75 | else
76 | echo " src: \"$mirror\""
77 | fi
78 | cat < report_argv
19 | ```
20 |
21 | Get the global view and final balance:
22 |
23 | ```
24 | $ ./example.ml
25 | [New Year] cake : 58.00 (3 pax), Alice paid 66.00 tipping 13%
26 | [ Jan 2] cinema : 40.00 (2 pax), Bob paid 40.00
27 | [ Jan 10] pizza : 50.00 (3 pax), Mallory paid 55.00 tipping 10%
28 |
29 | Bob -15.75
30 | Alice -3.50
31 | Mallory 19.25
32 | ```
33 |
34 | Report for one participant:
35 |
36 | ```
37 | $ ./example.ml report Bob
38 | [New Year] -22.00 = -22.00 : cake : 58.00 (3 pax), Alice paid 66.00 tipping 13%
39 | [ Jan 2] +20.00 = -2.00 : cinema : 40.00 (2 pax), Bob paid 40.00
40 | [ Jan 10] -13.75 = -15.75 : pizza : 50.00 (3 pax), Mallory paid 55.00 tipping 10%
41 | ```
42 | *)
43 |
44 | open Printf
45 |
46 | let fail fmt = ksprintf failwith fmt
47 |
48 | let money x = (if x < 0 then "-" else "") ^ sprintf "%d.%02d" (abs x / 100) (abs x mod 100)
49 | let delta x = (if x > 0 then "+" else "") ^ money x
50 | let pr fmt = ksprintf print_endline fmt
51 |
52 | let sum = List.fold_left (fun acc (_,x) -> acc + x) 0
53 |
54 | let compute ?track l =
55 | let h = Hashtbl.create 10 in
56 | let bal who = try Hashtbl.find h who with Not_found -> 0 in
57 | let tracked = ref 0 in
58 | l |> List.iter begin fun (`Date date,where,party,payer) ->
59 | assert (payer <> []);
60 | let paid = sum payer in
61 | let (bill,party) =
62 | match party with
63 | | `Items (bill,l) -> assert (l<>[]); bill, l
64 | | `Share' l -> assert (l<>[]); paid, l |> List.map (fun who -> who, paid / List.length l)
65 | | `Share (bill, l) -> assert (l<>[]); bill, l |> List.map (fun who -> who, bill / List.length l)
66 | in
67 | let total = sum party in
68 | let extra = bill - total in
69 | let taxes =
70 | if extra = 0 then
71 | ""
72 | else if extra < List.length party then (* due to integer division, passed onto payer *)
73 | (* sprintf " incl. extra %s" (money extra) *)
74 | ""
75 | else
76 | sprintf " incl. extra %s (%.2f%%)" (money extra) (100. *. float extra /. float total)
77 | in
78 | let tips = if paid - bill <> 0 then sprintf " tipping %d%%" (100 * (paid - bill) / bill) else "" in
79 | if paid < bill then fail "bill %s < paid %s" (money bill) (money paid);
80 | if bill < total then fail "bill %s < total %s" (money bill) (money total);
81 | party |> List.iter (fun (who,x) -> Hashtbl.replace h who (bal who - x - x * (paid - total) / total));
82 | payer |> List.iter (fun (who,x) -> Hashtbl.replace h who (bal who + x));
83 | let track =
84 | match track with
85 | | None -> Some ""
86 | | Some name when bal name = 0 && !tracked = 0 -> None
87 | | Some name ->
88 | let diff = bal name - !tracked in
89 | tracked := bal name;
90 | Some (sprintf "%7s = %7s :" (if diff = 0 then "" else delta diff) (money @@ bal name))
91 | in
92 | begin match track with
93 | | None -> ()
94 | | Some track ->
95 | pr "[%8s] %s %-20s : %s (%d pax)%s, %s paid %s%s"
96 | date track where (money bill) (List.length party) taxes (String.concat " " @@ List.map fst payer) (money paid) tips;
97 | end
98 | end;
99 | pr "";
100 | h
101 |
102 | let show_standings h =
103 | Hashtbl.fold (fun k v acc -> (k,v)::acc) h []
104 | |> List.sort (fun (_,a) (_,b) -> compare a b)
105 | |> List.iter (fun (who,x) -> pr "%s %s" who (money x))
106 |
107 | let report_paid name ledger =
108 | let x = List.fold_left begin fun acc (_,_,_,payer) ->
109 | List.fold_left (fun acc (who,x) -> acc + if who = name then x else 0) acc payer
110 | end 0 ledger
111 | in
112 | pr "%s %s" name (money x)
113 |
114 | let on x = `Date x
115 | let items l = `Items (sum l, l)
116 | let bill d l = `Items (d,l)
117 | let share' l = `Share' l
118 | let share bill l = `Share (bill, l)
119 |
120 | let report_argv ledger =
121 | match List.tl @@ Array.to_list Sys.argv with
122 | | "report"::name::[]
123 | | name::[] -> show_standings @@ compute ~track:name ledger
124 | | [] -> show_standings @@ compute ledger
125 | | "paid"::name::[] -> report_paid name ledger
126 | | _ -> prerr_endline "wat?"; exit 2
127 |
--------------------------------------------------------------------------------
/php_serialize.ml:
--------------------------------------------------------------------------------
1 | (**
2 | PHP serialization
3 | http://php.net/manual/en/function.serialize.php
4 |
5 | This is free and unencumbered software released into the public domain.
6 | For more information, please refer to
7 | *)
8 |
9 | open ExtLib
10 |
11 | (* open Prelude *)
12 | let (>>) x f = f x
13 | let ($) f g = function x -> f (g x)
14 |
15 | (*
16 |
17 |
18 | Anatomy of a serialize()'ed value:
19 |
20 | String
21 | s:size:value;
22 |
23 | Integer
24 | i:value;
25 |
26 | Boolean
27 | b:value; (does not store "true" or "false", does store '1' or '0')
28 |
29 | Null
30 | N;
31 |
32 | Array
33 | a:size:{key definition;value definition;(repeated per element)}
34 |
35 | Object
36 | O:strlen(object name):object name:object size:{s:strlen(property name):property name:property definition;(repeated per property)}
37 |
38 | String values are always in double quotes
39 | Array keys are always integers or strings
40 | "null => 'value'" equates to 's:0:"";s:5:"value";',
41 | "true => 'value'" equates to 'i:1;s:5:"value";',
42 | "false => 'value'" equates to 'i:0;s:5:"value";',
43 | "array(whatever the contents) => 'value'" equates to an "illegal offset type" warning because you can't use an
44 | array as a key; however, if you use a variable containing an array as a key, it will equate to 's:5:"Array";s:5:"value";',
45 | and
46 | attempting to use an object as a key will result in the same behavior as using an array will.
47 | *)
48 |
49 | type php = AI of (int * php) list | AS of (string * php) list | S of string | I of int | B of bool | F of float | N
50 |
51 | let check x y = if x <> y then failwith (Printf.sprintf "Php_serialize failed : %u <> %u" x y)
52 |
53 | let rec parse_one = parser
54 | | [< ''a'; '':'; n=number; '':'; ''{'; a=parse_array; ''}' >] -> ignore n;(*check n (List.length a);*) a
55 | | [< ''b'; '':'; n=number; '';' >] -> B (0 <> n)
56 | | [< ''d'; '':'; f=parse_float_semi; >] -> F f
57 | | [< n=parse_int >] -> I n
58 | | [< s=parse_str >] -> S s
59 | | [< ''N'; '';' >] -> N
60 | and number t = parse_nat 0 t
61 | and parse_nat n = parser (* overflow test?* *)
62 | | [< ''0'..'9' as c; t >] -> let digit = Char.code c - Char.code '0' in parse_nat (n * 10 + digit) t
63 | | [< >] -> n
64 | and integer = parser
65 | | [< ''-'; t >] -> - (number t)
66 | | [< t >] -> number t
67 | and parse_int = parser
68 | | [< ''i'; '':'; n=integer; '';' >] -> n
69 | and parse_float_semi t = (* ugly, because of one look ahead token FIXME *)
70 | let buf = Scanf.Scanning.from_function (fun () -> Stream.next t) in
71 | Scanf.bscanf buf "%f;" (fun f -> f)
72 | and parse_str = parser
73 | | [< ''s'; '':'; n=number; '':'; ''"'; s=take_string n; ''"'; '';' >] -> s
74 | and take_string n t = String.init n (fun _ -> Stream.next t)
75 | and parse_array = parser
76 | | [< k=parse_int; v=parse_one; a=parse_int_array [k,v] >] -> AI a
77 | | [< k=parse_str; v=parse_one; a=parse_str_array [k,v] >] -> AS a
78 | | [< >] -> AI [] (* empty array *)
79 | and parse_int_array acc = parser
80 | | [< k=parse_int; v=parse_one; t >] -> parse_int_array ((k,v)::acc) t
81 | | [< >] -> List.rev acc
82 | and parse_str_array acc = parser
83 | | [< k=parse_str; v=parse_one; t >] -> parse_str_array ((k,v)::acc) t
84 | | [< >] -> List.rev acc
85 |
86 | let parse stream =
87 | let show () =
88 | let tail = Stream.npeek 10 stream >> List.map (String.make 1) >> String.concat "" in
89 | Printf.sprintf "Position %u : %s" (Stream.count stream) tail
90 | in
91 | try
92 | let r = parse_one stream in
93 | Stream.empty stream; r
94 | with
95 | | Stream.Error _ | Stream.Failure -> failwith (show ())
96 |
97 | let parse_string = parse $ Stream.of_string
98 |
99 | (** Combinators for easy deconstruction *)
100 |
101 | exception Error of string
102 |
103 | let fail v str = raise (Error (Printf.sprintf "%s : %s" str (Std.dump v)))
104 |
105 | let int = function I n -> n | x -> fail x "int"
106 | let str = function S s -> s | x -> fail x "str"
107 |
108 | let opt k x = try Some (k x) with Error _ -> None
109 |
110 | let values f = function
111 | | AS a -> List.map (f $ snd) a
112 | | AI a -> List.map (f $ snd) a
113 | | x -> fail x "values"
114 |
115 | let array f = function
116 | | AS a -> List.map (fun (k,v) -> k, f v) a
117 | | x -> fail x "array"
118 |
119 | let assoc php name =
120 | match php with
121 | | AS a -> List.assoc name a
122 | | _ -> fail php "assoc"
123 |
124 | module Out = struct
125 |
126 | (** Combinators to build values of [php] type *)
127 |
128 | let str s = S s
129 | let int n = I n
130 |
131 | let array f e = AI (e >> Enum.mapi (fun i x -> i, f x) >> List.of_enum)
132 | let iarray f e = AI (e >> Enum.map (fun (k,v) -> k, f v) >> List.of_enum)
133 | let sarray f e = AS (e >> Enum.map (fun (k,v) -> k, f v) >> List.of_enum)
134 |
135 | (** Serialize [php] value *)
136 | let output out v =
137 | let put_arr f a = IO.printf out "a:%u:{" (List.length a); List.iter f a; IO.write out '}' in
138 | let rec put = function
139 | | AS a -> put_arr (fun (k,v) -> put (S k); put v) a
140 | | AI a -> put_arr (fun (k,v) -> put (I k); put v) a
141 | | I n -> IO.printf out "i:%i;" n
142 | | B b -> IO.printf out "b:%u;" (if b then 1 else 0)
143 | | F f -> IO.printf out "d:%f;" f
144 | | N -> IO.nwrite out "N;"
145 | | S s -> IO.printf out "s:%u:\"%s\";" (String.length s) s
146 | in
147 | put v
148 |
149 | end
150 |
151 | let to_string v =
152 | let out = IO.output_string () in
153 | Out.output out v;
154 | IO.close_out out
155 |
156 |
--------------------------------------------------------------------------------
/pmp:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # This program is part of Aspersa (http://code.google.com/p/aspersa/)
3 |
4 | # ########################################################################
5 | # This script aggregates GDB stack traces for a selected program. By default it
6 | # does mysqld.
7 | #
8 | # Author: Baron Schwartz, based on a script by Domas Mituzas at
9 | # poormansprofiler.org
10 | # ########################################################################
11 |
12 | # License: BSD 3-clause
13 | # https://opensource.org/licenses/BSD-3-Clause
14 |
15 | # Print a usage message and exit.
16 | usage() {
17 | if [ "${OPT_ERR}" ]; then
18 | echo "${OPT_ERR}"
19 | fi
20 | cat <<-USAGE
21 | Usage: $0 [OPTIONS] [FILE]
22 | $0 does two things: 1) get a GDB backtrace 2) aggregate it.
23 | If you specify a FILE, then step 1) is not performed.
24 | Options:
25 | -b BINARY Which binary to trace (default mysqld)
26 | -i ITERATIONS How many traces to gather and aggregate (default 10)
27 | -k KEEPFILE Keep the raw traces in this file after aggregation
28 | -l NUMBER Aggregate only first NUMBER functions; 0=infinity (default 16)
29 | -p PID Process ID of the process to trace; overrides -b
30 | -s SLEEPTIME Number of seconds to sleep between iterations (default 0)
31 | -t THREAD Threads to trace (default all)
32 | --no-demangle Do not demangle OCaml functions, print raw symbol names
33 | USAGE
34 | exit 1
35 | }
36 |
37 | # Actually does the aggregation. The arguments are the max number of functions
38 | # to aggregate, and the files to read. If maxlen=0, it means infinity. We have
39 | # to pass the maxlen argument into this function to make maxlen testable.
40 | aggregate_stacktrace() {
41 | maxlen="$1";
42 | demangle="$3";
43 | cat > ${tmpfile}.awk <( *\$|::)/ ) {
87 | if ( 0 == gsub(/<[^<>]*>/, "", targ) ) {
88 | break;
89 | }
90 | }
91 | # Remove void and const decorators.
92 | gsub(/ (void|const) /, "", targ);
93 | gsub(/ /, "", targ);
94 | }
95 | else if ( targ ~ /\\?\\?/ && \$2 ~ /[1-9]/ ) {
96 | # Substitute ?? by the name of the library.
97 | targ = \$NF;
98 | while ( targ ~ /\\// ) {
99 | targ = substr(targ, index(targ, "/") + 1);
100 | }
101 | targ = substr(targ, 1, index(targ, ".") - 1);
102 | targ = targ "::??";
103 | }
104 | else if ( ${demangle} == 1 && targ ~ /^camlDune__exe__/) {
105 | sub(/^camlDune__exe__/,"",targ);
106 | }
107 | else if ( ${demangle} == 1 && targ ~ /^caml/ && targ !~ /^caml_/ ) {
108 | a = split(targ, p, "_");
109 | if (a > 1 && p[a] ~ /^[0-9]+$/)
110 | {
111 | if (p[a-1] == "fun")
112 | {
113 | targ = substr(targ, 1, length(targ) - length(p[a]) - length(p[a-1]) - 1) "#" p[a]
114 | }
115 | else
116 | {
117 | targ = substr(targ, 1, length(targ) - length(p[a]) - 1)
118 | }
119 | }
120 | gsub(/__/, ".", targ);
121 | sub(/^caml/, "", targ);
122 | }
123 | # get rid of long symbol names such as 'pthread_cond_wait@@GLIBC_2.3.2'
124 | if ( targ ~ /@@/ ) {
125 | fname = substr(targ, 1, index(targ, "@@") - 1);
126 | }
127 | else {
128 | fname = targ;
129 | }
130 | if ( ${maxlen:-0} == 0 || c < ${maxlen:-0} ) {
131 | if (s != "" ) {
132 | s = s " " fname;
133 | }
134 | else {
135 | s = fname;
136 | }
137 | }
138 | c++;
139 | }
140 | END {
141 | print s
142 | }
143 | EOF
144 | awk -f ${tmpfile}.awk "$2" | sort | uniq -c | sort -n -k 1,1
145 | }
146 |
147 | # The main program to run.
148 | main() {
149 | export tmpfile=$(tempfile --prefix=pmp);
150 |
151 | # Get command-line options
152 | for o; do
153 | case "${o}" in
154 | --)
155 | shift; break;
156 | ;;
157 | --help)
158 | usage;
159 | ;;
160 | -b)
161 | shift; OPT_b="${1}"; shift;
162 | ;;
163 | -i)
164 | shift; OPT_i="${1}"; shift;
165 | ;;
166 | -k)
167 | shift; OPT_k="${1}"; shift;
168 | ;;
169 | -l)
170 | shift; OPT_l="${1}"; shift;
171 | ;;
172 | -p)
173 | shift; OPT_p="${1}"; shift;
174 | ;;
175 | -s)
176 | shift; OPT_s="${1}"; shift;
177 | ;;
178 | -t)
179 | shift; OPT_t="${1}"; shift;
180 | ;;
181 | --no-demangle)
182 | shift; OPT_demangle=0;
183 | ;;
184 | -*)
185 | OPT_ERR="Unknown option ${o}."
186 | usage
187 | ;;
188 | esac
189 | done
190 | export OPT_i="${OPT_i:-10}";
191 | export OPT_k="${OPT_k:-}";
192 | export OPT_l="${OPT_l:-16}";
193 | export OPT_b="${OPT_b:-mysqld}";
194 | export OPT_p="${OPT_p:-}";
195 | export OPT_s="${OPT_s:-0}";
196 | export OPT_t="${OPT_t:-all}";
197 | export OPT_demangle="${OPT_demangle:-1}";
198 |
199 | if [ -z "${1}" ]; then
200 | # There's no file to analyze, so we'll make one.
201 | if [ -z "${OPT_p}" ]; then
202 | OPT_p=$(pidof -s "${OPT_b}" 2>/dev/null);
203 | if [ -z "${OPT_p}" ]; then
204 | OPT_p=$(pgrep -o -x "${OPT_b}" 2>/dev/null)
205 | fi
206 | if [ -z "${OPT_p}" ]; then
207 | OPT_p=$(ps -eaf | grep "${OPT_b}" | grep -v grep | awk '{print $2}' | head -n1);
208 | fi
209 | fi
210 | date;
211 | for x in $(seq 1 $OPT_i); do
212 | gdb -ex "set pagination 0" -ex "thread apply $OPT_t bt" -batch /proc/$OPT_p/exe -p $OPT_p >> "${OPT_k:-${tmpfile}}"
213 | date +'TS %N.%s %F %T' >> "${OPT_k:-${tmpfile}}"
214 | sleep $OPT_s
215 | done
216 | fi
217 |
218 | if [ $# -eq 0 ]; then
219 | aggregate_stacktrace "${OPT_l}" "${OPT_k:-${tmpfile}}" "${OPT_demangle}"
220 | else
221 | aggregate_stacktrace "${OPT_l}" "$@" "${OPT_demangle}"
222 | fi
223 | rm -f ${tmpfile} ${tmpfile}.awk
224 | }
225 |
226 | # Execute the program if it was not included from another file. This makes it
227 | # possible to include without executing, and thus test.
228 | if [ "$(basename "$0")" = "pmp" ] || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then
229 | main "$@"
230 | fi
231 |
--------------------------------------------------------------------------------
/pmpa:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 |
3 | # Based on a pmp script from Aspersa (http://code.google.com/p/aspersa/) by Baron Schwartz,
4 | # in turn based on a script by Domas Mituzas at http://poormansprofiler.org
5 |
6 | # Description: https://inbox.ocaml.org/caml-list/20110807150719.34376e5e605354e296c528ca@gmail.com/
7 |
8 | # License: BSD 3-clause
9 | # https://opensource.org/licenses/BSD-3-Clause
10 |
11 | usage()
12 | {
13 | if [ "${OPT_ERR}" ]; then
14 | echo "E: ${OPT_ERR}"
15 | fi
16 | cat <<-USAGE
17 | Usage: $0 [OPTIONS]
18 | $0 does two things:
19 | 1) sets a breakpoint and gets a backtrace using GDB
20 | 2) aggregates it
21 | Options:
22 | -i ITERATIONS How many traces to gather and aggregate (default 10)
23 | -l NUMBER Aggregate only first NUMBER functions (default 16)
24 | -p PID Process ID of the process to trace
25 | -t FUNCTION Function to break on (default caml_gc_dispatch)
26 | USAGE
27 | exit 1
28 | }
29 |
30 |
31 | main ()
32 | {
33 | for o; do
34 | case "${o}" in
35 | --help)
36 | usage;
37 | ;;
38 | -p)
39 | shift; OPT_p="${1}"; shift;
40 | ;;
41 | -i)
42 | shift; OPT_i="${1}"; shift;
43 | ;;
44 | -t)
45 | shift; OPT_t="${1}"; shift;
46 | ;;
47 | -l)
48 | shift; OPT_l="${1}"; shift;
49 | ;;
50 | -*)
51 | OPT_ERR="unknown option ${o}";
52 | usage
53 | ;;
54 | esac
55 | done
56 | export OPT_p="${OPT_p:-}";
57 | export OPT_i="${OPT_i:-10}";
58 | export OPT_t="${OPT_t:-caml_gc_dispatch}";
59 | export OPT_l="${OPT_l:-16}";
60 |
61 | if [ ! "${OPT_p}" ] ; then
62 | OPT_ERR="PID not specified";
63 | usage
64 | fi
65 |
66 | for i in $(seq 1 $OPT_i); do
67 | gdb -p "$OPT_p" "/proc/$OPT_p/exe" -batch -n -q -ex 'set interactive off' -ex "b $OPT_t" -ex 'c' -ex 'bt'\
68 | |awk '/^#[^0]/ {print $4}' \
69 | |sed -e s/^camlDune__exe__// -e s/__/./g -e 's/^caml\([^_]\)/\1/' \
70 | |grep -Fv caml_garbage_collection |grep -Fv caml_call_gc \
71 | |uniq |head -n "$OPT_l" |paste -s -d , ;
72 | done |sort |uniq -c |sort -n;
73 | }
74 |
75 | main $@
76 |
--------------------------------------------------------------------------------
/pvolume.sh:
--------------------------------------------------------------------------------
1 | #!/bin/dash
2 |
3 | SINK=$(pactl info | grep Default.Sink | awk '{print $3}')
4 | SOURCE=$(pactl info | grep Default.Source | awk '{print $3}')
5 |
6 | #echo $SINK
7 | #set -x
8 |
9 | case "$1" in
10 | "up") pactl set-sink-mute $SINK 0; pactl set-sink-volume $SINK +5% ;;
11 | "down") pactl set-sink-mute $SINK 0; pactl -- set-sink-volume $SINK -5% ;;
12 | "mute-out") pactl set-sink-mute $SINK toggle ;;
13 | "mute-in") pactl set-source-mute $SOURCE toggle ;;
14 | "next-out")
15 | OTHER_SINK=$(pactl list sinks | grep Name: | awk '{print $2}' | ~/bin/next.ml.exe "$SINK")
16 | if [ "$OTHER_SINK" != "$SINK" ]; then
17 | pactl set-default-sink "$OTHER_SINK"
18 | notify-send -u normal -i audio-volume-medium-symbolic -t 5000 "Sound output" "$(printf "Now playing on %s" "$OTHER_SINK")"
19 | fi
20 | ;;
21 | *) echo unknown command; exit 2;;
22 | esac
23 |
--------------------------------------------------------------------------------