Getting ready for program analysis with OCaml
5 exercises • ~2 hours • No tests — guided tutorials
By the end of this module, you will be able to:
List.map/fold, StringMap, StringSet, and ref
ocamllex/Menhir grammar rules for a simple parser
Match on AST node types directly. The compiler warns you if you forget a case.
ASTs, lattice values, and analysis results are all naturally expressed as ADTs.
Strong static types catch bugs at compile time — no null pointer surprises.
Functional style means fewer side effects, easier reasoning about program state.
Signatures + functors let you write generic analyses parameterized by abstract domains.
ocamllex and Menhir provide industrial-strength lexer/parser generators.
(* Immutable binding: [x] names the value 42 and cannot be reassigned. *)
let x = 42
(* Function with type annotations: both the argument and the result are
   declared as [int]. *)
let square (n : int) : int = n * n
(* Multiple arguments are listed one after another, separated by spaces. *)
let add (a : int) (b : int) : int = a + b
let...in
let hypotenuse a b =
let a2 = a *. a in
let b2 = b *. b in
Float.sqrt (a2 +. b2)
(* Absolute value of an integer: negative inputs are negated, everything
   else is returned unchanged. *)
let abs x =
  if x < 0 then -x else x
(* Names the sign of [n]. The [if] chain is itself the value of the
   function body, so no "return" keyword is involved. *)
let classify n =
  if n = 0 then "zero"
  else if n < 0 then "negative"
  else "positive"
Everything is an expression that produces a value. There are no statements.
square, is_empty, greet, is_digit, classify_char.
(* A position is (line, column) *)
type pos = int * int
(* The position at line 1, column 1. *)
let origin : pos = (1, 1)
(* Destructure in function args: the tuple pattern binds [line] and [col]
   directly. Renders the position as "line L, col C". *)
let format_pos ((line, col) : pos) : string =
Printf.sprintf "line %d, col %d" line col
(* Moves a (line, column) position past the character [c]: a newline starts
   the next line at column 1, any other character advances the column. *)
let advance_pos (line, col) c =
  match c with
  | '\n' -> (line + 1, 1)
  | _ -> (line, col + 1)
Tuples are positional — access by pattern matching, not by name.
(* A record type: every field has a name and a type. *)
type assignment = {
var_name : string;
value : int;
line : int;
}
(* A record value is built by giving a value for each field. *)
let a = { var_name = "x"; value = 5; line = 1 }
(* Field access *)
let name = a.var_name
(* Functional update -- creates a NEW record. [a] is left unchanged; [a']
   differs from it only in [value], which becomes 5 + 3 = 8. *)
let a' = { a with value = a.value + 3 }
Records are immutable by default. Use { r with field = new_val } to "update".
(* String concatenation uses the [^] operator. *)
let greeting = "Hello, " ^ "world!" (* "Hello, world!" *)
let msg = "x = " ^ string_of_int 42 (* "x = 42" *)

(* Print to stdout. The call is wrapped in [let () = ...] because, in a
   source file, a bare expression placed right after a [let] binding is
   parsed as extra arguments to the preceding expression. *)
let () = Printf.printf "name = %s, age = %d\n" "Alice" 30

(* Format to a string *)
let s = Printf.sprintf "[%s: %s]" "keyword" "if"
(* Common format specifiers *)
(* %d int %s string %b bool
%f float %c char %B bool (true/false) *)
(* This is a compile error -- OCaml checks format types! *)
Printf.printf "%d" "not an int"
(* Error: This expression has type string but ... expected int *)
Printf is checked at compile time. No %s-on-an-int crashes.
ADTs let you define types with multiple variants, each carrying different data. They are the backbone of ASTs in program analysis.
(* Binary operators: three constructors that carry no data. *)
type op = Add | Sub | Mul
(* Expression tree -- a mini AST. [BinOp] is recursive: both of its operands
   are themselves expressions. *)
type expr =
| Num of int
| Var of string
| BinOp of op * expr * expr
Each variant is a constructor that tags the data it carries.
(* 2 + 3 *)
let e1 = BinOp (Add, Num 2, Num 3)
(* x * (1 + y) *)
let e2 = BinOp (Mul, Var "x",
BinOp (Add, Num 1, Var "y"))
Shared_ast.Ast_types defines expr, stmt, func_def, and program using exactly this pattern.
match...with is OCaml's most powerful control structure. It destructures values and the compiler ensures you handle every case.
(* Renders an operator as its infix symbol. *)
let string_of_op o =
match o with
| Add -> "+"
| Sub -> "-"
| Mul -> "*"
(* Renders an expression tree as a fully parenthesised infix string:
   BinOp (Add, Num 2, Num 3) becomes "(2 + 3)". Numbers and variables are
   printed without parentheses. *)
let rec string_of_expr e =
match e with
| Num n -> string_of_int n
| Var x -> x
| BinOp (o, l, r) ->
Printf.sprintf "(%s %s %s)"
(string_of_expr l)
(string_of_op o)
(string_of_expr r)
(* If you forget a case: *)
let bad o = match o with
| Add -> "+"
| Sub -> "-"
(* Warning 8: this pattern-matching
is not exhaustive.
Here is an example of a case
that is not matched: Mul *)
(* Classifies a point by which of its coordinates are zero. Cases are tried
   top to bottom, so (0, 0) is caught before the single-axis patterns. *)
let classify = function
  | 0, 0 -> "origin"
  | 0, _ -> "y-axis"
  | _, 0 -> "x-axis"
  | _, _ -> "other"
let rec
(* Count nodes in an expression tree *)
(* Total number of nodes: each leaf counts as one, and an operator node
   counts as one plus the nodes of both operands. *)
let rec count_nodes e =
  match e with
  | Num _ -> 1
  | Var _ -> 1
  | BinOp (_, l, r) ->
    let left = count_nodes l in
    let right = count_nodes r in
    1 + left + right

(* Tree depth: a leaf has depth 1; an operator node is one level deeper than
   its deeper operand. *)
let rec depth e =
  match e with
  | Num _ -> 1
  | Var _ -> 1
  | BinOp (_, l, r) ->
    let deepest = max (depth l) (depth r) in
    1 + deepest
(* Option type: Some x or None *)
(* NOTE(review): [option] is already provided by the standard library;
   redeclaring it as below shadows the built-in type. Presumably shown for
   illustration only -- confirm before copying into real code. *)
type 'a option = Some of 'a | None
(* Evaluate if no variables present: returns [Some n] when the whole tree
   reduces to a number, and [None] when a [Var] occurs anywhere in it. *)
let rec eval e =
match e with
| Num n -> Some n
| Var _ -> None (* can't evaluate *)
| BinOp (o, l, r) ->
(* Both operands must produce a number before the operator is applied.
   [apply_op] is not defined in this snippet -- presumably it maps an [op]
   and two ints to an int; confirm against its definition. *)
match eval l, eval r with
| Some a, Some b ->
Some (apply_op o a b)
| _ -> None
Some n = known, None = unknown.
Tree transformations are the core mechanic of program analysis. You will do this constantly in Modules 2-6.
(* substitute "x" 5 (x * (1 + y)) --> (5 * (1 + y)) *)
(* Replaces every occurrence of the variable [var_name] in [e] with
   [Num value]. Numbers and other variables are returned as they are;
   operator nodes are rebuilt from their substituted operands. *)
let rec substitute var_name value e =
match e with
| Num _ -> e
| Var x -> if x = var_name then Num value else e
| BinOp (o, l, r) ->
BinOp (o, substitute var_name value l,
substitute var_name value r)
(* simplify (10 + 20) - 5 --> 25 *)
(* Constant folding, bottom-up: the operands are simplified first, and an
   operator node collapses to a single [Num] only when both simplified
   operands are numbers. [apply_op] is not defined in this snippet --
   presumably it computes the operator's result on two ints; confirm. *)
let rec simplify e =
match e with
| Num _ | Var _ -> e
| BinOp (o, l, r) ->
match simplify l, simplify r with
| Num a, Num b -> Num (apply_op o a b) (* fold! *)
| l', r' -> BinOp (o, l', r') (* can't fold *)
(* Immutable linked lists *)
let xs = [1; 2; 3; 4; 5]
let ys = 0 :: xs (* prepend: [0;1;2;3;4;5] *)

(* Pattern match on lists.
   [length l] counts the elements of [l]. The running count is carried in an
   accumulator, so the recursive call is a tail call and long lists do not
   exhaust the stack. *)
let length l =
  let rec go acc = function
    | [] -> acc
    | _ :: rest -> go (acc + 1) rest
  in
  go 0 l
List.map — transform each element
let double_all xs = List.map (fun x -> x * 2) xs
(* double_all [1;2;3] = [2;4;6] *)
List.filter — keep matching elements
let keep_positive xs =
List.filter (fun x -> x > 0) xs
(* keep_positive [-1;3;0;5] = [3;5] *)
List.fold_left — reduce to a single value
let sum xs =
List.fold_left (fun acc x -> acc + x) 0 xs
(* sum [1;2;3;4] = 10 *)
You will use fold_left everywhere — building environments from lists of assignments, accumulating analysis results, computing fixpoints.
OCaml's standard library provides immutable, balanced-tree-backed Map and Set via functors.
module StringMap = Map.Make (String)

(* Build from a list of pairs: folds the (key, value) pairs into a map,
   starting from the empty map. When a key is repeated, the pair that comes
   later in the list replaces the earlier binding. *)
let build_env pairs =
  let add_binding env (k, v) = StringMap.add k v env in
  List.fold_left add_binding StringMap.empty pairs

(* Lookup with Option: [Some v] when [name] is bound, [None] otherwise. *)
let lookup env name =
  StringMap.find_opt name env

(* Get all keys, in the order [StringMap.bindings] lists them. *)
let all_vars env =
  env |> StringMap.bindings |> List.map fst
module StringSet = Set.Make (String)

let s1 = StringSet.of_list ["x"; "y"; "z"]
let s2 = StringSet.of_list ["y"; "z"; "w"]

(* Set operations *)
let union = StringSet.union s1 s2 (* {w, x, y, z} *)
let inter = StringSet.inter s1 s2 (* {y, z} *)
let diff = StringSet.diff s1 s2 (* {x} *)

(* Convert to list. The result is bound to a name because, in a source file,
   a bare expression placed right after a [let] binding is parsed as extra
   arguments to the preceding expression. *)
let inter_elements = StringSet.elements inter (* ["y"; "z"] *)
You will use StringSet for live-variable sets, reaching-definition sets, and taint sets. Map stores variable-to-abstract-value bindings.
type assignment = {
  var_name : string;
  value : int;
  line : int;
}

let a = { var_name = "x"; value = 5; line = 1 }

(* Format for display: the example record [a] renders as
   "x = 5 (line 1)". The fields are bound directly by the record pattern. *)
let format_assign { var_name; value; line } =
  Printf.sprintf "%s = %d (line %d)" var_name value line

(* Functional update: returns a copy of [a] whose [value] is raised by [n];
   the record passed in is left untouched. *)
let increment_value a n =
  let bumped = a.value + n in
  { a with value = bumped }
ref
(* ref creates a mutable cell *)
let counter = ref 0

(* Read with ! *)
let current = !counter (* 0 *)

(* Write with := -- wrapped in [let () = ...] because, in a source file, a
   bare expression placed right after a [let] binding is absorbed into the
   preceding binding's expression. *)
let () = counter := !counter + 1 (* now 1 *)
(* Closure over a ref -- a counter factory. Every call to [make_counter]
   allocates its own private cell; the function it returns hands out the
   cell's current value and then bumps it by one. *)
let make_counter () =
  let cell = ref 0 in
  let tick () =
    let issued = !cell in
    incr cell;
    issued
  in
  tick

let next = make_counter ()
(* next() = 0, next() = 1, next() = 2 *)
You will use ref in fixpoint loops (Modules 3-4) where a worklist updates until convergence.
OCaml modules group related types, values, and functions. Signatures describe the interface; structures provide the implementation.
(* Signature of a lattice: a type of elements with a least element, a
   greatest element and a join operation. *)
module type LATTICE = sig
type t (* abstract here: the representation is not visible to callers *)
val bottom : t (* least element *)
val top : t (* greatest element *)
val join : t -> t -> t (* least upper bound of two elements *)
val equal : t -> t -> bool
val to_string : t -> string
end
The signature says what exists. The type t is abstract — callers cannot see its representation.
(* Two-element lattice over [bool]: [false] is bottom, [true] is top, and
   join is logical "or". The [with type t = bool] constraint keeps the
   element type visible to callers. *)
module BoolLattice : LATTICE with type t = bool = struct
  type t = bool

  let bottom = false
  let top = true
  let join = ( || )
  let equal : t -> t -> bool = ( = )
  let to_string = string_of_bool
end
The with type t = bool makes the type transparent so callers can pass true/false directly.
A functor is a function from modules to modules. It lets you write generic code parameterized by an interface.
(* MakeEnv takes any LATTICE and produces an environment module *)
module MakeEnv (L : LATTICE) = struct
  module M = Map.Make (String)

  (* An environment maps variable names (strings) to lattice values. *)
  type t = L.t M.t

  let empty = M.empty

  (* A variable with no binding reads as [L.bottom]. *)
  let lookup env x =
    match M.find_opt x env with
    | None -> L.bottom
    | Some v -> v

  let update env x v = M.add x v env

  (* Pointwise join: a key bound in both environments gets the join of its
     two values; a key bound in only one keeps that value. *)
  let join env1 env2 =
    let merge _key left right = Some (L.join left right) in
    M.union merge env1 env2
end
module Env = MakeEnv(ThreeValueLattice) (* concrete environment *)
let env = Env.update Env.empty "x" Zero (* use it! *)
The bootcamp's own MakeEnv lives in lib/abstract_domains/abstract_env.ml. In Modules 3-4, you will plug in sign domains, interval domains, and taint domains as the LATTICE parameter.
A lattice is a partially ordered set with a least element (bottom), greatest element (top), and a join operation (least upper bound).
type three_value =
  | Bot
  | Zero
  | Positive
  | Unknown

(* Four-element lattice: [Bot] is the least element, [Unknown] the greatest,
   and joining [Zero] with [Positive] gives [Unknown]. *)
module ThreeValueLattice : LATTICE with type t = three_value = struct
  type t = three_value

  let bottom = Bot
  let top = Unknown

  (* Every pair of elements is listed, so adding a constructor later makes
     the compiler flag this match. *)
  let join a b =
    match a, b with
    | Bot, v | v, Bot -> v
    | Zero, Zero -> Zero
    | Positive, Positive -> Positive
    | Zero, Positive | Positive, Zero -> Unknown
    | Unknown, _ | _, Unknown -> Unknown

  let equal : t -> t -> bool = ( = )

  let to_string v =
    match v with
    | Bot -> "Bot"
    | Zero -> "Zero"
    | Positive -> "Positive"
    | Unknown -> "Unknown"
end
Every abstract domain in program analysis forms a lattice:
A parser turns source text into an AST. OCaml provides two tools that work together:
(* ocamllex lexer (.mll file) *)
let digit = ['0'-'9']
let alpha = ['a'-'z' 'A'-'Z' '_']

(* Produces the next token, skipping whitespace. '*' and '/' are recognised
   so that the STAR and SLASH tokens declared by the grammar can actually be
   produced; any other character is reported by name instead of failing with
   the generic "lexing: empty token" error. *)
rule token = parse
  | [' ' '\t' '\n']+ { token lexbuf }
  | '+'              { PLUS }
  | '-'              { MINUS }
  | '*'              { STAR }
  | '/'              { SLASH }
  | digit+ as n      { INT (int_of_string n) }
  | alpha+ as id     { IDENT id }
  | eof              { EOF }
  | _ as c           { failwith (Printf.sprintf "unexpected character %C" c) }
(* Menhir grammar (.mly file) *)
%token <int> INT
%token <string> IDENT
%token PLUS MINUS STAR SLASH
%token EOF               (* used by [program] below, so it must be declared *)
%left PLUS MINUS         (* precedence *)
%left STAR SLASH
(* Entry point. NOTE(review): no result type is given here, which relies on
   Menhir's type inference (--infer); otherwise write
   %start <TYPE> program with the fully qualified AST type. *)
%start program
%%
program: e=expr EOF { e } ;
expr:
  | e1=expr PLUS e2=expr
    { BinOp(Add, e1, e2) }
  | a=atom { a } ;
atom:
  | n=INT { Num n }
  | id=IDENT { Var id } ;
Menhir resolves ambiguity in grammars using precedence and associativity declarations.
How should 3 + 4 * 2 be parsed?
Option A: (3 + 4) * 2 = 14
Option B: 3 + (4 * 2) = 11
We want Option B (standard math precedence).
%left PLUS MINUS (* lowest *)
%left STAR SLASH (* higher *)
%nonassoc UMINUS (* highest *)
%left — left-associative: a-b-c = (a-b)-c
%right — right-associative
%nonassoc — cannot chain
atom:
| MINUS a=atom %prec UMINUS
{ Neg a }
;
%prec UMINUS tells Menhir to use the highest precedence for this rule.
| # | Exercise | Time | Key Concepts | Foreshadows |
|---|---|---|---|---|
| 1 | OCaml Basics "Token Classifier" | ~20 min | let, functions, tuples, Printf, char classification | Lexer helpers |
| 2 | Types and Recursion "Mini Expression Tree" | ~25 min | ADTs, pattern matching, Option, recursive tree ops | shared_ast expr type |
| 3 | Collections and Records "Variable Tracker" | ~25 min | List.map/fold, StringMap, StringSet, ref | Dataflow analysis |
| 4 | Modules and Functors "Analysis Domain Builder" | ~25 min | Signatures, structs, functors, LATTICE | abstract_domains |
| 5 | Calculator Parser | ~25 min | ocamllex, Menhir grammar rules | Lab 2 parser |
Fill in the (* EXERCISE: ... *) stubs, run with dune exec, and compare output against the STUDENT_README. No OUnit2 tests — just guided tutorials.
Shared_ast.Ast_types from Module 2.
Abstract_domains.Abstract_env.MakeEnv from Module 4.
ref only when needed
LATTICE + MakeEnv pattern recurs throughout the bootcamp
ocamllex for lexing (regex-based tokenization)
Menhir for parsing (grammar rules producing AST nodes)
Now that you are comfortable with OCaml, Module 1 introduces the theory behind program analysis.
| Module 0 Concept | Module 1+ Usage |
|---|---|
| ADTs + pattern matching | AST traversal |
| Map + Set | Dataflow facts |
| LATTICE signature | Abstract domains |
| MakeEnv functor | Abstract environments |
| Menhir parser | Lab 2 parser extension |