├── .gitignore
├── .merlin
├── .travis.yml
├── CHANGES.md
├── LICENSE.md
├── README.md
├── _tags
├── doc
    └── api.odocl
├── example
    ├── keys
    │   ├── config.ml
    │   └── unikernel.ml
    └── outlines
    │   ├── config.ml
    │   └── unikernel.ml
├── lwt
    └── echo_server.ml
├── mirage
    ├── telnet_mirage.ml
    ├── telnet_mirage.mli
    └── telnet_mirage.mllib
├── opam
├── pkg
    ├── META
    └── pkg.ml
├── rfc
    ├── rfc1409.txt
    ├── rfc1572.txt
    ├── rfc5198.txt
    ├── rfc854.txt
    ├── rfc855.txt
    ├── rfc856.txt
    └── rfc857.txt
└── src
    ├── server.ml
    ├── telnet.ml
    ├── telnet.mli
    ├── telnet.mllib
    └── wire.ml


/.gitignore:
--------------------------------------------------------------------------------
 1 | _build/
 2 | *.native
 3 | *.byte
 4 | *.install
 5 | 
 6 | Makefile
 7 | key_gen.ml
 8 | log
 9 | main.ml
10 | mir-*
11 | *.xe
12 | *.xl
13 | *.xl.in
14 | *_libvirt.xml
15 | 


--------------------------------------------------------------------------------
/.merlin:
--------------------------------------------------------------------------------
 1 | S src
 2 | S mirage
 3 | S lwt
 4 | 
 5 | B _build/**
 6 | 
 7 | PKG cstruct
 8 | PKG lwt
 9 | PKG mirage-types.lwt
10 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: c
 2 | install: wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-opam.sh
 3 | script: bash -ex .travis-opam.sh
 4 | sudo: required
 5 | env:
 6 |   global:
 7 |     - PACKAGE="telnet"
 8 |   matrix:
 9 |     - OCAML_VERSION=4.02
10 |     - OCAML_VERSION=4.03
11 |     - OCAML_VERSION=4.04
12 |     - OCAML_VERSION=4.02 DEPOPTS="mirage-types ipaddr io-page" TESTS=false
13 |     - OCAML_VERSION=4.04 DEPOPTS="mirage-types ipaddr io-page" TESTS=false
14 | notifications:
15 |   email: false
16 | 


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hannesm/telnet/ff11a380d06161a9300aa212e6c0595026ad9920/CHANGES.md


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Copyright (c) 2016 Hannes Mehnert <hannes@mehnert.org>
 3 |  *
 4 |  * Permission to use, copy, modify, and distribute this software for any
 5 |  * purpose with or without fee is hereby granted, provided that the above
 6 |  * copyright notice and this permission notice appear in all copies.
 7 |  *
 8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 |  *
16 |  *)
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Telnet - a telnet server implementation in OCaml to be used with MirageOS
2 | 
3 | Status: rough prototype, needs more attention
4 | 
5 | Dependencies: notty on mirage branch (`opam pin add notty https://github.com/pqwy/notty.git#mirage`)
6 | 
7 | [![Build Status](https://travis-ci.org/hannesm/telnet.svg?branch=master)](https://travis-ci.org/hannesm/telnet)
8 | 


--------------------------------------------------------------------------------
/_tags:
--------------------------------------------------------------------------------
 1 | true : color(always), bin_annot, safe_string, principal
 2 | true : warn(+A-4)
 3 | "src" : include
 4 | 
 5 | true : package(cstruct)
 6 | 
 7 | <src/wire.{ml,mli}>: package(cstruct.ppx sexplib)
 8 | 
 9 | <mirage/telnet_mirage.{ml,mli}>: package(mirage-types.lwt lwt)
10 | 
11 | <lwt/echo_server.{ml,mli,native}>: package(lwt lwt.unix sexplib)
12 | 
13 | 


--------------------------------------------------------------------------------
/doc/api.odocl:
--------------------------------------------------------------------------------
1 | Telnet
2 | 


--------------------------------------------------------------------------------
/example/keys/config.ml:
--------------------------------------------------------------------------------
 1 | open Mirage
 2 | (* Unikernel entry point: functor [Unikernel.Main], taking a console and an IPv4 stack. *)
 3 | let main = foreign "Unikernel.Main" (console @-> stackv4 @-> job)
 4 | (* Network stack obtained from [generic_stackv4] with the default console and [tap0]. *)
 5 | let stack = generic_stackv4 default_console tap0
 6 | (* Declares the opam/ocamlfind dependencies and registers the job under the name "network". *)
 7 | let () =
 8 |   add_to_opam_packages [ "telnet" ; "notty" ] ;
 9 |   add_to_ocamlfind_libraries [ "telnet"; "telnet.mirage" ; "notty" ; "notty.mirage" ; "notty.unix" ] ;
10 |   register "network" [
11 |     main $ default_console $ stack
12 |   ]
13 | 


--------------------------------------------------------------------------------
/example/keys/unikernel.ml:
--------------------------------------------------------------------------------
  1 | open Lwt.Infix
  2 | (* Colour wrappers: each prepends an ANSI SGR escape (31, 32, 33 or 36) to the format and appends the reset "\027[m". *)
  3 | let red fmt    = Printf.sprintf ("\027[31m"^^fmt^^"\027[m")
  4 | let green fmt  = Printf.sprintf ("\027[32m"^^fmt^^"\027[m")
  5 | let yellow fmt = Printf.sprintf ("\027[33m"^^fmt^^"\027[m")
  6 | let blue fmt   = Printf.sprintf ("\027[36m"^^fmt^^"\027[m")
  7 | 
  8 | (* Continue with [fb] only on [`Ok]; every other outcome is discarded as unit. *)
  9 | let (>>==) a fb =
 10 |   a >>= fun outcome -> match outcome with `Ok x -> fb x | _ -> Lwt.return_unit
 11 | 
 12 | module Main (C: V1_LWT.CONSOLE) (S: V1_LWT.STACKV4) = struct
 13 | 
 14 |   module T = Telnet_mirage.Make (S.TCPV4)
 15 |   module N = Notty_mirage.Term (T)
 16 |   (* Shorthands for the [Format] string and int printers. *)
 17 |   let pps = Format.pp_print_string
 18 |   let ppi = Format.pp_print_int
 19 | 
 20 |   (* Prints the upper-case label of a special (non-character) key on [fmt]. *)
 21 |   let pp_special fmt key =
 22 |     let label = pps fmt in
 23 |     match key with
 24 |     | `Escape    -> label "ESCAPE"
 25 |     | `Enter     -> label "ENTER"
 26 |     | `Tab       -> label "TAB"
 27 |     | `Backspace -> label "BACKSPACE"
 28 |     | `Home      -> label "HOME"
 29 |     | `End       -> label "END"
 30 |     | `Insert    -> label "INSERT"
 31 |     | `Delete    -> label "DELETE"
 32 |     | `Function n -> label "FN" ; ppi fmt n
 33 |     | `Page dir  -> label (match dir with `Up -> "PAGE UP" | `Down -> "PAGE DOWN")
 34 |     | `Arrow dir -> label (match dir with
 35 |         | `Up -> "UP" | `Down -> "DOWN" | `Left -> "LEFT" | `Right -> "RIGHT")
 36 | 
 37 |   (* Prints one letter per modifier, in list order: M (meta), C (ctrl), S (shift). *)
 38 |   let pp_mods fmt mods =
 39 |     let letter = function `Meta -> "M" | `Ctrl -> "C" | `Shift -> "S" in
 40 |     let print_one m = pps fmt (letter m) in
 41 |     List.iter print_one mods
 42 | 
 43 |   (* Prints the kind of a mouse event; a press is followed by the button name. *)
 44 |   let pp_mouse fmt ev =
 45 |     let button = function
 46 |       | `Left -> "Left" | `Middle -> "Middle" | `Right -> "Right"
 47 |       | `Scroll `Up   -> "Scroll Up"
 48 |       | `Scroll `Down -> "Scroll Down"
 49 |     in
 50 |     match ev with
 51 |     | `Release -> pps fmt "Release"
 52 |     | `Drag    -> pps fmt "Drag"
 53 |     | `Move    -> pps fmt "Move"
 54 |     | `Press k -> pps fmt "Press " ; pps fmt (button k)
 55 | 
 56 |   (* [take n xs] is the first [n] elements of [xs], or all of [xs] if it is shorter. *)
 57 |   let rec take n = function
 58 |     | [] -> []
 59 |     | _ when n = 0 -> []
 60 |     | x :: rest -> x :: take (pred n) rest
 61 |   (* Builds the screen image: a status line showing (w, h) and the size from [N.size term], at most [h - 3] events from the head of [xs] (one row each), and the "Push keys." prompt. A [`Resize] event prints "resizing..." on stdout and renders as an empty row. *)
 62 |   let render term (w, h) xs =
 63 |     let open Notty in
 64 |     let magenta = A.(fg lightmagenta ++ bg black)
 65 |     and green   = A.(fg lightgreen   ++ bg black)
 66 |     and blue    = A.(fg lightblue    ++ bg black) in
 67 |     let pp_mods  = I.pp_attr green pp_mods
 68 |     and pp_mouse = I.pp_attr blue pp_mouse in
 69 |     let attr = magenta in
 70 |     let xs = take (h - 3) xs in
 71 |     let msg = I.string A.empty "Push keys."
 72 |     and ks = List.map (function
 73 |         | `Key (`Uchar u, mods) ->
 74 |           I.(uchar blue u 1 1 <|> strf ~attr " u%04x %a" u pp_mods mods)
 75 |         | `Key (#Unescape.key as k, mods) ->
 76 |           I.strf ~attr "%a %a" pp_special k pp_mods mods
 77 |         | `Mouse (e, (x, y), mods) ->
 78 |           I.strf ~attr "MOUSE %a (%d, %d) %a" pp_mouse e x y pp_mods mods
 79 |         | `Resize _ -> Printf.printf "resizing..." ; I.empty
 80 |       ) xs |> I.vcat in
 81 |     let w', h' = N.size term in
 82 |     I.(strf ~attr:A.(fg lightblack) "[ESC quits.] (%d, %d) (%d, %d)" w h w' h' <->
 83 |        vsnap ~align:`Top (h - 3) ks <-> void 0 1 <-> msg |> pad ~l:1 ~t:1)
 84 | 
 85 |   (* Drives a Notty terminal over [flow]: render, send the image, wait for one event, repeat. *)
 86 |   let handle c flow =
 87 |     let rec session term (w, h) xs =
 88 |       let img = render term (w, h) xs in
 89 |       Notty_unix.output_image img ;
 90 |       N.write term (`Image img) >>== fun () ->
 91 |       N.read term >>= fun ev ->
 92 |       match ev with
 93 |       | `Ok (`Resize (w, h)) ->
 94 |         C.log_s c (green "resizing to %d, %d" w h) >>= fun () ->
 95 |         session term (w, h) xs
 96 |       | `Ok x -> Printf.printf "ok\n%!" ; session term (w, h) (x :: xs)
 97 |       | `Eof -> C.log_s c (red "eof while reading")
 98 |       | `Error e -> C.log_s c (red "error while reading %s" (N.error_message e))
 99 |     in
100 |     N.create flow >>= fun created ->
101 |     match created with
102 |     | `Ok term -> session term (80, 24) []
103 |     | `Eof -> C.log_s c (red "eof while creating terminal")
104 |     | `Error e ->
105 |       C.log_s c (red "error creating terminal %s" (N.error_message e))
106 | 
107 |   (* Unikernel entry point: listens on TCP port 23 and runs [handle] for every connection. *)
108 |   let start c s =
109 |     let on_connect tcp =
110 |       let peer, peer_port = S.TCPV4.get_dest tcp in
111 |       let peer = Ipaddr.V4.to_string peer in
112 |       C.log_s c (green "new tcp connection from %s %d" peer peer_port) >>= fun () ->
113 |       T.of_flow tcp >>= function
114 |       | `Eof -> C.log_s c (red "eof while of_flow")
115 |       | `Ok flow -> C.log_s c (green "handling flow now") >>= fun () -> handle c flow
116 |     in
117 |     S.listen_tcpv4 s ~port:23 on_connect ;
118 |     S.listen s
119 | 
120 | end
121 | 
122 | 


--------------------------------------------------------------------------------
/example/outlines/config.ml:
--------------------------------------------------------------------------------
 1 | open Mirage
 2 | (* Unikernel entry point: functor [Unikernel.Main], taking a console and an IPv4 stack. *)
 3 | let main = foreign "Unikernel.Main" (console @-> stackv4 @-> job)
 4 | (* Network stack obtained from [generic_stackv4] with the default console and [tap0]. *)
 5 | let stack = generic_stackv4 default_console tap0
 6 | (* Declares the opam/ocamlfind dependencies and registers the job under the name "network". *)
 7 | let () =
 8 |   add_to_opam_packages [ "telnet" ; "notty" ] ;
 9 |   add_to_ocamlfind_libraries [ "telnet"; "telnet.mirage" ; "notty" ; "notty.mirage" ; "notty.unix" ] ;
10 |   register "network" [
11 |     main $ default_console $ stack
12 |   ]
13 | 


--------------------------------------------------------------------------------
/example/outlines/unikernel.ml:
--------------------------------------------------------------------------------
 1 | open Notty
 2 | open Lwt.Infix
 3 | (* Colour wrappers: each prepends an ANSI SGR escape (31, 32, 33 or 36) to the format and appends the reset "\027[m". *)
 4 | let red fmt    = Printf.sprintf ("\027[31m"^^fmt^^"\027[m")
 5 | let green fmt  = Printf.sprintf ("\027[32m"^^fmt^^"\027[m")
 6 | let yellow fmt = Printf.sprintf ("\027[33m"^^fmt^^"\027[m")
 7 | let blue fmt   = Printf.sprintf ("\027[36m"^^fmt^^"\027[m")
 8 | 
 9 | let grid rows = I.vcat (List.map I.hcat rows) (* rows of cells: hcat each row, vcat the rows *)
10 | (* [outline attr i] frames image [i] with box-drawing characters drawn in [attr]. *)
11 | let outline attr i =
12 |   let w = I.width i and h = I.height i in
13 |   let glyph cp cols rows = I.uchar attr cp cols rows in
14 |   let top_left = glyph 0x256d 1 1 and top_right = glyph 0x256e 1 1
15 |   and bot_right = glyph 0x256f 1 1 and bot_left = glyph 0x2570 1 1 in
16 |   let hbar = glyph 0x2500 w 1 and vbar = glyph 0x2502 1 h in
17 |   grid [ [top_left; hbar; top_right]; [vbar; i; vbar]; [bot_left; hbar; bot_right] ]
18 | 
19 | module Main (C: V1_LWT.CONSOLE) (S: V1_LWT.STACKV4) = struct
20 | 
21 |   module T = Telnet_mirage.Make (S.TCPV4)
22 |   module N = Notty_mirage.Term (T)
23 | 
24 |   open Notty
25 | 
26 |   (* [image n] is an "X" wrapped in [n] nested outlines whose colour alternates with depth. *)
27 |   let rec image depth =
28 |     if depth = 0 then I.string A.(fg lightwhite) "X" else
29 |     let colour = if depth mod 2 = 0 then A.lightred else A.lightblack in
30 |     outline (A.fg colour) (image (pred depth))
31 | 
32 |   (* Draws [image s] snapped to 31x31, then redraws one level deeper per event read; stops (and logs) once a write or a read does not return [`Ok], instead of looping forever on a dead connection. *)
33 |   let rec loop c term s =
34 |     N.write term (`Image (image s |> I.hsnap 31 |> I.vsnap 31)) >>= function
35 |     | `Ok () -> N.read term >>= (function `Ok _ -> loop c term (s + 1) | _ -> C.log_s c "read: eof or error.")
36 |     | _      -> C.log_s c "shit."
37 | 
38 |   (* Unikernel entry point: listens on TCP port 23 and runs the drawing [loop] per connection. *)
39 |   let start c s =
40 |     let on_connect tcp =
41 |       let peer, peer_port = S.TCPV4.get_dest tcp in
42 |       C.log_s c (green "new tcp connection from %s %d" (Ipaddr.V4.to_string peer) peer_port) >>= fun () ->
43 |       T.of_flow tcp >>= function
44 |       | `Eof -> C.log_s c (red "eof while of_flow")
45 |       | `Ok flow ->
46 |         C.log_s c (green "handling flow now") >>= fun () ->
47 |         N.create flow >>= function
48 |         | `Ok term -> loop c term 1
49 |         | `Eof | `Error _ -> C.log_s c "init: shit." in
50 |     S.listen_tcpv4 s ~port:23 on_connect ;
51 |     S.listen s
52 | 
53 | end
54 | 


--------------------------------------------------------------------------------
/lwt/echo_server.ml:
--------------------------------------------------------------------------------
 1 | open Lwt.Infix
 2 | 
 3 | (* Renders the three components of a [Unix.Unix_error] as a single string. *)
 4 | let string_of_unix_err err f p =
 5 |   "Unix_error (" ^ Unix.error_message err ^ ", " ^ f ^ ", " ^ p ^ ")"
 6 | (* Logs [msg] on stdout via [Lwt_io.printf], prefixed with "[tag] ". *)
 7 | let yap ~tag msg = Lwt_io.printf "[%s] %s\n%!" tag msg
 8 | (* [serve port callback] accepts TCP connections on [port] in an endless loop; each accepted connection gets a fresh state from [Telnet.Server.init] and is handed to [callback state fd addr]. *)
 9 | let serve port callback =
10 |   let tag = "server" in
11 |   (* Listening socket: SO_REUSEADDR set, bound to any local address on [port], backlog of 10. *)
12 |   let server_s () =
13 |     let open Lwt_unix in
14 |     let s = socket PF_INET SOCK_STREAM 0 in
15 |     setsockopt s SO_REUSEADDR true ;
16 |     bind s (ADDR_INET (Unix.inet_addr_any, port)) ;
17 |     listen s 10 ;
18 |     s
19 |   in
20 |   (* Runs [callback] detached via [Lwt.async]; its completion or exception is only logged. *)
21 |   let handle state fd addr =
22 |     Lwt.async @@ fun () ->
23 |       Lwt.catch (fun () -> callback state fd addr >>= fun () -> yap ~tag "<- handler done")
24 |         (function
25 |           | Unix.Unix_error (e, f, p) ->
26 |             yap ~tag @@ "handler: " ^ (string_of_unix_err e f p)
27 |           | exn -> yap ~tag ("handler: exception " ^ Printexc.to_string exn))
28 |   in
29 |   (* Accept loop: on success the bytes returned by [Telnet.Server.init] are written before the handler starts (the written count is ignored); accept errors are logged and the loop continues. *)
30 |   yap ~tag ("-> start @ " ^ string_of_int port) >>= fun () ->
31 |   let rec loop s =
32 |     Lwt.catch (fun () -> Lwt_unix.accept s >|= fun f -> `R f)
33 |       (function
34 |         | Unix.Unix_error (e, f, p) -> Lwt.return (`L (string_of_unix_err e f p))
35 |         | exn -> Lwt.return (`L ("loop: exception " ^ Printexc.to_string exn))) >>= function
36 |     | `R (fd, addr) ->
37 |       yap ~tag "-> connect" >>= fun () ->
38 |       let state, out = Telnet.Server.init () in
39 |       Lwt_unix.write fd (Cstruct.to_string out) 0 (Cstruct.len out) >>= fun _w ->
40 |       ( handle state fd addr ; loop s )
41 |     | `L (msg) ->
42 |       yap ~tag ("server socket: " ^ msg) >>= fun () -> loop s
43 |     in
44 |     loop (server_s ())
45 | 
46 | (* Reads one chunk of at most 256 bytes from [fd] (not split into lines); the promise fails with [Invalid_argument "end of connection"] when the read returns 0. The exception value is built explicitly: [invalid_arg] would raise before [Lwt.fail] is ever called. *)
47 | let lines fd =
48 |   let buf = Bytes.create 256 in
49 |   Lwt_unix.read fd buf 0 256 >>= fun n ->
50 |   if n = 0 then Lwt.fail (Invalid_argument "end of connection") else Lwt.return (Bytes.sub buf 0 n)
51 | 
52 | (* Echo server on [port]: input goes through [Telnet.Server.handle], the negotiation reply is sent back, sizes are logged and data is logged, hexdumped and echoed encoded. *)
53 | let echo_server port =
54 |   let send fd buf = Lwt_unix.write fd (Cstruct.to_string buf) 0 (Cstruct.len buf) in
55 |   let on_event fd = function
56 |     | `Resize (w,h) -> yap ~tag:"handler" (Printf.sprintf "size %d,%d" w h)
57 |     | `Data s ->
58 |       yap ~tag:"handler" ("+ " ^ (Cstruct.to_string s)) >>= fun () ->
59 |       let out = Telnet.Server.encode s in
60 |       Printf.printf "out:" ; Cstruct.hexdump out ;
61 |       send fd out >|= fun _w -> ()
62 |   in
63 |   serve port (fun state fd _addr ->
64 |       let rec loop state =
65 |         lines fd >>= fun str ->
66 |         let state, events, options = Telnet.Server.handle state (Cstruct.of_string str) in
67 |         send fd options >>= fun _w ->
68 |         Lwt_list.iter_s (on_event fd) events >>= fun () ->
69 |         loop state
70 |       in loop state)
71 | 
72 | (* Entry point: the port is argv[1], or 4433 when that is missing or not an integer. *)
73 | let () =
74 |   let default_port = 4433 in
75 |   let port = try int_of_string Sys.argv.(1) with _ -> default_port in
76 |   Lwt_main.run (echo_server port)
77 | 


--------------------------------------------------------------------------------
/mirage/telnet_mirage.ml:
--------------------------------------------------------------------------------
 1 | open Lwt.Infix
 2 | 
 3 | module type SFLOW = sig
 4 |   (* A flow with typed messages: [read] yields an [input], [write]/[writev] take [output]s; all three can also report [`Eof] or [`Error]. *)
 5 |   type +'a io
 6 | 
 7 |   type input
 8 |   type output
 9 |   type flow
10 |   type error
11 | 
12 |   val error_message : error -> string
13 |   val read   : flow -> [`Ok of input | `Eof | `Error of error ] io
14 |   val write  : flow -> output -> [`Ok of unit | `Eof | `Error of error ] io
15 |   val writev : flow -> output list -> [`Ok of unit | `Eof | `Error of error ] io
16 |   val close  : flow -> unit io
17 | 
18 | end
19 | 
20 | module type SFLOW_LWT = SFLOW with type 'a io = 'a Lwt.t
21 | 
22 | module type TERMINAL_LINK = SFLOW_LWT
23 |   with type input  = [ `Data of Cstruct.t | `Resize of (int * int) ]
24 |    and type output = [ `Data of Cstruct.t | `Line_edit of bool ]
25 | 
26 | (* Bind that continues with [fb] on [`Ok] and passes [`Error]/[`Eof] through unchanged. *)
27 | let (>>==) a fb =
28 |   a >>= fun r -> match r with `Ok x -> fb x | (`Eof | `Error _) as e -> Lwt.return e
29 | 
30 | (* : TERMINAL_LINK *)
31 | module Make (F : V1_LWT.FLOW) = struct
32 |   type input  = [ `Data of Cstruct.t | `Resize of (int * int) ]
33 |   type output = [ `Data of Cstruct.t | `Line_edit of bool ]
34 | 
35 |   module FLOW = F
36 | 
37 |   type +'a io = 'a Lwt.t
38 | 
39 |   (* A telnet session: the underlying flow, the negotiation state, and events not yet delivered. *)
40 |   type flow = {
41 |     flow : F.flow ;
42 |     mutable state : Server.state ;
43 |     mutable linger : input list
44 |   }
45 | 
46 |   type error = F.error
47 |   let error_message = F.error_message
48 | 
49 |   (* Raw write on the underlying flow; its [`Eof]/[`Error] result is propagated, not masked. *)
50 |   let write_i flow buf = F.write flow buf
51 | 
52 |   (* Sends [`Data] through [Server.encode]; any other output ([`Line_edit]) is accepted and ignored. *)
53 |   let write s = function
54 |     | `Data buf -> write_i s.flow (Server.encode buf)
55 |     | _ -> Lwt.return (`Ok ())
56 | 
57 |   (* Returns the next event: a lingering one if any; otherwise reads the flow, sends the reply
58 |      produced by [Server.handle] and retries until the input yields at least one event. *)
59 |   let rec read s =
60 |     match s.linger with
61 |     | hd::rest -> s.linger <- rest ; Lwt.return (`Ok hd)
62 |     | [] ->
63 |       F.read s.flow >>== fun buffer ->
64 |       let state, events, out = Server.handle s.state buffer in
65 |       s.state <- state ;
66 |       write_i s.flow out >>== fun () ->
67 |       (match events with
68 |        | [] -> read s
69 |        | x::xs -> s.linger <- xs ; Lwt.return (`Ok x))
70 | 
71 |   (* Writes [xs] in order; after the first [`Eof] or [`Error] nothing more is written. *)
72 |   let writev s xs =
73 |     Lwt_list.fold_left_s (fun r x ->
74 |         match r with
75 |         | `Ok () -> write s x
76 |         | `Eof -> Lwt.return `Eof
77 |         | `Error e -> Lwt.return (`Error e))
78 |       (`Ok ())
79 |       xs
80 | 
81 |   let close s = F.close s.flow
82 | 
83 |   (* Starts a session on [flow] by sending the initial bytes returned by [Server.init]. *)
84 |   let of_flow flow =
85 |     let state, out = Server.init () in
86 |     write_i flow out >|= function
87 |     | `Ok () -> `Ok { flow ; state ; linger = [] }
88 |     (* The result type has no [`Error] case, so a failed initial write is reported as [`Eof]. *)
89 |     | `Eof | `Error _ -> `Eof
90 | end
91 | 


--------------------------------------------------------------------------------
/mirage/telnet_mirage.mli:
--------------------------------------------------------------------------------
 1 | module type SFLOW = sig
 2 |   (* A flow with typed messages: [read] yields an [input], [write]/[writev] take [output]s; all three can also report [`Eof] or [`Error]. *)
 3 |   type +'a io
 4 | 
 5 |   type input
 6 |   type output
 7 |   type flow
 8 |   type error
 9 | 
10 |   val error_message : error -> string
11 |   val read   : flow -> [`Ok of input | `Eof | `Error of error ] io
12 |   val write  : flow -> output -> [`Ok of unit | `Eof | `Error of error ] io
13 |   val writev : flow -> output list -> [`Ok of unit | `Eof | `Error of error ] io
14 |   val close  : flow -> unit io
15 | 
16 | end
17 | 
18 | module type SFLOW_LWT = SFLOW with type 'a io = 'a Lwt.t
19 | 
20 | module type TERMINAL_LINK = SFLOW_LWT
21 |   with type input  = [ `Data of Cstruct.t | `Resize of (int * int) ]
22 |    and type output = [ `Data of Cstruct.t | `Line_edit of bool ]
23 | 
24 | (* [Make (F)] provides a TERMINAL_LINK on top of flow [F]; [of_flow] wraps an existing [F] flow and can report [`Eof]. *)
25 | module Make (F : V1_LWT.FLOW) : sig
26 |   module FLOW    : V1_LWT.FLOW
27 | 
28 |   include TERMINAL_LINK
29 | 
30 |   val of_flow : FLOW.flow -> [ `Ok of flow | `Eof ] Lwt.t
31 | end
32 |   with module FLOW = F
33 | 


--------------------------------------------------------------------------------
/mirage/telnet_mirage.mllib:
--------------------------------------------------------------------------------
1 | Telnet_mirage


--------------------------------------------------------------------------------
/opam:
--------------------------------------------------------------------------------
 1 | opam-version: "1.2"
 2 | maintainer: ["Hannes Mehnert <hannes@mehnert.org>"]
 3 | authors: ["Hannes Mehnert <hannes@mehnert.org>"]
 4 | homepage:     "https://github.com/hannesm/telnet"
 5 | dev-repo:     "https://github.com/hannesm/telnet.git"
 6 | bug-reports:  "https://github.com/hannesm/telnet/issues"
 7 | doc:          "https://hannesm.github.io/telnet/doc"
 8 | license:      "ISC"
 9 | 
10 | build: [
11 |   ["ocaml" "pkg/pkg.ml" "build" "--pinned" "%{pinned}%"
12 |     "--with-mirage" "%{mirage-types+ipaddr+io-page:installed}%" ]
13 | ]
14 | 
15 | depends: [
16 |   "ocamlfind" {build}
17 |   "ocamlbuild" {build}
18 |   "ppx_tools" {build}
19 |   "topkg" {build}
20 |   "cstruct" {>= "1.9.0"}
21 |   "lwt"
22 |   "sexplib"
23 | ]
24 | 
25 | depopts: [
26 |   "mirage-types" "ipaddr" "io-page"
27 | ]
28 | 
29 | available: [ ocaml-version >= "4.02.0" ]
30 | 


--------------------------------------------------------------------------------
/pkg/META:
--------------------------------------------------------------------------------
 1 | description = "Telnet protocol and server side"
 2 | version = "%%VERSION_NUM%%"
 3 | requires = "cstruct"
 4 | archive(byte) = "telnet.cma"
 5 | archive(native) = "telnet.cmxa"
 6 | plugin(byte) = "telnet.cma"
 7 | plugin(native) = "telnet.cmxs"
 8 | exists_if = "telnet.cma"
 9 | 
10 | package "mirage" (
11 |   description = "Mirage telnet server"
12 |   version = "%%VERSION_NUM%%"
13 |   requires = "telnet cstruct mirage-types.lwt lwt"
14 |   archive(byte) = "telnet_mirage.cma"
15 |   archive(native) = "telnet_mirage.cmxa"
16 |   plugin(byte) = "telnet_mirage.cma"
17 |   plugin(native) = "telnet_mirage.cmxs"
18 |   exists_if = "telnet_mirage.cma"
19 | )


--------------------------------------------------------------------------------
/pkg/pkg.ml:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ocaml
 2 | #use "topfind"
 3 | #require "topkg"
 4 | open Topkg
 5 | 
 6 | let mirage = Conf.with_pkg ~default:false "mirage"
 7 | 
 8 | (* Describes the package: the core library always, the mirage library only when the [mirage] key is set. *)
 9 | let () =
10 |   let lint_deps_excluding = Some ["ppx_tools"; "io-page"; "ipaddr"] in
11 |   let opams = [ Pkg.opam_file "opam" ~lint_deps_excluding ] in
12 |   Pkg.describe ~opams "telnet" @@ fun c ->
13 |   let with_mirage = Conf.value c mirage in
14 |   Ok [
15 |     Pkg.mllib ~api:["Telnet"] "src/telnet.mllib";
16 |     Pkg.mllib ~cond:with_mirage "mirage/telnet_mirage.mllib" ~dst_dir:"mirage/"
17 |   ]
18 | 


--------------------------------------------------------------------------------
/rfc/rfc1409.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | 
  5 | 
  6 | 
  7 | Network Working Group                                  D. Borman, Editor
  8 | Request for Comments: 1409                           Cray Research, Inc.
  9 |                                                             January 1993
 10 | 
 11 | 
 12 |                       Telnet Authentication Option
 13 | 
 14 | Status of this Memo
 15 | 
 16 |    This memo defines an Experimental Protocol for the Internet
 17 |    community.  Discussion and suggestions for improvement are requested.
 18 |    Please refer to the current edition of the "IAB Official Protocol
 19 |    Standards" for the standardization state and status of this protocol.
 20 |    Distribution of this memo is unlimited.
 21 | 
 22 | 1.  Command Names and Codes
 23 | 
 24 |    AUTHENTICATION  37
 25 |        IS               0
 26 |        SEND             1
 27 |        REPLY            2
 28 |        NAME             3
 29 | 
 30 |        Authentication Types
 31 |        NULL             0
 32 |        KERBEROS_V4      1
 33 |        KERBEROS_V5      2
 34 |        SPX              3
 35 |        RSA              6
 36 |        LOKI            10
 37 | 
 38 |        Modifiers
 39 |        AUTH_WHO_MASK        1
 40 |        AUTH_CLIENT_TO_SERVER    0
 41 |        AUTH_SERVER_TO_CLIENT    1
 42 |        AUTH_HOW_MASK        2
 43 |        AUTH_HOW_ONE_WAY         0
 44 |        AUTH_HOW_MUTUAL          2
 45 | 
 46 | 2.  Command Meanings
 47 | 
 48 |    This document makes reference to a "server" and a "client".  For the
 49 |    purposes of this document, the "server" is the side of the connection
 50 |    that did the passive TCP open (TCP LISTEN state), and the "client" is
 51 |    the side of the connection that did the active open.
 52 | 
 53 | 
 54 | 
 55 | 
 56 | 
 57 | 
 58 | Telnet Working Group                                            [Page 1]
 59 | 
 60 | RFC 1409              Telnet Authentication Option          January 1993
 61 | 
 62 | 
 63 |    IAC WILL AUTHENTICATION
 64 | 
 65 |       The client side of the connection sends this command to indicate
 66 |       that it is willing to send and receive authentication information.
 67 | 
 68 |    IAC DO AUTHENTICATION
 69 | 
 70 |       The server side of the connection sends this command to indicate
 71 |       that it is willing to send and receive authentication information.
 72 | 
 73 |    IAC WONT AUTHENTICATION
 74 | 
 75 |       The client side of the connection sends this command to indicate
 76 |       that it refuses to send or receive authentication information; the
 77 |       server side sends this command if it receives a DO AUTHENTICATION
 78 |       command.
 79 | 
 80 |    IAC DONT AUTHENTICATION
 81 | 
 82 |       The server side of the connection sends this command to indicate
 83 |       that it refuses to send or receive authentication information; the
 84 |       client side sends this command if it receives a WILL
 85 |       AUTHENTICATION command.
 86 | 
 87 |    IAC SB AUTHENTICATION SEND authentication-type-pair-list IAC SE
 88 | 
 89 |       The sender of this command (the server) requests that the remote
 90 |       side send authentication information for one of the authentication
 91 |       types listed in "authentication-type-pair-list".  The
 92 |       "authentication-type-pair-list" is an ordered list of
 93 |       "authentication-type" pairs.  Only the server side (DO
 94 |       AUTHENTICATION) is allowed to send this.
 95 | 
 96 |    IAC SB AUTHENTICATION IS authentication-type-pair <auth data> IAC SE
 97 | 
 98 |       The sender of this command (the client) is sending the
 99 |       authentication information for authentication type
100 |       "authentication-type-pair".  Only the client side (WILL
101 |       AUTHENTICATION) is allowed to send this.
102 | 
103 |    IAC SB AUTHENTICATION REPLY authentication-type-pair <auth data> IAC
104 |    SE
105 | 
106 |       The sender of this command (the server) is sending a reply to the
107 |       authentication information received in a previous IS command.
108 |       Only the server side (DO AUTHENTICATION) is allowed to send this.
109 | 
110 | 
111 | 
112 | 
113 | 
114 | Telnet Working Group                                            [Page 2]
115 | 
116 | RFC 1409              Telnet Authentication Option          January 1993
117 | 
118 | 
119 |    IAC SB AUTHENTICATION NAME remote-user IAC SE
120 | 
121 |       This optional command is sent to specify the account name on the
122 |       remote host that the user wishes to be authorized to use.  Note
123 |       that authentication may succeed, and the authorization to use a
124 |       particular account may still fail.  Some authentication mechanisms
125 |       may ignore this command.
126 | 
127 |    The "authentication-type-pair" is two octets, the first is the
128 |    authentication type (as listed in Section 1, additions to this list
129 |    must be registered with the Internet Assigned Numbers Authority
130 |    (IANA)), and the second is a modifier to the type.  There are
131 |    currently two one bit fields defined in the modifier, the
132 |    AUTH_WHO_MASK bit and the AUTH_HOW_MASK bit, so there are four
133 |    possible combinations:
134 | 
135 |       AUTH_CLIENT_TO_SERVER
136 |       AUTH_HOW_ONE_WAY
137 | 
138 |          The client will send authentication information about the local
139 |          user to the server.  If the negotiation is successful, the
140 |          server will have authenticated the user on the client side of
141 |          the connection.
142 | 
143 |       AUTH_SERVER_TO_CLIENT
144 |       AUTH_HOW_ONE_WAY
145 | 
146 |          The server will authenticate itself to the client.  If the
147 |          negotiation is successful, the client will know that it is
148 |          connected to the server that it wants to be connected to.
149 | 
150 |       AUTH_CLIENT_TO_SERVER
151 |       AUTH_HOW_MUTUAL
152 | 
153 |          The client will send authentication information about the local
154 |          user to the server, and then the server will authenticate
155 |          itself to the client.  If the negotiation is successful, the
156 |          server will have authenticated the user on the client side of
157 |          the connection, and the client will know that it is connected
158 |          to the server that it wants to be connected to.
159 | 
160 |       AUTH_SERVER_TO_CLIENT
161 |       AUTH_HOW_MUTUAL
162 | 
163 |          The server will authenticate itself to the client, and then the
164 |          client will authenticate itself to the server.  If the
165 |          negotiation is successful, the client will know that it is
166 |          connected to the server that it wants to be connected to, and
167 | 
168 | 
169 | 
170 | Telnet Working Group                                            [Page 3]
171 | 
172 | RFC 1409              Telnet Authentication Option          January 1993
173 | 
174 | 
175 |          the server will know that the client is who it claims to be.
176 | 
177 | 3.  Default Specification
178 | 
179 |    The default specification for this option is
180 | 
181 |       WONT AUTHENTICATION
182 |       DONT AUTHENTICATION
183 | 
184 |    meaning there will not be any exchange of authentication information.
185 | 
186 | 4.  Motivation
187 | 
188 |    One of the deficiencies of the Telnet protocol is that in order to
189 |    log into remote systems, users have to type their passwords, which
190 |    are passed in clear text through the network.  If the connection
191 |    goes through untrusted networks, there is the possibility that
192 |    passwords will be compromised by someone watching the packets as they
193 |    go by.
194 | 
195 |    The purpose of the AUTHENTICATION option is to provide a framework
196 |    for the passing of authentication information through the TELNET
197 |    session.  This means that: 1) the user's password will not be sent in
198 |    clear text across the network, and 2) if the front end telnet process
199 |    has the appropriate authentication information, it can automatically
200 |    send the information, and the user will not have to type any
201 |    password.
202 | 
203 |    It is intended that the AUTHENTICATION option be general enough that
204 |    it can be used to pass information for any authentication system.
205 | 
206 | 5.  Security Implications
207 | 
208 |    The ability to negotiate a common authentication mechanism between
209 |    client and server is a feature of the authentication option that
210 |    should be used with caution.  When the negotiation is performed, no
211 |    authentication has yet occurred.  Therefore, each system has no way
212 |    of knowing whether or not it is talking to the system it intends.  An
213 |    intruder could attempt to negotiate the use of an authentication
214 |    system which is either weak, or already compromised by the intruder.
215 | 
216 | 6.  Implementation Rules
217 | 
218 |    WILL and DO are used only at the beginning of the connection to
219 |    obtain and grant permission for future negotiations.
220 | 
221 |    The authentication is only negotiated in one direction; the server
222 |    must send the "DO", and the client must send the "WILL".  This
223 | 
224 | 
225 | 
226 | Telnet Working Group                                            [Page 4]
227 | 
228 | RFC 1409              Telnet Authentication Option          January 1993
229 | 
230 | 
231 |    restriction is due to the nature of authentication; there are three
232 |    possible cases; server authenticates client, client authenticates
233 |    server, and server and client authenticate each other.  By only
234 |    negotiating the option in one direction, and then determining which
235 |    of the three cases is being used via the suboption, potential
236 |    ambiguity is removed.  If the server receives a "DO", it must respond
237 |    with a "WONT".  If the client receives a "WILL", it must respond with
238 |    a "DONT".
239 | 
240 |    Once the two hosts have exchanged a DO and a WILL, the server is free
241 |    to request authentication information.  In the request, a list of
242 |    supported authentication types is sent.  Only the server may send
243 |    requests ("IAC SB AUTHENTICATION SEND authentication-type-pair-list
244 |    IAC SE").  Only the client may transmit authentication information
245 |    via the "IAC SB AUTHENTICATION IS authentication-type ... IAC SE"
246 |    command.  Only the server may send replies ("IAC SB AUTHENTICATION
247 |    REPLY authentication-type ... IAC SE").  As many IS and REPLY
248 |    suboptions may be exchanged as are needed for the particular
249 |    authentication scheme chosen.
250 | 
251 |    If the client does not support any of the authentication types listed
252 |    in the authentication-type-pair-list, a type of NULL should be used
253 |    to indicate this in the IS reply.  Note that in this case, the server
254 |    may choose to close the connection.
255 | 
256 |    The authentication types MUST be ordered to indicate a
257 |    preference for different authentication types, the first type being
258 |    the most preferred, and the last type the least preferred.
259 | 
260 |    The following is an example of use of the option:
261 | 
262 |        Client                           Server
263 |                                         IAC DO AUTHENTICATION
264 |        IAC WILL AUTHENTICATION
265 |        [ The server is now free to request authentication information.
266 |          ]
267 |                                         IAC SB AUTHENTICATION SEND
268 |                                         KERBEROS_V4 CLIENT|MUTUAL
269 |                                         KERBEROS_V4 CLIENT|ONE_WAY IAC
270 |                                         SE
271 |        [ The server has requested mutual Kerberos authentication, but is
272 |          willing to do just one-way Kerberos authentication.  The client
273 |          will now respond with the name of the user that it wants to log
274 |          in as, and the Kerberos ticket.  ]
275 |        IAC SB AUTHENTICATION NAME "joe"
276 |        IAC SE
277 |        IAC SB AUTHENTICATION IS
278 |        KERBEROS_V4 CLIENT|MUTUAL AUTH 4
279 | 
280 | 
281 | 
282 | Telnet Working Group                                            [Page 5]
283 | 
284 | RFC 1409              Telnet Authentication Option          January 1993
285 | 
286 | 
287 |        7 1 67 82 65 89 46 67 7 9 77 0
288 |        48 24 49 244 109 240 50 208 43
289 |        35 25 116 104 44 167 21 201 224
290 |        229 145 20 2 244 213 220 33 134
291 |        148 4 251 249 233 229 152 77 2
292 |        109 130 231 33 146 190 248 1 9
293 |        31 95 94 15 120 224 0 225 76 205
294 |        70 136 245 190 199 147 155 13
295 |        IAC SE
296 |        [ The server responds with an ACCEPT command to state that the
297 |          authentication was successful.  ]
298 |                                         IAC SB AUTHENTICATION REPLY
299 |                                         KERBEROS_V4 CLIENT|MUTUAL ACCEPT
300 |                                         IAC SE
301 |        [ Next, the client sends across a CHALLENGE to verify that it is
302 |          really talking to the right server.  ]
303 |        IAC SB AUTHENTICATION IS
304 |        KERBEROS_V4 CLIENT|MUTUAL
305 |        CHALLENGE xx xx xx xx xx xx xx
306 |        xx IAC SE
307 |        [ Lastly, the server sends across a RESPONSE to prove that it
308 |          really is the right server.  ]
309 |                                         IAC SB AUTHENTICATION REPLY
310 |                                         KERBEROS_V4 CLIENT|MUTUAL
311 |                                         RESPONSE yy yy yy yy yy yy yy yy
312 |                                         IAC SE
313 | 
314 |    It is expected that any implementation that supports the Telnet
315 |    AUTHENTICATION option will support all of this specification.
316 | 
317 | 7.  References
318 | 
319 |    [1] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC 1340,
320 |        USC/Information Sciences Institute, July 1992.
321 | 
322 | Security Considerations
323 | 
324 |    Security issues are discussed in Section 5.
325 | 
326 | 
327 | 
328 | 
329 | 
330 | 
331 | 
332 | 
333 | 
334 | 
335 | 
336 | 
337 | 
338 | Telnet Working Group                                            [Page 6]
339 | 
340 | RFC 1409              Telnet Authentication Option          January 1993
341 | 
342 | 
343 | Author's Address
344 | 
345 |    David A. Borman, Editor
346 |    Cray Research, Inc.
347 |    655F Lone Oak Drive
348 |    Eagan, MN 55123
349 | 
350 |    Phone: (612) 452-6650
351 |    EMail: dab@CRAY.COM
352 | 
353 |    Mailing List: telnet-ietf@CRAY.COM
354 | 
355 | Chair's Address
356 | 
357 |    The working group can be contacted via the current chair:
358 | 
359 |    Steve Alexander
360 |    INTERACTIVE Systems Corporation
361 |    1901 North Naper Boulevard
362 |    Naperville, IL 60563-8895
363 | 
364 |    Phone: (708) 505-9100 x256
365 |    EMail: stevea@isc.com
366 | 
367 | 
368 | 
369 | 
370 | 
371 | 
372 | 
373 | 
374 | 
375 | 
376 | 
377 | 
378 | 
379 | 
380 | 
381 | 
382 | 
383 | 
384 | 
385 | 
386 | 
387 | 
388 | 
389 | 
390 | 
391 | 
392 | 
393 | 
394 | Telnet Working Group                                            [Page 7]
395 | 


--------------------------------------------------------------------------------
/rfc/rfc1572.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | 
  5 | 
  6 | 
  7 | Network Working Group                               S. Alexander, Editor
  8 | Request for Comments: 1572                      Lachman Technology, Inc.
  9 | Category: Standards Track                                   January 1994
 10 | 
 11 | 
 12 |                        Telnet Environment Option
 13 | 
 14 | Status of this Memo
 15 | 
 16 |    This document specifies an Internet standards track protocol for the
 17 |    Internet community, and requests discussion and suggestions for
 18 |    improvements.  Please refer to the current edition of the "Internet
 19 |    Official Protocol Standards" (STD 1) for the standardization state
 20 |    and status of this protocol.  Distribution of this memo is unlimited.
 21 | 
 22 | Abstract
 23 | 
 24 |    This document specifies a mechanism for passing environment
 25 |    information between a telnet client and server.  Use of this
 26 |    mechanism enables a telnet user to propagate configuration
 27 |    information to a remote host when connecting.
 28 | 
 29 |    This document corrects some errors in [1].
 30 | 
 31 | 1.  Command Names and Codes
 32 | 
 33 |       NEW-ENVIRON     39
 34 |           IS               0
 35 |           SEND             1
 36 |           INFO             2
 37 | 
 38 |           VAR              0
 39 |           VALUE            1
 40 |           ESC              2
 41 |           USERVAR          3
 42 | 
 43 | 2.  Command Meanings
 44 | 
 45 |    IAC WILL NEW-ENVIRON
 46 | 
 47 |       The sender of this command is willing to send environment
 48 |       variables.
 49 | 
 50 |    IAC WONT NEW-ENVIRON
 51 | 
 52 |       The sender of this command refuses to send environment variables.
 53 | 
 54 | 
 55 | 
 56 | 
 57 | 
 58 | Telnet Working Group                                            [Page 1]
 59 | 
 60 | RFC 1572               Telnet Environment Option            January 1994
 61 | 
 62 | 
 63 |    IAC DO NEW-ENVIRON
 64 | 
 65 |       The sender of this command is willing to receive environment
 66 |       variables.
 67 | 
 68 |    IAC DONT NEW-ENVIRON
 69 | 
 70 |       The sender of this command refuses to accept environment
 71 |       variables.
 72 | 
 73 |    IAC SB NEW-ENVIRON SEND [ type ... [ type ... [ ... ] ] ] IAC SE
 74 | 
 75 |       The sender of this command requests that the remote side send its
 76 |       environment variables.  The "type" may be either VAR or USERVAR,
 77 |       to indicate either well known or user variable names.  Only the
 78 |       side that is DO NEW-ENVIRON may initiate a SEND command.  If a
 79 |       list of variables is specified, then only those variables should
 80 |       be sent.  If no list is specified, then the default environment,
 81 |       of both well known and user defined variables, should be sent.  If
 82 |       one of the variables has no name, then all the variables of that
 83 |       type (well known or user defined)  in the default environment
 84 |       should be sent.
 85 | 
 86 |    IAC SB NEW-ENVIRON IS type ... [ VALUE ... ] [ type ... [ VALUE ... ]
 87 |    [ ... ] ] IAC SE
 88 | 
 89 |       The sender of this command is sending environment variables.  This
 90 |       command is sent in response to a SEND request.  Only the side that
 91 |       is WILL NEW-ENVIRON may send an IS command.  The "type"/VALUE
 92 |       pairs must be returned in the same order as the SEND request
 93 |       specified them, and there must be a response for each "type ..."
 94 |       explicitly requested.  The "type" will be VAR or USERVAR.
 95 |       Multiple environment variables may be sent.  The characters
 96 |       following a "type" up to the next "type" or VALUE specify the
 97 |       variable name.  The characters following a VALUE up to the next
 98 |       "type" specify the value of the variable.  If a "type" is not
 99 |       followed by a VALUE (e.g., by another VAR, USERVAR, or IAC SE)
100 |       then that variable is undefined.  If a VALUE is immediately
101 |       followed by a "type" or IAC, then the variable is defined, but has
102 |       no value.  If an IAC is contained between the IS and the IAC SE,
103 |       it must be sent as IAC IAC.  If a variable or a value contains a
104 |       VAR, it must be sent as ESC VAR.  If a variable or a value
105 |       contains a USERVAR, it must be sent as ESC USERVAR.  If a variable
106 |       or a value contains a VALUE, it must be sent as ESC VALUE.  If a
107 |       variable or a value contains an ESC, it must be sent as ESC ESC.
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | Telnet Working Group                                            [Page 2]
115 | 
116 | RFC 1572               Telnet Environment Option            January 1994
117 | 
118 | 
119 |    IAC SB NEW-ENVIRON INFO type ... [ VALUE ... ] [ type ... [ VALUE ...
120 |    ] [ ... ] ] IAC SE
121 | 
122 |       The sender of this command is sending information about
123 |       environment variables that have changed.  It is identical to the
124 |       IS command, except that the command is INFO instead of IS.  Only
125 |       the side that is WILL NEW-ENVIRON may send an INFO command.  The
126 |       INFO command is not to be used to send initial information; the
127 |       SEND/IS sequence is to be used for that.  The INFO command is to
128 |       be used to propagate changes in environment variables, and may be
129 |       spontaneously generated.
130 | 
131 | 3.  Default Specification
132 | 
133 |    The default specification for this option is
134 | 
135 |       WONT NEW-ENVIRON
136 |       DONT NEW-ENVIRON
137 | 
138 |    meaning there will not be any exchange of environment information.
139 | 
140 | 4.  Motivation
141 | 
142 |    Many operating systems have startup information and environment
143 |    variables that contain information that should be propagated to
144 |    remote machines when Telnet connections are established.  Rather than
145 |    create a new Telnet option each time someone comes up with some new
146 |    information that they need propagated through a Telnet session, but
147 |    that the Telnet session itself doesn't really need to know about,
148 |    this generic information option can be used.
149 | 
150 | 5.  Well Known Variables
151 | 
152 |    USER        This variable is used to transmit the user or account
153 |                name that the client wishes to log into on the remote
154 |                system.  The format of the value of the USER variable is
155 |                system dependent, as determined by the remote system.
156 | 
157 |    JOB         This variable is used to transmit the job ID that the
158 |                client wishes to use when logging into the remote system.
159 |                The format of the value of the JOB variable is system
160 |                dependent, as determined by the remote system.
161 | 
162 |    ACCT        This variable is used to transmit the account ID that the
163 |                client wishes to use when logging into the remote system.
164 |                The format of the value of the ACCT variable is system
165 |                dependent, as determined by the remote system.
166 | 
167 | 
168 | 
169 | 
170 | Telnet Working Group                                            [Page 3]
171 | 
172 | RFC 1572               Telnet Environment Option            January 1994
173 | 
174 | 
175 |    PRINTER     This variable is used to identify the default location
176 |                for printer output.  Because there does not currently
177 |                exist a standard way of naming a printer on a network,
178 |                the format of this variable is currently undefined.
179 | 
180 |    SYSTEMTYPE  This is used to transmit the type of operating system on
181 |                the system that sends this variable.  Its value is
182 |                identical to the value of the SYSTEM (SYST) command in
183 |                FTP [4].  The format of the value shall have as its first
184 |                word one of the system names listed in the current
185 |                version of the Assigned Numbers document [5].
186 | 
187 |    DISPLAY     This variable is used to transmit the X display location
188 |                of the client.  The format for the value of the DISPLAY
189 |                variable is:
190 | 
191 |                   <host>:<dispnum>[.<screennum>]
192 | 
193 |                This information is identical to the information passed
194 |                using the Telnet X-DISPLAY-LOCATION option.  If both the
195 |                DISPLAY environment variable, and the X-DISPLAY-LOCATION
196 |                option [6] are received, and they contain conflicting
197 |                information, the most recently received information
198 |                should be used.
199 | 
200 |    Because it is impossible to anticipate all variables that users may
201 |    wish to exchange, the USERVAR type is provided to allow users to
202 |    transmit arbitrary variable/value pairs.  The use of an additional
203 |    type allows implementations to distinguish between values derived by
204 |    the remote host software and values supplied by the user.  Paranoid
205 |    implementations will most likely treat both types with an equal level
206 |    of distrust.  The results of a name-space collision between a well-
207 |    known and a user variable are implementation specific.
208 | 
209 | 6.  Implementation Rules
210 | 
211 |    WILL and DO are used only at the beginning of the connection to
212 |    obtain and grant permission for future negotiations.
213 | 
214 |    Once the two hosts have exchanged a WILL and a DO, the sender of the
215 |    DO NEW-ENVIRON is free to request that environment variables be sent.
216 |    Only the sender of the DO may send requests (IAC SB NEW-ENVIRON SEND
217 |    IAC SE) and only the sender of the WILL may transmit actual
218 |    environment information (via the IAC SB NEW-ENVIRON IS ... IAC SE
219 |    command).  Though this option may be used at any time throughout the
220 |    life of the telnet connection, the exchange of environment
221 |    information will usually happen at the startup of the connection.
222 |    This is because many operating systems only have mechanisms for
223 | 
224 | 
225 | 
226 | Telnet Working Group                                            [Page 4]
227 | 
228 | RFC 1572               Telnet Environment Option            January 1994
229 | 
230 | 
231 |    propagating environment information at process creation, so the
232 |    information is needed before the user logs in.
233 | 
234 |    The receiving host is not required to put all variables that it
235 |    receives into the environment.  For example, if the client should
236 |    send across USERVAR "TERM" VALUE "xterm" as an environment variable,
237 |    and the TERMINAL-TYPE [3] option has already been used to determine
238 |    the terminal type, the server may safely ignore the TERM variable.
239 |    Also, some startup information may be used in other ways; for
240 |    example, the values for "USER", "ACCT" and "PROJ" might be
241 |    used to decide which account to log into, and might never be put into
242 |    the user's environment.  In general, if the server has already
243 |    determined the value of an environment variable by some more accurate
244 |    means, or if it does not understand a variable name, it may ignore
245 |    the value sent in the NEW-ENVIRON option.  The server may also prefer
246 |    to just put all unknown information into the user's environment.  This
247 |    is the suggested method of implementation, because it allows the user
248 |    the most flexibility.
249 | 
250 |    The following is an example of use of the option:
251 | 
252 |        Host1                            Host2
253 |        IAC DO NEW-ENVIRON
254 |                                         IAC WILL NEW-ENVIRON
255 |        [ Host1 is now free to request environment information ]
256 |        IAC SB NEW-ENVIRON SEND VAR
257 |        "USER" VAR "ACCT" VAR USERVAR
258 |        IAC SE
259 |        [ The server has now explicitly asked for the USER and ACCT
260 |          variables, the default set of well known environment variables,
261 |          and the default set of user defined variables.  Note that the
262 |          client includes the USER information twice; once because it was
263 |          explicitly asked for, and once because it is part of the
264 |          default environment.  ]
265 |                                         IAC SB NEW-ENVIRON IS VAR "USER"
266 |                                         VALUE "joe" VAR "ACCT" VALUE
267 |                                         "kernel" VAR "USER" VALUE "joe"
268 |                                         VAR "DISPLAY" VALUE "foo:0.0"
269 |                                         USERVAR "SHELL" VALUE "/bin/csh"
270 |                                         IAC SE
271 | 
272 |    It is legal for a client to respond with an empty environment (no
273 |    data between the IAC SB and IAC SE) when no well-known or user
274 |    variables are currently defined.  For example:
275 | 
276 |       IAC SB NEW-ENVIRON IS IAC SE
277 | 
278 |    is a valid response to any of the following:
279 | 
280 | 
281 | 
282 | Telnet Working Group                                            [Page 5]
283 | 
284 | RFC 1572               Telnet Environment Option            January 1994
285 | 
286 | 
287 |       IAC SB NEW-ENVIRON SEND IAC SE
288 |       IAC SB NEW-ENVIRON SEND VAR IAC SE
289 |       IAC SB NEW-ENVIRON SEND USERVAR IAC SE
290 |       IAC SB NEW-ENVIRON SEND VAR USERVAR IAC SE
291 | 
292 |    (The last example is equivalent to the first...)
293 | 
294 |    The earlier version of this specification [1] incorrectly reversed
295 |    the values for VAR and VALUE,  which put the specification at odds
296 |    with existing implementations.  In order to resolve that problem, as
297 |    well as other minor problems, a new option number has been assigned
298 |    to the NEW-ENVIRON option.  This allows implementations of this memo
299 |    to interoperate with no ambiguity.
300 | 
301 |    For a discussion on how to implement to interoperate with the various
302 |    implementations that pre-date this memo, see [2].
303 | 
304 |    It is expected that any implementation that supports the Telnet NEW-
305 |    ENVIRON option will support all of this specification.
306 | 
307 | 7.  Security Concerns
308 | 
309 |    It is important for an implementor of the NEW-ENVIRON option to
310 |    understand the interaction of setting options and the
311 |    login/authentication process. Specifically, careful analysis should be
312 |    done to determine which variables are "safe" to set prior to having
313 |    the client login.  An example of a bad choice would be permitting a
314 |    variable to be changed that allows an intruder to circumvent or
315 |    compromise the login/authentication program itself.
316 | 
317 | 8.  References
318 | 
319 |    [1] Borman, D., Editor, "Telnet Environment Option", RFC 1408, Cray
320 |        Research, Inc., January 1993.
321 | 
322 |    [2] Borman, D., "Telnet Environment Option Interoperability Issues",
323 |        RFC 1571, Cray Research, Inc., January 1994.
324 | 
325 |    [3] VanBokkelen, J., "Telnet Terminal-Type Option", RFC 1091, FTP
326 |        Software, Inc., February 1989.
327 | 
328 |    [4] Postel, J., and J. Reynolds, "File Transfer Protocol (FTP)", STD
329 |        9, RFC 959, USC/Information Sciences Institute, October 1985.
330 | 
331 |    [5] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC 1340,
332 |        USC/Information Sciences Institute, July 1992.
333 | 
334 | 
335 | 
336 | 
337 | 
338 | Telnet Working Group                                            [Page 6]
339 | 
340 | RFC 1572               Telnet Environment Option            January 1994
341 | 
342 | 
343 |    [6] Marcy, G., "Telnet X Display Location Option", RFC 1096, Carnegie
344 |        Mellon University, March 1989.
345 | 
346 | Acknowledgements
347 | 
348 |    The original version of this document was written by Dave Borman of
349 |    Cray Research, Inc.  In addition, the comments of the Telnet Working
350 |    Group of the IETF are gratefully acknowledged.
351 | 
352 | Security Considerations
353 | 
354 |    Security issues are discussed in Section 7.
355 | 
356 | Editor's Address
357 | 
358 |    Steve Alexander
359 |    Lachman Technology, Inc.
360 |    1901 North Naper Boulevard
361 |    Naperville, IL 60563-8895
362 | 
363 |    Phone: (708) 505-9555 x256
364 |    EMail: stevea@lachman.com
365 | 
366 | 
367 | 
368 | 
369 | 
370 | 
371 | 
372 | 
373 | 
374 | 
375 | 
376 | 
377 | 
378 | 
379 | 
380 | 
381 | 
382 | 
383 | 
384 | 
385 | 
386 | 
387 | 
388 | 
389 | 
390 | 
391 | 
392 | 
393 | 
394 | Telnet Working Group                                            [Page 7]
395 | 


--------------------------------------------------------------------------------
/rfc/rfc5198.txt:
--------------------------------------------------------------------------------
   1 | 
   2 | 
   3 | 
   4 | 
   5 | 
   6 | 
   7 | Network Working Group                                         J. Klensin
   8 | Request for Comments: 5198                                  M. Padlipsky
   9 | Obsoletes: 698                                                March 2008
  10 | Updates: 854
  11 | Category: Standards Track
  12 | 
  13 | 
  14 |                  Unicode Format for Network Interchange
  15 | 
  16 | Status of This Memo
  17 | 
  18 |    This document specifies an Internet standards track protocol for the
  19 |    Internet community, and requests discussion and suggestions for
  20 |    improvements.  Please refer to the current edition of the "Internet
  21 |    Official Protocol Standards" (STD 1) for the standardization state
  22 |    and status of this protocol.  Distribution of this memo is unlimited.
  23 | 
  24 | Abstract
  25 | 
  26 |    The Internet today is in need of a standardized form for the
  27 |    transmission of internationalized "text" information, paralleling the
  28 |    specifications for the use of ASCII that date from the early days of
  29 |    the ARPANET.  This document specifies that format, using UTF-8 with
  30 |    normalization and specific line-ending sequences.
  31 | 
  32 | Table of Contents
  33 | 
  34 |    1.  Introduction . . . . . . . . . . . . . . . . . . . . . . . . .  2
  35 |      1.1.  Requirement for a Standardized Text Stream Format  . . . .  2
  36 |      1.2.  Terminology  . . . . . . . . . . . . . . . . . . . . . . .  3
  37 |    2.  Net-Unicode Definition . . . . . . . . . . . . . . . . . . . .  3
  38 |    3.  Normalization  . . . . . . . . . . . . . . . . . . . . . . . .  5
  39 |    4.  Versions of Unicode  . . . . . . . . . . . . . . . . . . . . .  5
  40 |    5.  Applicability and Stability of this Specification  . . . . . .  7
  41 |      5.1.  Use in IETF Applications Specifications  . . . . . . . . .  7
  42 |      5.2.  Unicode Versions and Applicability . . . . . . . . . . . .  7
  43 |    6.  Security Considerations  . . . . . . . . . . . . . . . . . . .  9
  44 |    7.  Acknowledgments  . . . . . . . . . . . . . . . . . . . . . . . 10
  45 |    Appendix A.  History and Context . . . . . . . . . . . . . . . . . 11
  46 |    Appendix B.  The ASCII NVT Definition  . . . . . . . . . . . . . . 12
  47 |    Appendix C.  The Line-Ending Problem . . . . . . . . . . . . . . . 14
  48 |    Appendix D.  A Note about Related Future Work  . . . . . . . . . . 14
  49 |    References . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15
  50 |      Normative References . . . . . . . . . . . . . . . . . . . . . . 15
  51 |      Informative References . . . . . . . . . . . . . . . . . . . . . 16
  52 | 
  53 | 
  54 | 
  55 | 
  56 | 
  57 | 
  58 | Klensin & Padlipsky         Standards Track                     [Page 1]
  59 | 
  60 | RFC 5198                    Network Unicode                   March 2008
  61 | 
  62 | 
  63 | 1.  Introduction
  64 | 
  65 | 1.1.  Requirement for a Standardized Text Stream Format
  66 | 
  67 |    Historically, Internet protocols have been largely ASCII-based and
  68 |    references to "text" in protocols have assumed ASCII text and
  69 |    specifically text in Network Virtual Terminal ("NVT") or "Network
  70 |    ASCII" form (see Appendix A and Appendix B).  Protocols and formats
  71 |    that have moved beyond ASCII have included arrangements to
  72 |    specifically identify the character set and often the language being
  73 |    used.
  74 | 
  75 |    In our more internationalized world, "text" clearly no longer equates
  76 |    unambiguously to "network ASCII".  Fortunately, however, we are
  77 |    converging on Unicode [Unicode] [ISO10646] as a single international
  78 |    interchange character coding and no longer need to deal with per-
  79 |    script standards for character sets (e.g., one standard for each of
  80 |    Arabic, Cyrillic, Devanagari, etc., or even standards keyed to
  81 |    languages that are usually considered to share a script, such as
  82 |    French, German, or Swedish).  Unfortunately, though, while it is
  83 |    certainly time to define a Unicode-based text type for use as a
  84 |    common text interchange format, "use Unicode" involves even more
  85 |    ambiguity than "use ASCII" did decades ago.
  86 | 
  87 |    Unicode identifies each character by an integer, called its "code
  88 |    point", in the range 0-0x10ffff.  These integers can be encoded into
  89 |    byte sequences for transmission in at least three standard and
  90 |    generally-recognized encoding forms, all of which are completely
  91 |    defined in The Unicode Standard and the documents cited below:
  92 | 
  93 |    o  UTF-8 [RFC3629] defines a variable-length encoding that may be
  94 |       applied uniformly to all code points.
  95 | 
  96 |    o  UTF-16 [RFC2781] encodes the range of Unicode characters whose
  97 |       code points are less than 65536 straightforwardly as 16-bit
  98 |       integers, and provides a "surrogate" mechanism for encoding larger
  99 |       code points in 32 bits.
 100 | 
 101 |    o  UTF-32 (also known as UCS-4) simply encodes each code point as a
 102 |       32-bit integer.
 103 | 
 104 |    Older forms and nomenclature, such as the 16-bit UCS-2, are now
 105 |    strongly discouraged.
 106 | 
 107 |    As with ASCII, any of these forms may be used with different line-
 108 |    ending conventions.  That flexibility can be an additional source of
 109 |    confusion with, e.g., index (offset) references into documents based
 110 |    on character counts.
 111 | 
 112 | 
 113 | 
 114 | Klensin & Padlipsky         Standards Track                     [Page 2]
 115 | 
 116 | RFC 5198                    Network Unicode                   March 2008
 117 | 
 118 | 
 119 |    This document proposes to establish "Net-Unicode" as a new
 120 |    standardized text transmission form for the Internet, to serve as an
 121 |    internationalized alternative for NVT ASCII when specified in new --
 122 |    and, where appropriate, updated -- protocols.  UTF-8 [RFC3629] is
 123 |    chosen for the coding because it has good compatibility properties
 124 |    with ASCII and for other reasons discussed in the existing IETF
 125 |    character set policy [RFC2277].  "Net-Unicode" is specified in
 126 |    Section 2; the subsequent sections of the document provide background
 127 |    and explanation.
 128 | 
 129 |    Whenever there is a choice, Unicode SHOULD be used with the text
 130 |    encoding specified here.  This combination is preferred to the
 131 |    double-byte encoding of "extended ASCII" [RFC0698] or the assorted
 132 |    per-language or per-country character coding systems.
 133 | 
 134 | 1.2.  Terminology
 135 | 
 136 |    The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
 137 |    "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
 138 |    document are to be interpreted as described in [RFC2119].
 139 | 
 140 | 2.  Net-Unicode Definition
 141 | 
 142 |    The Network Unicode format (Net-Unicode) is defined as follows.
 143 |    Parts of this definition are deliberately informal, providing
 144 |    guidance for specific profiles or rules in the protocols that
 145 |    reference this one rather than firm rules that apply globally.
 146 | 
 147 |    1.  Characters MUST be encoded in UTF-8 as defined in [RFC3629].
 148 | 
 149 |    2.  If the protocol has the concept of "lines", line-endings MUST be
 150 |        indicated by the sequence Carriage-Return (CR, U+000D) followed
 151 |        by Line-Feed (LF, U+000A), often known just as CRLF.  CR SHOULD
 152 |        NOT appear except when followed by LF.  The only other allowed
 153 |        context in which CR is permitted is in the combination CR NUL,
 154 |        which is not recommended (see the note at the end of this
 155 |        section).
 156 | 
 157 |    3.  The control characters in the ASCII range (U+0000 to U+001F and
 158 |        U+007F to U+009F) SHOULD generally be avoided.  Space (SP,
 159 |        U+0020), CR, LF, and Form Feed (FF, U+000C) are exceptions to
 160 |        this principle, but use of all but the first requires care as
 161 |        discussed elsewhere in this document.  The so-called "C1
 162 |        Controls" (U+0080 through U+009F), which did not appear in ASCII,
 163 |        MUST NOT appear.
 164 | 
 165 |        FF should be used only with caution: it does not have a standard
 166 |        and universal interpretation and, in particular, if its use
 167 | 
 168 | 
 169 | 
 170 | Klensin & Padlipsky         Standards Track                     [Page 3]
 171 | 
 172 | RFC 5198                    Network Unicode                   March 2008
 173 | 
 174 | 
 175 |        assumes a page length, such assumptions may not be appropriate in
 176 |        international contexts (e.g., considering 8.5x11 inch paper
 177 |        versus A4).  Other control characters are used to affect display
 178 |        format, control devices, or to structure files.  None of those
 179 |        uses is appropriate for streams of plain text.
 180 | 
 181 |    4.  Before transmission, all character sequences SHOULD be normalized
 182 |        according to Unicode normalization form "NFC" (see Section 3).
 183 | 
 184 |    5.  As suggested in Section 6 of RFC 3629, the Byte Order Mark
 185 |        ("BOM") signature MUST NOT appear at the beginning of these text
 186 |        strings.
 187 | 
 188 |    6.  Systems conforming to this specification MUST NOT transmit any
 189 |        string containing any code point that is unassigned in the
 190 |        version of Unicode on which they are dependent.  The version of
 191 |        NFC and the version of Unicode used by that system MUST be
 192 |        consistent.
 193 | 
 194 |    The use of LF without CR is questionable; see Appendix B for more
 195 |    discussion.  The newer control characters IND (U+0084) and NEL ("Next
 196 |    Line", U+0085) might have been used to disambiguate the various line-
 197 |    ending situations, but, because their use has not been established on
 198 |    the Internet, because many protocols require CRLF, and because IND
 199 |    and NEL fall within the "C1 Controls" group (see below), they MUST
 200 |    NOT be used.  Similar observations apply to the yet newer line and
 201 |    paragraph separators at U+2028 and U+2029 and any future characters
 202 |    that might be defined to serve these functions.  For this
 203 |    specification and protocols that depend on it, lines end in CRLF and
 204 |    only in CRLF.  Anything that does not end in CRLF is either not a
 205 |    line or is severely malformed.
 206 | 
 207 |    The NVT specification contained a number of additional provisions,
 208 |    e.g., for the optional use of backspacing and "bare CR" (sent as CR
 209 |    NUL) to generate overstruck character sequences.  The much greater
 210 |    number of precomposed characters in Unicode, the availability of
 211 |    combining characters, and the growing use of markup conventions of
 212 |    various types to show, e.g., emphasis (rather than attempting to do
 213 |    that via the use of special characters), should make such sequences
 214 |    largely unnecessary.  These sequences SHOULD be avoided if at all
 215 |    possible.  However, because they were optional in NVT applications
 216 |    and this specification is an NVT superset, they cannot be prohibited
 217 |    entirely.  The most important of these rules is that CR MUST NOT
 218 |    appear unless it is immediately followed by LF (indicating end of
 219 |    line) or NUL.  Because NUL (an octet whose value is all zeros, i.e.,
 220 |    %x00 in the notation of [RFC5234]) is hostile to programming
 221 |    languages that use that character as a string delimiter, the CR NUL
 222 |    sequence SHOULD be avoided for that reason as well.
 223 | 
 224 | 
 225 | 
 226 | Klensin & Padlipsky         Standards Track                     [Page 4]
 227 | 
 228 | RFC 5198                    Network Unicode                   March 2008
 229 | 
 230 | 
 231 | 3.  Normalization
 232 | 
 233 |    There are cases where strings of Unicode are fundamentally
 234 |    equivalent, essentially representing the same text.  These are called
 235 |    "canonical equivalents" in the Unicode Standard.  For example, the
 236 |    following pairs of strings are canonically equivalent:
 237 | 
 238 |    U+2126 OHM SIGN
 239 |    U+03A9 GREEK CAPITAL LETTER OMEGA
 240 | 
 241 |    U+0061 LATIN SMALL LETTER A, U+0300 COMBINING GRAVE ACCENT
 242 |    U+00E0 LATIN SMALL LETTER A WITH GRAVE
 243 | 
 244 |    Comparison of strings becomes much easier if any such cases are
 245 |    always represented by a single unique form.  The Unicode Consortium
 246 |    specifies a normalization form, known as NFC [NFC], which provides
 247 |    the necessary mappings and mechanisms to convert all canonically
 248 |    equivalent sequences to a single unique form.  Typically, this form
 249 |    produces precomposed characters for any sequences that can be
 250 |    represented in that fashion.  It also reorders other combining marks
 251 |    so that they have a unique and unambiguous order.
 252 | 
 253 |    Of the various normalization forms defined as part of Unicode, NFC is
 254 |    closest to actual use in practice, minimizes side-effects due to
 255 |    considering characters equivalent that may not be equivalent in all
 256 |    situations, and typically requires the least work when converting
 257 |    from non-Unicode encodings.
 258 | 
 259 |    The section above requires that, except in very unusual
 260 |    circumstances, all Net-Unicode strings be transmitted in normalized
 261 |    form.  Recognition of the fact that some implementations of
 262 |    applications may rely on operating system libraries over which they
 263 |    have little control and adherence to the robustness principle
 264 |    suggests that receivers of such strings should be prepared to receive
 265 |    unnormalized ones and to not react to that in excessive ways.
 266 | 
 267 | 4.  Versions of Unicode
 268 | 
 269 |    Unicode changes and expands over time.  Large blocks of space are
 270 |    reserved for future expansion.  New versions, which appear at regular
 271 |    intervals, add new scripts and characters.  Occasionally they also
 272 |    change some property definitions.  In retrospect, one of the
 273 |    advantages of ASCII [ASCII] when it was chosen was that the code
 274 |    space was full when the Standard was first published.  There was no
 275 |    practical way to add characters or change code point assignments
 276 |    without being obviously incompatible.
 277 | 
 278 | 
 279 | 
 280 | 
 281 | 
 282 | Klensin & Padlipsky         Standards Track                     [Page 5]
 283 | 
 284 | RFC 5198                    Network Unicode                   March 2008
 285 | 
 286 | 
 287 |    While there are some security issues if people deliberately try to
 288 |    trick the system (see Section 6), Unicode version changes should not
 289 |    have a significant impact on the text stream specification of this
 290 |    document for the following reasons:
 291 | 
 292 |    o  The transformation between Unicode code table positions and the
 293 |       corresponding UTF-8 code is algorithmic; it does not depend on
 294 |       whether a code point has been assigned or not.
 295 | 
 296 |    o  The normalization recommended here, NFC (see Section 3), performs
 297 |       a very limited set of mappings, much more limited than those of
 298 |       the more extensive NFKC used in, e.g., Nameprep [RFC3491].
 299 | 
 300 |    The NFC tables may be updated over time as new characters are added,
 301 |    but the Unicode Consortium has guaranteed the stability of all NFC
 302 |    strings.  That is, if a string does not contain any unassigned
 303 |    characters, and it is normalized according to NFC, it will always be
 304 |    normalized according to all future versions of the Unicode Standard.
 305 |    The stability of the Net-Unicode format is thus guaranteed when any
 306 |    implementation that converts text into Net-Unicode format does not
 307 |    permit unassigned characters.
 308 | 
 309 |    Because Unicode code points that are reserved for private use do not
 310 |    have standard definitions or normalization interpretations, they
 311 |    SHOULD be avoided in strings intended for Internet interchange.
 312 | 
 313 |    Were Unicode to be changed in a way that violated these assumptions,
 314 |    i.e., that either invalidated the byte string order specified in RFC
 315 |    3629 or that changed the stability of NFC as stated above, this
 316 |    specification would not apply.  Put differently, this specification
 317 |    applies only to versions of Unicode starting with version 5.0 and
 318 |    extending to, but not including, any version for which changes are
 319 |    made in either the UTF-8 definition or to NFC stability.  Such
 320 |    changes would violate established Unicode policies and are hence
 321 |    unlikely, but, should they occur, it would be necessary to evaluate
 322 |    them for compatibility with this specification and other Internet
 323 |    uses of NFC.
 324 | 
 325 |    If the specification of a protocol references this one, strings that
 326 |    are received by that protocol and that appear to be UTF-8 and are not
 327 |    otherwise identified (e.g., by charset labeling) SHOULD be treated as
 328 |    using UTF-8 in conformance with this specification.
 329 | 
 330 | 
 331 | 
 332 | 
 333 | 
 334 | 
 335 | 
 336 | 
 337 | 
 338 | Klensin & Padlipsky         Standards Track                     [Page 6]
 339 | 
 340 | RFC 5198                    Network Unicode                   March 2008
 341 | 
 342 | 
 343 | 5.  Applicability and Stability of this Specification
 344 | 
 345 | 5.1.  Use in IETF Applications Specifications
 346 | 
 347 |    During the development of this specification, there was some
 348 |    confusion about where it would be useful given that, e.g., the
 349 |    individual MIME media types used in email and with HTTP have their
 350 |    own rules about UTF-8 character types and normalization, and the
 351 |    application transport protocols impose their own conventions about
 352 |    line endings.  There are three answers.  The first is that, in
 353 |    retrospect, it would have been better to have those protocols and
 354 |    content types standardized in the way specified here, even though it
 355 |    is certainly too late to change them at this time.  The second is
 356 |    that we have several protocols that are dependent on either the
 357 |    original Telnet design or other arrangements requiring a standard,
 358 |    interoperable, string definition without specific content-labels of
 359 |    one sort or another.  Whois [RFC3912] is an example member of this
 360 |    group.  As consideration is given to upgrading them for non-ASCII
 361 |    use, this specification provides a normative reference that provides
 362 |    the same stability that NVT has provided the ASCII forms.  This
 363 |    specification is intended for use by other specifications that have
 364 |    not yet defined how to use Unicode.  Having a preferred standard
 365 |    Internet definition for Unicode text streams -- rather than just one
 366 |    for transmission codings -- may help improve the specification and
 367 |    interoperability of protocols to be developed in the future.  This
 368 |    specification is not intended for use with specifications that
 369 |    already allow the use of UTF-8 and precisely define that use.
 370 | 
 371 | 5.2.  Unicode Versions and Applicability
 372 | 
 373 |    The IETF faces a practical dilemma with regard to versions of
 374 |    Unicode.  Each new version brings with it new characters and
 375 |    sometimes new combining characters.  Version 5.0 introduces the new
 376 |    concept of sequences of characters named as if they were individual
 377 |    characters (see [NamedSequences]).  The normalization represented by
 378 |    NFC is stable if all strings are transmitted and stored in normalized
 379 |    form, if corrections are never made to character definitions or
 380 |    normalization tables, and if unassigned code points are never used.
 381 |    The latter is important because an unassigned code point always
 382 |    normalizes to itself.  However, if the same code point is assigned to
 383 |    a character in a future version, it may participate in some other
 384 |    normalization mapping (some specific difficulties in this regard are
 385 |    discussed in [RFC4690]).  It is worth noting that transmission in
 386 |    normalized form is not required by either the IETF's UTF-8 Standard
 387 |    [RFC3629] or by standards dependent on the current version of
 388 |    Stringprep [RFC3454].
 389 | 
 390 | 
 391 | 
 392 | 
 393 | 
 394 | Klensin & Padlipsky         Standards Track                     [Page 7]
 395 | 
 396 | RFC 5198                    Network Unicode                   March 2008
 397 | 
 398 | 
 399 |    All would be well with this as described in Section 4 except for one
 400 |    problem: Applications typically do not perform their own conversions
 401 |    to Unicode and may not perform their own normalizations but instead
 402 |    rely on operating system or language library functions -- functions
 403 |    that may be upgraded or otherwise changed without changes to the
 404 |    application code itself.  Consequently, there may be no plausible way
 405 |    for an application to know which version of Unicode, or which version
 406 |    of the normalization procedures, it is utilizing, nor is there any
 407 |    way by which it can guarantee that the two will be consistent.
 408 | 
 409 |    Because of per-version changes in definitions and tables, Stringprep
 410 |    and documents depending on it are now tied to Unicode Version 3.2
 411 |    [Unicode32] and full interoperability of Internet Standard UTF-8
 412 |    [RFC3629], when used with normalization as specified here, is
 413 |    dependent on normalization definitions and the definition of UTF-8
 414 |    itself not changing after Unicode Version 5.0.  These assumptions
 415 |    seem fairly safe, but they are still assumptions.  Rather than being
 416 |    linked to the latest available version of Unicode, version 5.0
 417 |    [Unicode] or broader concepts of version independence based on
 418 |    specific assumptions and conditions, this specification could
 419 |    reasonably have been tied, like Stringprep and Nameprep, to Unicode
 420 |    3.2 [Unicode32] or some more recent intermediate version, but, in
 421 |    addition to the obvious disadvantages of having different IETF
 422 |    standards tied to different versions of Unicode, the library-based
 423 |    application implementation behavior described above makes these
 424 |    version linkages nearly meaningless in practice.
 425 | 
 426 |    In theory, one can get around this problem in four ways:
 427 | 
 428 |    1.  Freeze on a particular version of Unicode and try to insist that
 429 |        applications enforce that version by, e.g., containing lists of
 430 |        unassigned characters and prohibiting their use.  Of course, this
 431 |        would prohibit evolution to include newly-added scripts and the
 432 |        tables of unassigned code points would be cumbersome.
 433 | 
 434 |    2.  Require that every Unicode "text" string or file start with a
 435 |        version indication, somewhat akin to the "byte order mark"
 436 |        indicator.  It is unlikely that this provision would be
 437 |        practical.  More important, it would require that each
 438 |        application implementation be prepared to either support multiple
 439 |        normalization tables and versions or that it reject text from
 440 |        Unicode versions with which it was not prepared to deal.
 441 | 
 442 |    3.  Devise a different set of normalization rules that would, e.g.,
 443 |        guarantee that no character assigned to a previously-unassigned
 444 |        code point in Unicode was ever normalized to anything but itself
 445 |        and use those rules instead of NFC.  It is not clear whether or
 446 |        not such a set of rules is possible or whether some other
 447 | 
 448 | 
 449 | 
 450 | Klensin & Padlipsky         Standards Track                     [Page 8]
 451 | 
 452 | RFC 5198                    Network Unicode                   March 2008
 453 | 
 454 | 
 455 |        completely stable set of rules could be devised, perhaps in
 456 |        combination with restrictions on the ways in which characters
 457 |        were added in future versions of Unicode.
 458 | 
 459 |    4.  Devise a normalization process that is otherwise equivalent to
 460 |        NFC but that rejects code points that are unassigned in the
 461 |        current version of Unicode, rather than mapping those code points
 462 |        to themselves.  This would still leave some risk of incompatible
 463 |        corrections in Unicode and possibly a few edge cases, but it is
 464 |        probably stable enough for Internet use in the overwhelming
 465 |        number of cases.  This process has been discussed in the Unicode
 466 |        Consortium under the name "Stable NFC".
 467 | 
 468 |    None of these approaches seems ideal: the ideal procedure would be as
 469 |    stable and predictable as ASCII has been.  But that level is simply
 470 |    not feasible as long as Unicode continues to evolve by the addition
 471 |    of new code points and scripts.  The fourth option listed above
 472 |    appears to be a reasonable compromise.
 473 | 
 474 | 6.  Security Considerations
 475 | 
 476 |    This specification provides a standard form for the use of Unicode as
 477 |    "network text".  Most of the same security issues that apply to
 478 |    UTF-8, as discussed in [RFC3629], apply to it, although it should be
 479 |    slightly less subject to some risks by virtue of requiring NFC
 480 |    normalization and generally being somewhat more restrictive.
 481 |    However, shifts in Unicode versions, as discussed in Section 5.2, may
 482 |    introduce other security issues.
 483 | 
 484 |    Programs that receive these streams should use extreme caution about
 485 |    assuming that incoming data are normalized, since it might be
 486 |    possible to use unnormalized forms, as well as invalid UTF-8, as part
 487 |    of an attack.  In particular, firewalls and other systems that
 488 |    interpret UTF-8 streams should be developed with the clear knowledge
 489 |    that an attacker may deliberately send unnormalized text, for
 490 |    instance, to avoid detection by naive text-matching systems.
 491 | 
 492 |    NVT contains a requirement, of necessity repeated here (see
 493 |    Section 2), that the CR character be immediately followed by either
 494 |    LF or ASCII NUL (an octet with all bits zero).  NUL may be
 495 |    problematic for some programming languages that use it as a string
 496 |    terminator, and hence a trap for the unwary, unless caution is used.
 497 |    This may be an additional reason to avoid the use of CR entirely,
 498 |    except in sequence with LF, as suggested above.
 499 | 
 500 |    The discussion about Unicode versions above (see Section 4 and
 501 |    Section 5.2) makes several assumptions about future versions of
 502 |    Unicode, about NFC normalization being applied properly, and about
 503 | 
 504 | 
 505 | 
 506 | Klensin & Padlipsky         Standards Track                     [Page 9]
 507 | 
 508 | RFC 5198                    Network Unicode                   March 2008
 509 | 
 510 | 
 511 |    UTF-8 being processed and transmitted exactly as specified in RFC
 512 |    3629.  If any of those assumptions are not correct, then there are
 513 |    cases in which strings that would be considered equivalent do not
 514 |    compare equal.  Robust code should be prepared for those
 515 |    possibilities.
 516 | 
 517 | 7.  Acknowledgments
 518 | 
 519 |    Many thanks to Mark Davis, Martin Duerst, and Michel Suignard for
 520 |    suggestions about Unicode normalization that led to the format
 521 |    described here, and especially to Mark for providing the paragraphs
 522 |    that describe the role of NFC.  Thanks also to Mark, Doug Ewell,
 523 |    and Asmus Freytag for corrected text describing Unicode transmission
 524 |    forms, and to Tim Bray, Carsten Bormann, Stephane Bortzmeyer, Martin
 525 |    Duerst, Frank Ellermann, Clive D.W. Feather, Ted Hardie, Bjoern
 526 |    Hoehrmann, Alfred Hoenes, Kent Karlsson, Bill McQuillan, George
 527 |    Michaelson, Chris Newman, and Marcos Sanz for a number of helpful
 528 |    comments and clarification requests.
 529 | 
 530 | 
 531 | 
 532 | 
 533 | 
 534 | 
 535 | 
 536 | 
 537 | 
 538 | 
 539 | 
 540 | 
 541 | 
 542 | 
 543 | 
 544 | 
 545 | 
 546 | 
 547 | 
 548 | 
 549 | 
 550 | 
 551 | 
 552 | 
 553 | 
 554 | 
 555 | 
 556 | 
 557 | 
 558 | 
 559 | 
 560 | 
 561 | 
 562 | Klensin & Padlipsky         Standards Track                    [Page 10]
 563 | 
 564 | RFC 5198                    Network Unicode                   March 2008
 565 | 
 566 | 
 567 | Appendix A.  History and Context
 568 | 
 569 |    This subsection contains a review of prior work in the ARPANET and
 570 |    Internet to establish a standard text type, work that establishes the
 571 |    context and motivation for the approach taken in this document.  The
 572 |    text is explanatory rather than normative: nothing in this section is
 573 |    intended to change or update any current specification.  Those who
 574 |    are uninterested in this review and analysis can safely skip this
 575 |    section.
 576 | 
 577 |    One of the earlier application design decisions made in the
 578 |    development of ARPANET, a decision that was carried forward into the
 579 |    Internet, was the decision to standardize on a single and very
 580 |    specific coding for "text" to be passed across the network [RFC0020].
 581 |    Hosts on the network were then responsible for translating or mapping
 582 |    from whatever character coding conventions were used locally to that
 583 |    common intermediate representation, with sending hosts mapping to it
 584 |    and receiving ones mapping from it to their local forms as needed.
 585 |    It is interesting to note that at the time the ARPANET was being
 586 |    developed, participating host operating systems used at least three
 587 |    different character coding standards: the antiquated BCD (Binary
 588 |    Coded Decimal), the then-dominant major manufacturer-backed EBCDIC
 589 |    (Extended BCD Interchange Code), and the then-still emerging ASCII
 590 |    (American Standard Code for Information Interchange).  Since the
 591 |    ARPANET was an "open" project and EBCDIC was intimately linked to a
 592 |    particular hardware vendor, the original Network Working Group agreed
 593 |    that its standard should be ASCII.  That ASCII form was precisely
 594 |    "7-bit ASCII in an 8-bit field", which was in effect a compromise
 595 |    between hosts that were natively 7-bit oriented (e.g., with five
 596 |    seven-bit characters in a 36-bit word), those that were 8-bit
 597 |    oriented (using eight-bit characters) and those that placed the
 598 |    seven-bit ASCII characters in 9-bit fields with two leading zero bits
 599 |    (four characters in a 36-bit word).
 600 | 
 601 |    More standardization was suggested in the first preliminary
 602 |    description of the Telnet protocol [RFC0097].  With the iterations of
 603 |    that protocol [RFC0137] [RFC0139] and the drawing together of an
 604 |    essentially formal definition somewhat later [RFC0318], a standard
 605 |    abstraction, the Network Virtual Terminal (NVT) was established.  NVT
 606 |    character-coding conventions (initially called "Telnet ASCII" and
 607 |    later called "NVT ASCII", or, more casually, "network ASCII")
 608 |    included the requirement that Carriage Return followed by Line Feed
 609 |    (CRLF) be the common representation for ending lines of text (given
 610 |    that some participating "Host" operating systems used the one
 611 |    natively, some the other, at least one used both, and a few used
 612 |    neither (preferring variable-length lines with counts or special
 613 |    delimiters or markers instead)) and specified conventions for some
 614 |    other characters.  Also, since NVT ASCII was restricted to seven-bit
 615 | 
 616 | 
 617 | 
 618 | Klensin & Padlipsky         Standards Track                    [Page 11]
 619 | 
 620 | RFC 5198                    Network Unicode                   March 2008
 621 | 
 622 | 
 623 |    characters, use of the high-order bit in octets was reserved for the
 624 |    transmission of control signaling information.
 625 | 
 626 |    At a very high level, the concept was that a system could use
 627 |    whatever character coding and line representations were appropriate
 628 |    locally, but text transmitted over the network as text must conform
 629 |    to the single "network virtual terminal" convention.  Virtually all
 630 |    early Internet protocols that presume transfer of "text" assume this
 631 |    virtual terminal model, although different ones assume or limit it in
 632 |    different ways.  Telnet, the command stream and ASCII Type in FTP
 633 |    [RFC0542], the message stream in SMTP transfer [RFC2821], and the
 634 |    strings passed to finger [RFC0742] and whois [RFC0954] are the
 635 |    classic examples.  More recently, HTTP [RFC1945] [RFC2616] follows
 636 |    the same general model but permits 8-bit data and leaves the line end
 637 |    sequence unspecified (the latter has been the source of a significant
 638 |    number of problems).
 639 | 
 640 | Appendix B.  The ASCII NVT Definition
 641 | 
 642 |    The main body of this specification is intended as an update to, and
 643 |    internationalized version of, the Net-ASCII definition.  The
 644 |    specification is self-contained in that parts of the Net-ASCII
 645 |    definition that are no longer recommended are not included above.
 646 |    Because Net-ASCII evolved somewhat over time and there has been
 647 |    debate about which specification is the "official" Net-ASCII, it is
 648 |    appropriate to review the key elements of that definition here.  This
 649 |    review is informal with regard to the contents of Net-ASCII and
 650 |    should not be considered as a normative update or summary of the
 651 |    earlier specifications (Section 2 does specify some normative updates
 652 |    to those specifications and some comments below are consistent with
 653 |    it).
 654 | 
 655 |    The first part of the section titled "THE NVT PRINTER AND KEYBOARD"
 656 |    in RFC 854 [RFC0854] is generally, although not universally,
 657 |    considered to be the normative definition of the (ASCII) Network
 658 |    Virtual Terminal and hence of Net-ASCII.  It includes not only the
 659 |    graphic ASCII characters but a number of control characters.  The
 660 |    latter are given Internet-specific meanings that are often more
 661 |    specific than the definitions in the ASCII specification.  In today's
 662 |    usage, and for the present specification, the following
 663 |    clarifications and updates to that list should be noted.  Each one is
 664 |    accompanied by a brief explanation of the reason why the original
 665 |    specification is no longer appropriate.
 666 | 
 667 |    1.  The "defined but not required" codes -- BEL (U+0007), BS
 668 |        (U+0008), HT (U+0009), VT (U+000B), and FF (U+000C) -- and the
 669 |        undefined control codes ("C0") SHOULD NOT be used unless required
 670 |        by exceptional circumstances.  Either their original "network
 671 | 
 672 | 
 673 | 
 674 | Klensin & Padlipsky         Standards Track                    [Page 12]
 675 | 
 676 | RFC 5198                    Network Unicode                   March 2008
 677 | 
 678 | 
 679 |        printer" definitions are no longer in general use, common
 680 |        practice has evolved away from the formats specified there, or
 681 |        their use to simulate characters that are better handled by
 682 |        Unicode is no longer appropriate.  While the appearance of some
 683 |        of these characters on the list may seem surprising, BS now has
 684 |        an ambiguous interpretation in practice (erasing in some systems
 685 |        but not in others), the width associated with HT varies with the
 686 |        environment, and VT and FF do not have a uniform effect with
 687 |        regard to either vertical positioning or the associated
 688 |        horizontal position result.  Of course, telnet escapes are not
 689 |        considered part of the data stream and hence are unaffected by
 690 |        this provision.
 691 | 
 692 |    2.  In Net-ASCII, CR MUST NOT appear except when immediately followed
 693 |        by either NUL or LF, with the latter (CR LF) designating the "new
 694 |        line" function.  Today and as specified above, CR should
 695 |        generally appear only when followed by LF.  Because page layout
 696 |        is better done in other ways, because NUL has a special
 697 |        interpretation in some programming languages, and to avoid other
 698 |        types of confusion, CR NUL should preferably be avoided as
 699 |        specified above.
 700 | 
 701 |    3.  LF CR SHOULD NOT appear except as a side-effect of multiple CR LF
 702 |        sequences (e.g., CR LF CR LF).
 703 | 
 704 |    4.  The historical NVT documents do not call out either "bare LF" (LF
 705 |        without CR) or HT for special treatment.  Both have generally
 706 |        been understood to be problematic.  In the case of LF, there is a
 707 |        difference in interpretation as to whether its semantics imply
 708 |        "go to same position on the next line" or "go to the first
 709 |        position on the next line" and interoperability considerations
 710 |        suggest not depending on which interpretation the receiver
 711 |        applies.  At the same time, misinterpretation of LF is less
 712 |        harmful than misinterpretation of "bare" CR: in the CR case, text
 713 |        may be erased or made completely unreadable; in the LF one, the
 714 |        worst consequence is a very funny-looking display.  Obviously, HT
 715 |        is problematic because there is no standard way to transmit
 716 |        intended tab position or width information in running text.
 717 |        Again, the harm is unlikely to be great if HT is simply
 718 |        interpreted as one or more spaces, but, in general, it cannot be
 719 |        relied upon to format information.
 720 | 
 721 |    It is worth noting that the telnet IAC character (an octet consisting
 722 |    of all ones, i.e., %xFF) itself is not a problem for UTF-8 since that
 723 |    particular octet cannot appear in a valid UTF-8 string.  However,
 724 |    while few of them have been used, telnet permits other command-
 725 |    introducer characters whose bit sequences in an octet may be part of
 726 |    valid UTF-8 characters.  While it causes no ambiguity in UTF-8,
 727 | 
 728 | 
 729 | 
 730 | Klensin & Padlipsky         Standards Track                    [Page 13]
 731 | 
 732 | RFC 5198                    Network Unicode                   March 2008
 733 | 
 734 | 
 735 |    Unicode assigns a graphic character ("Latin Small Letter Y with
 736 |    Diaeresis") to U+00FF (octets C3 BF in UTF-8).  Some caution is
 737 |    clearly in order in this area.
 738 | 
 739 | Appendix C.  The Line-Ending Problem
 740 | 
 741 |    The definition of how a line ending should be denoted in plain text
 742 |    strings on the wire for the Internet has been controversial from even
 743 |    before the introduction of NVT.  Some have argued that recipients
 744 |    should be required to interpret almost anything that a sender might
 745 |    intend as a line ending as actually a line ending.  Others have
 746 |    pointed out that this would lead to some ambiguities of
 747 |    interpretation and presentation and would violate the principle that
 748 |    we should minimize the number of forms that are permitted on the wire
 749 |    in order to promote interoperability and eliminate the "every
 750 |    recipient needs to understand every sender format" problem.  The
 751 |    design of this specification, like that of NVT, takes the latter
 752 |    approach.  Its designers believe that there is little point in a
 753 |    standard if it is to specify "anyone can do whatever they like and
 754 |    the receiver just needs to cope".
 755 | 
 756 |    A further discussion of the nature and evolution of the line-ending
 757 |    problem appears in Section 5.8 of the Unicode Standard [Unicode] and
 758 |    is suggested for additional reading.  If we were starting with the
 759 |    Internet today, it would probably be sensible to follow the
 760 |    recommendation there and use LS (U+2028) exclusively, in preference
 761 |    to CRLF.  However, the installed base of use of CRLF and the
 762 |    importance of forward compatibility with NVT and protocols that
 763 |    assume it make that impossible, so it is necessary to continue using
 764 |    CRLF as the "New Line Function" ("NLF", see the terminology section
 765 |    in that reference).
 766 | 
 767 | Appendix D.  A Note about Related Future Work
 768 | 
 769 |    Consideration should be given to a Telnet (or SSH [RFC4251]) option
 770 |    to specify this type of stream and an FTP extension [RFC0959] to
 771 |    permit a new "Unicode text" data TYPE.
 772 | 
 773 | 
 774 | 
 775 | 
 776 | 
 777 | 
 778 | 
 779 | 
 780 | 
 781 | 
 782 | 
 783 | 
 784 | 
 785 | 
 786 | Klensin & Padlipsky         Standards Track                    [Page 14]
 787 | 
 788 | RFC 5198                    Network Unicode                   March 2008
 789 | 
 790 | 
 791 | References
 792 | 
 793 | Normative References
 794 | 
 795 |    [ISO10646]        International Organization for Standardization,
 796 |                      "Information Technology - Universal Multiple-Octet
 797 |                      Coded Character Set (UCS) - Part 1: Architecture
 798 |                      and Basic Multilingual Plane", ISO/
 799 |                      IEC 10646-1:2000, October 2000.
 800 | 
 801 |    [NFC]             Davis, M. and M. Duerst, "Unicode Standard Annex
 802 |                      #15: Unicode Normalization Forms", October 2006,
 803 |                      <http://www.unicode.org/reports/tr15/>.
 804 | 
 805 |    [RFC2119]         Bradner, S., "Key words for use in RFCs to Indicate
 806 |                      Requirement Levels", BCP 14, RFC 2119, March 1997.
 807 | 
 808 |    [RFC3629]         Yergeau, F., "UTF-8, a transformation format of ISO
 809 |                      10646", STD 63, RFC 3629, November 2003.
 810 | 
 811 |    [RFC5234]         Crocker, D. and P. Overell, "Augmented BNF for
 812 |                      Syntax Specifications: ABNF", STD 68, RFC 5234,
 813 |                      January 2008.
 814 | 
 815 |    [Unicode]         The Unicode Consortium, "The Unicode Standard,
 816 |                      Version 5.0", 2007.
 817 | 
 818 |                      Boston, MA, USA: Addison-Wesley.  ISBN
 819 |                      0-321-48091-0
 820 | 
 821 |    [Unicode32]       The Unicode Consortium, "The Unicode Standard,
 822 |                      Version 3.0", 2000.
 823 | 
 824 |                      (Reading, MA, Addison-Wesley, 2000.  ISBN 0-201-
 825 |                      61633-5).  Version 3.2 consists of the definition
 826 |                      in that book as amended by the Unicode Standard
 827 |                      Annex #27: Unicode 3.1
 828 |                      (http://www.unicode.org/reports/tr27/) and by the
 829 |                      Unicode Standard Annex #28: Unicode 3.2
 830 |                      (http://www.unicode.org/reports/tr28/).
 831 | 
 832 | 
 833 | 
 834 | 
 835 | 
 836 | 
 837 | 
 838 | 
 839 | 
 840 | 
 841 | 
 842 | Klensin & Padlipsky         Standards Track                    [Page 15]
 843 | 
 844 | RFC 5198                    Network Unicode                   March 2008
 845 | 
 846 | 
 847 | Informative References
 848 | 
 849 |    [ASCII]           American National Standards Institute (formerly
 850 |                      United States of America Standards Institute), "USA
 851 |                      Code for Information Interchange", ANSI X3.4-1968,
 852 |                      1968.
 853 | 
 854 |                      ANSI X3.4-1968 has been replaced by newer versions
 855 |                      with slight modifications, but the 1968 version
 856 |                      remains definitive for the Internet.  ISO 646
 857 |                      International Reference Version (IRV)
 858 |                      [ISO.646.1991] is usually considered equivalent to
 859 |                      ASCII.
 860 | 
 861 |    [ISO.646.1991]    International Organization for Standardization,
 862 |                      "Information technology - ISO 7-bit coded character
 863 |                      set for information interchange", ISO Standard 646,
 864 |                      1991.
 865 | 
 866 |    [NamedSequences]  The Unicode Consortium, "NamedSequences-4.1.0.txt",
 867 |                      2005, <http://www.unicode.org/Public/UNIDATA/
 868 |                      NamedSequences.txt>.
 869 | 
 870 |    [RFC0020]         Cerf, V., "ASCII format for network interchange",
 871 |                      RFC 20, October 1969.
 872 | 
 873 |    [RFC0097]         Melvin, J. and R. Watson, "First Cut at a Proposed
 874 |                      Telnet Protocol", RFC 97, February 1971.
 875 | 
 876 |    [RFC0137]         O'Sullivan, T., "Telnet Protocol - a proposed
 877 |                      document", RFC 137, April 1971.
 878 | 
 879 |    [RFC0139]         O'Sullivan, T., "Discussion of Telnet Protocol",
 880 |                      RFC 139, May 1971.
 881 | 
 882 |    [RFC0318]         Postel, J., "Telnet Protocols", RFC 318,
 883 |                      April 1972.
 884 | 
 885 |    [RFC0542]         Neigus, N., "File Transfer Protocol", RFC 542,
 886 |                      August 1973.
 887 | 
 888 |    [RFC0698]         Mock, T., "Telnet extended ASCII option", RFC 698,
 889 |                      July 1975.
 890 | 
 891 |    [RFC0742]         Harrenstien, K., "NAME/FINGER Protocol", RFC 742,
 892 |                      December 1977.
 893 | 
 894 | 
 895 | 
 896 | 
 897 | 
 898 | Klensin & Padlipsky         Standards Track                    [Page 16]
 899 | 
 900 | RFC 5198                    Network Unicode                   March 2008
 901 | 
 902 | 
 903 |    [RFC0854]         Postel, J. and J. Reynolds, "Telnet Protocol
 904 |                      Specification", STD 8, RFC 854, May 1983.
 905 | 
 906 |    [RFC0954]         Harrenstien, K., Stahl, M., and E. Feinler,
 907 |                      "NICNAME/WHOIS", RFC 954, October 1985.
 908 | 
 909 |    [RFC0959]         Postel, J. and J. Reynolds, "File Transfer
 910 |                      Protocol", STD 9, RFC 959, October 1985.
 911 | 
 912 |    [RFC1945]         Berners-Lee, T., Fielding, R., and H. Nielsen,
 913 |                      "Hypertext Transfer Protocol -- HTTP/1.0",
 914 |                      RFC 1945, May 1996.
 915 | 
 916 |    [RFC2277]         Alvestrand, H., "IETF Policy on Character Sets and
 917 |                      Languages", BCP 18, RFC 2277, January 1998.
 918 | 
 919 |    [RFC2616]         Fielding, R., Gettys, J., Mogul, J., Frystyk, H.,
 920 |                      Masinter, L., Leach, P., and T. Berners-Lee,
 921 |                      "Hypertext Transfer Protocol -- HTTP/1.1",
 922 |                      RFC 2616, June 1999.
 923 | 
 924 |    [RFC2781]         Hoffman, P. and F. Yergeau, "UTF-16, an encoding of
 925 |                      ISO 10646", RFC 2781, February 2000.
 926 | 
 927 |    [RFC2821]         Klensin, J., "Simple Mail Transfer Protocol",
 928 |                      RFC 2821, April 2001.
 929 | 
 930 |    [RFC3454]         Hoffman, P. and M. Blanchet, "Preparation of
 931 |                      Internationalized Strings ("stringprep")",
 932 |                      RFC 3454, December 2002.
 933 | 
 934 |    [RFC3491]         Hoffman, P. and M. Blanchet, "Nameprep: A
 935 |                      Stringprep Profile for Internationalized Domain
 936 |                      Names (IDN)", RFC 3491, March 2003.
 937 | 
 938 |    [RFC3912]         Daigle, L., "WHOIS Protocol Specification",
 939 |                      RFC 3912, September 2004.
 940 | 
 941 |    [RFC4251]         Ylonen, T. and C. Lonvick, "The Secure Shell (SSH)
 942 |                      Protocol Architecture", RFC 4251, January 2006.
 943 | 
 944 |    [RFC4690]         Klensin, J., Faltstrom, P., Karp, C., and IAB,
 945 |                      "Review and Recommendations for Internationalized
 946 |                      Domain Names (IDNs)", RFC 4690, September 2006.
 947 | 
 948 | 
 949 | 
 950 | 
 951 | 
 952 | 
 953 | 
 954 | Klensin & Padlipsky         Standards Track                    [Page 17]
 955 | 
 956 | RFC 5198                    Network Unicode                   March 2008
 957 | 
 958 | 
 959 | Authors' Addresses
 960 | 
 961 |    John C Klensin
 962 |    1770 Massachusetts Ave, #322
 963 |    Cambridge, MA  02140
 964 |    USA
 965 | 
 966 |    Phone: +1 617 491 5735
 967 |    EMail: john-ietf@jck.com
 968 | 
 969 | 
 970 |    Michael A. Padlipsky
 971 |    8011 Stewart Ave.
 972 |    Los Angeles, CA  90045
 973 |    USA
 974 | 
 975 |    Phone: +1 310-670-4288
 976 |    EMail: the.map@alum.mit.edu
 977 | 
 978 | 
 979 | 
 980 | 
 981 | 
 982 | 
 983 | 
 984 | 
 985 | 
 986 | 
 987 | 
 988 | 
 989 | 
 990 | 
 991 | 
 992 | 
 993 | 
 994 | 
 995 | 
 996 | 
 997 | 
 998 | 
 999 | 
1000 | 
1001 | 
1002 | 
1003 | 
1004 | 
1005 | 
1006 | 
1007 | 
1008 | 
1009 | 
1010 | Klensin & Padlipsky         Standards Track                    [Page 18]
1011 | 
1012 | RFC 5198                    Network Unicode                   March 2008
1013 | 
1014 | 
1015 | Full Copyright Statement
1016 | 
1017 |    Copyright (C) The IETF Trust (2008).
1018 | 
1019 |    This document is subject to the rights, licenses and restrictions
1020 |    contained in BCP 78, and except as set forth therein, the authors
1021 |    retain all their rights.
1022 | 
1023 |    This document and the information contained herein are provided on an
1024 |    "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
1025 |    OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY, THE IETF TRUST AND
1026 |    THE INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS
1027 |    OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF
1028 |    THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
1029 |    WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
1030 | 
1031 | Intellectual Property
1032 | 
1033 |    The IETF takes no position regarding the validity or scope of any
1034 |    Intellectual Property Rights or other rights that might be claimed to
1035 |    pertain to the implementation or use of the technology described in
1036 |    this document or the extent to which any license under such rights
1037 |    might or might not be available; nor does it represent that it has
1038 |    made any independent effort to identify any such rights.  Information
1039 |    on the procedures with respect to rights in RFC documents can be
1040 |    found in BCP 78 and BCP 79.
1041 | 
1042 |    Copies of IPR disclosures made to the IETF Secretariat and any
1043 |    assurances of licenses to be made available, or the result of an
1044 |    attempt made to obtain a general license or permission for the use of
1045 |    such proprietary rights by implementers or users of this
1046 |    specification can be obtained from the IETF on-line IPR repository at
1047 |    http://www.ietf.org/ipr.
1048 | 
1049 |    The IETF invites any interested party to bring to its attention any
1050 |    copyrights, patents or patent applications, or other proprietary
1051 |    rights that may cover technology that may be required to implement
1052 |    this standard.  Please address the information to the IETF at
1053 |    ietf-ipr@ietf.org.
1054 | 
1055 | 
1056 | 
1057 | 
1058 | 
1059 | 
1060 | 
1061 | 
1062 | 
1063 | 
1064 | 
1065 | 
1066 | Klensin & Padlipsky         Standards Track                    [Page 19]
1067 | 
1068 | 


--------------------------------------------------------------------------------
/rfc/rfc854.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | Network Working Group                                          J. Postel
  3 | Request for Comments: 854                                    J. Reynolds
  4 |                                                                      ISI
  5 | Obsoletes: NIC 18639                                            May 1983
  6 | 
  7 |                      TELNET PROTOCOL SPECIFICATION
  8 | 
  9 | 
 10 | This RFC specifies a standard for the ARPA Internet community.  Hosts on
 11 | the ARPA Internet are expected to adopt and implement this standard.
 12 | 
 13 | INTRODUCTION
 14 | 
 15 |    The purpose of the TELNET Protocol is to provide a fairly general,
 16 |    bi-directional, eight-bit byte oriented communications facility.  Its
 17 |    primary goal is to allow a standard method of interfacing terminal
 18 |    devices and terminal-oriented processes to each other.  It is
 19 |    envisioned that the protocol may also be used for terminal-terminal
 20 |    communication ("linking") and process-process communication
 21 |    (distributed computation).
 22 | 
 23 | GENERAL CONSIDERATIONS
 24 | 
 25 |    A TELNET connection is a Transmission Control Protocol (TCP)
 26 |    connection used to transmit data with interspersed TELNET control
 27 |    information.
 28 | 
 29 |    The TELNET Protocol is built upon three main ideas:  first, the
 30 |    concept of a "Network Virtual Terminal"; second, the principle of
 31 |    negotiated options; and third, a symmetric view of terminals and
 32 |    processes.
 33 | 
 34 |    1.  When a TELNET connection is first established, each end is
 35 |    assumed to originate and terminate at a "Network Virtual Terminal",
 36 |    or NVT.  An NVT is an imaginary device which provides a standard,
 37 |    network-wide, intermediate representation of a canonical terminal.
 38 |    This eliminates the need for "server" and "user" hosts to keep
 39 |    information about the characteristics of each other's terminals and
 40 |    terminal handling conventions.  All hosts, both user and server, map
 41 |    their local device characteristics and conventions so as to appear to
 42 |    be dealing with an NVT over the network, and each can assume a
 43 |    similar mapping by the other party.  The NVT is intended to strike a
 44 |    balance between being overly restricted (not providing hosts a rich
 45 |    enough vocabulary for mapping into their local character sets), and
 46 |    being overly inclusive (penalizing users with modest terminals).
 47 | 
 48 |       NOTE:  The "user" host is the host to which the physical terminal
 49 |       is normally attached, and the "server" host is the host which is
 50 |       normally providing some service.  As an alternate point of view,
 51 | 
 52 | 
 53 | 
 54 | 
 55 | Postel & Reynolds                                               [Page 1]
 56 | 
 57 | 
 58 | 
 59 | RFC 854                                                         May 1983
 60 | 
 61 | 
 62 |       applicable even in terminal-to-terminal or process-to-process
 63 |       communications, the "user" host is the host which initiated the
 64 |       communication.
 65 | 
 66 |    2.  The principle of negotiated options takes cognizance of the fact
 67 |    that many hosts will wish to provide additional services over and
 68 |    above those available within an NVT, and many users will have
 69 |    sophisticated terminals and would like to have elegant, rather than
 70 |    minimal, services.  Independent of, but structured within the TELNET
 71 |    Protocol are various "options" that will be sanctioned and may be
 72 |    used with the "DO, DON'T, WILL, WON'T" structure (discussed below) to
 73 |    allow a user and server to agree to use a more elaborate (or perhaps
 74 |    just different) set of conventions for their TELNET connection.  Such
 75 |    options could include changing the character set, the echo mode, etc.
 76 | 
 77 |    The basic strategy for setting up the use of options is to have
 78 |    either party (or both) initiate a request that some option take
 79 |    effect.  The other party may then either accept or reject the
 80 |    request.  If the request is accepted the option immediately takes
 81 |    effect; if it is rejected the associated aspect of the connection
 82 |    remains as specified for an NVT.  Clearly, a party may always refuse
 83 |    a request to enable, and must never refuse a request to disable some
 84 |    option since all parties must be prepared to support the NVT.
 85 | 
 86 |    The syntax of option negotiation has been set up so that if both
 87 |    parties request an option simultaneously, each will see the other's
 88 |    request as the positive acknowledgment of its own.
 89 | 
 90 |    3.  The symmetry of the negotiation syntax can potentially lead to
 91 |    nonterminating acknowledgment loops -- each party seeing the incoming
 92 |    commands not as acknowledgments but as new requests which must be
 93 |    acknowledged.  To prevent such loops, the following rules prevail:
 94 | 
 95 |       a. Parties may only request a change in option status; i.e., a
 96 |       party may not send out a "request" merely to announce what mode it
 97 |       is in.
 98 | 
 99 |       b. If a party receives what appears to be a request to enter some
100 |       mode it is already in, the request should not be acknowledged.
101 |       This non-response is essential to prevent endless loops in the
102 |       negotiation.  It is required that a response be sent to requests
103 |       for a change of mode -- even if the mode is not changed.
104 | 
105 |       c. Whenever one party sends an option command to a second party,
106 |       whether as a request or an acknowledgment, and use of the option
107 |       will have any effect on the processing of the data being sent from
108 |       the first party to the second, then the command must be inserted
109 |       in the data stream at the point where it is desired that it take
110 | 
111 | 
112 | Postel & Reynolds                                               [Page 2]
113 | 
114 | 
115 | 
116 | RFC 854                                                         May 1983
117 | 
118 | 
119 |       effect.  (It should be noted that some time will elapse between
120 |       the transmission of a request and the receipt of an
121 |       acknowledgment, which may be negative.  Thus, a host may wish to
122 |       buffer data, after requesting an option, until it learns whether
123 |       the request is accepted or rejected, in order to hide the
124 |       "uncertainty period" from the user.)
125 | 
126 |    Option requests are likely to flurry back and forth when a TELNET
127 |    connection is first established, as each party attempts to get the
128 |    best possible service from the other party.  Beyond that, however,
129 |    options can be used to dynamically modify the characteristics of the
130 |    connection to suit changing local conditions.  For example, the NVT,
131 |    as will be explained later, uses a transmission discipline well
132 |    suited to the many "line at a time" applications such as BASIC, but
133 |    poorly suited to the many "character at a time" applications such as
134 |    NLS.  A server might elect to devote the extra processor overhead
135 |    required for a "character at a time" discipline when it was suitable
136 |    for the local process and would negotiate an appropriate option.
137 |    However, rather than then being permanently burdened with the extra
138 |    processing overhead, it could switch (i.e., negotiate) back to NVT
139 |    when the detailed control was no longer necessary.
140 | 
141 |    It is possible for requests initiated by processes to stimulate a
142 |    nonterminating request loop if the process responds to a rejection by
143 |    merely re-requesting the option.  To prevent such loops from
144 |    occurring, rejected requests should not be repeated until something
145 |    changes.  Operationally, this can mean the process is running a
146 |    different program, or the user has given another command, or whatever
147 |    makes sense in the context of the given process and the given option.
148 |    A good rule of thumb is that a re-request should only occur as a
149 |    result of subsequent information from the other end of the connection
150 |    or when demanded by local human intervention.
151 | 
152 |    Option designers should not feel constrained by the somewhat limited
153 |    syntax available for option negotiation.  The intent of the simple
154 |    syntax is to make it easy to have options -- since it is
155 |    correspondingly easy to profess ignorance about them.  If some
156 |    particular option requires a richer negotiation structure than
157 |    possible within "DO, DON'T, WILL, WON'T", the proper tack is to use
158 |    "DO, DON'T, WILL, WON'T" to establish that both parties understand
159 |    the option, and once this is accomplished a more exotic syntax can be
160 |    used freely.  For example, a party might send a request to alter
161 |    (establish) line length.  If it is accepted, then a different syntax
162 |    can be used for actually negotiating the line length -- such a
163 |    "sub-negotiation" might include fields for minimum allowable, maximum
164 |    allowable and desired line lengths.  The important concept is that
165 | 
166 | 
167 | 
168 | 
169 | Postel & Reynolds                                               [Page 3]
170 | 
171 | 
172 | 
173 | RFC 854                                                         May 1983
174 | 
175 | 
176 |    such expanded negotiations should never begin until some prior
177 |    (standard) negotiation has established that both parties are capable
178 |    of parsing the expanded syntax.
179 | 
180 |    In summary, WILL XXX is sent, by either party, to indicate that
181 |    party's desire (offer) to begin performing option XXX, DO XXX and
182 |    DON'T XXX being its positive and negative acknowledgments; similarly,
183 |    DO XXX is sent to indicate a desire (request) that the other party
184 |    (i.e., the recipient of the DO) begin performing option XXX, WILL XXX
185 |    and WON'T XXX being the positive and negative acknowledgments.  Since
186 |    the NVT is what is left when no options are enabled, the DON'T and
187 |    WON'T responses are guaranteed to leave the connection in a state
188 |    which both ends can handle.  Thus, all hosts may implement their
189 |    TELNET processes to be totally unaware of options that are not
190 |    supported, simply returning a rejection to (i.e., refusing) any
191 |    option request that cannot be understood.
192 | 
193 |    As much as possible, the TELNET protocol has been made server-user
194 |    symmetrical so that it easily and naturally covers the user-user
195 |    (linking) and server-server (cooperating processes) cases.  It is
196 |    hoped, but not absolutely required, that options will further this
197 |    intent.  In any case, it is explicitly acknowledged that symmetry is
198 |    an operating principle rather than an ironclad rule.
199 | 
200 |    A companion document, "TELNET Option Specifications," should be
201 |    consulted for information about the procedure for establishing new
202 |    options.
203 | 
204 | THE NETWORK VIRTUAL TERMINAL
205 | 
206 |    The Network Virtual Terminal (NVT) is a bi-directional character
207 |    device.  The NVT has a printer and a keyboard.  The printer responds
208 |    to incoming data and the keyboard produces outgoing data which is
209 |    sent over the TELNET connection and, if "echoes" are desired, to the
210 |    NVT's printer as well.  "Echoes" will not be expected to traverse the
211 |    network (although options exist to enable a "remote" echoing mode of
212 |    operation, no host is required to implement this option).  The code
213 |    set is seven-bit USASCII in an eight-bit field, except as modified
214 |    herein.  Any code conversion and timing considerations are local
215 |    problems and do not affect the NVT.
216 | 
217 |    TRANSMISSION OF DATA
218 | 
219 |       Although a TELNET connection through the network is intrinsically
220 |       full duplex, the NVT is to be viewed as a half-duplex device
221 |       operating in a line-buffered mode.  That is, unless and until
222 | 
223 | 
224 | 
225 | 
226 | Postel & Reynolds                                               [Page 4]
227 | 
228 | 
229 | 
230 | RFC 854                                                         May 1983
231 | 
232 | 
233 |       options are negotiated to the contrary, the following default
234 |       conditions pertain to the transmission of data over the TELNET
235 |       connection:
236 | 
237 |          1)  Insofar as the availability of local buffer space permits,
238 |          data should be accumulated in the host where it is generated
239 |          until a complete line of data is ready for transmission, or
240 |          until some locally-defined explicit signal to transmit occurs.
241 |          This signal could be generated either by a process or by a
242 |          human user.
243 | 
244 |          The motivation for this rule is the high cost, to some hosts,
245 |          of processing network input interrupts, coupled with the
246 |          default NVT specification that "echoes" do not traverse the
247 |          network.  Thus, it is reasonable to buffer some amount of data
248 |          at its source.  Many systems take some processing action at the
249 |          end of each input line (even line printers or card punches
250 |          frequently tend to work this way), so the transmission should
251 |          be triggered at the end of a line.  On the other hand, a user
252 |          or process may sometimes find it necessary or desirable to
253 |          provide data which does not terminate at the end of a line;
254 |          therefore implementers are cautioned to provide methods of
255 |          locally signaling that all buffered data should be transmitted
256 |          immediately.
257 | 
258 |          2)  When a process has completed sending data to an NVT printer
259 |          and has no queued input from the NVT keyboard for further
260 |          processing (i.e., when a process at one end of a TELNET
261 |          connection cannot proceed without input from the other end),
262 |          the process must transmit the TELNET Go Ahead (GA) command.
263 | 
264 |          This rule is not intended to require that the TELNET GA command
265 |          be sent from a terminal at the end of each line, since server
266 |          hosts do not normally require a special signal (in addition to
267 |          end-of-line or other locally-defined characters) in order to
268 |          commence processing.  Rather, the TELNET GA is designed to help
269 |          a user's local host operate a physically half duplex terminal
270 |          which has a "lockable" keyboard such as the IBM 2741.  A
271 |          description of this type of terminal may help to explain the
272 |          proper use of the GA command.
273 | 
274 |          The terminal-computer connection is always under control of
275 |          either the user or the computer.  Neither can unilaterally
276 |          seize control from the other; rather the controlling end must
 277 |          relinquish its control explicitly.  At the terminal end, the
278 |          hardware is constructed so as to relinquish control each time
279 |          that a "line" is terminated (i.e., when the "New Line" key is
280 |          typed by the user).  When this occurs, the attached (local)
281 | 
282 | 
283 | Postel & Reynolds                                               [Page 5]
284 | 
285 | 
286 | 
287 | RFC 854                                                         May 1983
288 | 
289 | 
290 |          computer processes the input data, decides if output should be
291 |          generated, and if not returns control to the terminal.  If
292 |          output should be generated, control is retained by the computer
293 |          until all output has been transmitted.
294 | 
295 |          The difficulties of using this type of terminal through the
296 |          network should be obvious.  The "local" computer is no longer
297 |          able to decide whether to retain control after seeing an
298 |          end-of-line signal or not; this decision can only be made by
299 |          the "remote" computer which is processing the data.  Therefore,
300 |          the TELNET GA command provides a mechanism whereby the "remote"
301 |          (server) computer can signal the "local" (user) computer that
302 |          it is time to pass control to the user of the terminal.  It
303 |          should be transmitted at those times, and only at those times,
304 |          when the user should be given control of the terminal.  Note
305 |          that premature transmission of the GA command may result in the
306 |          blocking of output, since the user is likely to assume that the
307 |          transmitting system has paused, and therefore he will fail to
308 |          turn the line around manually.
309 | 
310 |       The foregoing, of course, does not apply to the user-to-server
311 |       direction of communication.  In this direction, GAs may be sent at
312 |       any time, but need not ever be sent.  Also, if the TELNET
313 |       connection is being used for process-to-process communication, GAs
314 |       need not be sent in either direction.  Finally, for
315 |       terminal-to-terminal communication, GAs may be required in
316 |       neither, one, or both directions.  If a host plans to support
317 |       terminal-to-terminal communication it is suggested that the host
318 |       provide the user with a means of manually signaling that it is
319 |       time for a GA to be sent over the TELNET connection; this,
320 |       however, is not a requirement on the implementer of a TELNET
321 |       process.
322 | 
323 |       Note that the symmetry of the TELNET model requires that there is
324 |       an NVT at each end of the TELNET connection, at least
325 |       conceptually.
326 | 
327 |    STANDARD REPRESENTATION OF CONTROL FUNCTIONS
328 | 
329 |       As stated in the Introduction to this document, the primary goal
330 |       of the TELNET protocol is the provision of a standard interfacing
331 |       of terminal devices and terminal-oriented processes through the
332 |       network.  Early experiences with this type of interconnection have
333 |       shown that certain functions are implemented by most servers, but
334 |       that the methods of invoking these functions differ widely.  For a
335 |       human user who interacts with several server systems, these
336 |       differences are highly frustrating.  TELNET, therefore, defines a
337 |       standard representation for five of these functions, as described
338 | 
339 | 
340 | Postel & Reynolds                                               [Page 6]
341 | 
342 | 
343 | 
344 | RFC 854                                                         May 1983
345 | 
346 | 
347 |       below.  These standard representations have standard, but not
348 |       required, meanings (with the exception that the Interrupt Process
349 |       (IP) function may be required by other protocols which use
350 |       TELNET); that is, a system which does not provide the function to
351 |       local users need not provide it to network users and may treat the
352 |       standard representation for the function as a No-operation.  On
353 |       the other hand, a system which does provide the function to a
354 |       local user is obliged to provide the same function to a network
355 |       user who transmits the standard representation for the function.
356 | 
357 |       Interrupt Process (IP)
358 | 
359 |          Many systems provide a function which suspends, interrupts,
360 |          aborts, or terminates the operation of a user process.  This
361 |          function is frequently used when a user believes his process is
362 |          in an unending loop, or when an unwanted process has been
363 |          inadvertently activated.  IP is the standard representation for
364 |          invoking this function.  It should be noted by implementers
365 |          that IP may be required by other protocols which use TELNET,
366 |          and therefore should be implemented if these other protocols
367 |          are to be supported.
368 | 
369 |       Abort Output (AO)
370 | 
371 |          Many systems provide a function which allows a process, which
372 |          is generating output, to run to completion (or to reach the
373 |          same stopping point it would reach if running to completion)
374 |          but without sending the output to the user's terminal.
375 |          Further, this function typically clears any output already
376 |          produced but not yet actually printed (or displayed) on the
377 |          user's terminal.  AO is the standard representation for
378 |          invoking this function.  For example, some subsystem might
379 |          normally accept a user's command, send a long text string to
380 |          the user's terminal in response, and finally signal readiness
381 |          to accept the next command by sending a "prompt" character
382 |          (preceded by <CR><LF>) to the user's terminal.  If the AO were
383 |          received during the transmission of the text string, a
384 |          reasonable implementation would be to suppress the remainder of
385 |          the text string, but transmit the prompt character and the
386 |          preceding <CR><LF>.  (This is possibly in distinction to the
387 |          action which might be taken if an IP were received; the IP
388 |          might cause suppression of the text string and an exit from the
389 |          subsystem.)
390 | 
391 |          It should be noted, by server systems which provide this
392 |          function, that there may be buffers external to the system (in
393 | 
394 | 
395 | 
396 | 
397 | Postel & Reynolds                                               [Page 7]
398 | 
399 | 
400 | 
401 | RFC 854                                                         May 1983
402 | 
403 | 
404 |          the network and the user's local host) which should be cleared;
405 |          the appropriate way to do this is to transmit the "Synch"
406 |          signal (described below) to the user system.
407 | 
408 |       Are You There (AYT)
409 | 
410 |          Many systems provide a function which provides the user with
411 |          some visible (e.g., printable) evidence that the system is
412 |          still up and running.  This function may be invoked by the user
413 |          when the system is unexpectedly "silent" for a long time,
414 |          because of the unanticipated (by the user) length of a
415 |          computation, an unusually heavy system load, etc.  AYT is the
416 |          standard representation for invoking this function.
417 | 
418 |       Erase Character (EC)
419 | 
420 |          Many systems provide a function which deletes the last
421 |          preceding undeleted character or "print position"* from the
422 |          stream of data being supplied by the user.  This function is
423 |          typically used to edit keyboard input when typing mistakes are
424 |          made.  EC is the standard representation for invoking this
425 |          function.
426 | 
427 |             *NOTE:  A "print position" may contain several characters
428 |             which are the result of overstrikes, or of sequences such as
429 |             <char1> BS <char2>...
430 | 
431 |       Erase Line (EL)
432 | 
433 |          Many systems provide a function which deletes all the data in
434 |          the current "line" of input.  This function is typically used
435 |          to edit keyboard input.  EL is the standard representation for
436 |          invoking this function.
437 | 
438 |    THE TELNET "SYNCH" SIGNAL
439 | 
440 |       Most time-sharing systems provide mechanisms which allow a
441 |       terminal user to regain control of a "runaway" process; the IP and
442 |       AO functions described above are examples of these mechanisms.
443 |       Such systems, when used locally, have access to all of the signals
444 |       supplied by the user, whether these are normal characters or
445 |       special "out of band" signals such as those supplied by the
446 |       teletype "BREAK" key or the IBM 2741 "ATTN" key.  This is not
447 |       necessarily true when terminals are connected to the system
448 |       through the network; the network's flow control mechanisms may
449 |       cause such a signal to be buffered elsewhere, for example in the
450 |       user's host.
451 | 
452 | 
453 | 
454 | Postel & Reynolds                                               [Page 8]
455 | 
456 | 
457 | 
458 | RFC 854                                                         May 1983
459 | 
460 | 
461 |       To counter this problem, the TELNET "Synch" mechanism is
462 |       introduced.  A Synch signal consists of a TCP Urgent notification,
463 |       coupled with the TELNET command DATA MARK.  The Urgent
464 |       notification, which is not subject to the flow control pertaining
465 |       to the TELNET connection, is used to invoke special handling of
466 |       the data stream by the process which receives it.  In this mode,
467 |       the data stream is immediately scanned for "interesting" signals
468 |       as defined below, discarding intervening data.  The TELNET command
469 |       DATA MARK (DM) is the synchronizing mark in the data stream which
470 |       indicates that any special signal has already occurred and the
471 |       recipient can return to normal processing of the data stream.
472 | 
473 |          The Synch is sent via the TCP send operation with the Urgent
474 |          flag set and the DM as the last (or only) data octet.
475 | 
476 |       When several Synchs are sent in rapid succession, the Urgent
477 |       notifications may be merged.  It is not possible to count Urgents
478 |       since the number received will be less than or equal to the number
479 |       sent.  When in normal mode, a DM is a no operation; when in urgent
480 |       mode, it signals the end of the urgent processing.
481 | 
482 |          If TCP indicates the end of Urgent data before the DM is found,
483 |          TELNET should continue the special handling of the data stream
484 |          until the DM is found.
485 | 
486 |          If TCP indicates more Urgent data after the DM is found, it can
487 |          only be because of a subsequent Synch.  TELNET should continue
488 |          the special handling of the data stream until another DM is
489 |          found.
490 | 
491 |       "Interesting" signals are defined to be:  the TELNET standard
492 |       representations of IP, AO, and AYT (but not EC or EL); the local
493 |       analogs of these standard representations (if any); all other
494 |       TELNET commands; other site-defined signals which can be acted on
495 |       without delaying the scan of the data stream.
496 | 
497 |       Since one effect of the SYNCH mechanism is the discarding of
498 |       essentially all characters (except TELNET commands) between the
499 |       sender of the Synch and its recipient, this mechanism is specified
500 |       as the standard way to clear the data path when that is desired.
501 |       For example, if a user at a terminal causes an AO to be
502 |       transmitted, the server which receives the AO (if it provides that
503 |       function at all) should return a Synch to the user.
504 | 
505 |       Finally, just as the TCP Urgent notification is needed at the
506 |       TELNET level as an out-of-band signal, so other protocols which
507 |       make use of TELNET may require a TELNET command which can be
508 |       viewed as an out-of-band signal at a different level.
509 | 
510 | 
511 | Postel & Reynolds                                               [Page 9]
512 | 
513 | 
514 | 
515 | RFC 854                                                         May 1983
516 | 
517 | 
518 |       By convention the sequence [IP, Synch] is to be used as such a
519 |       signal.  For example, suppose that some other protocol, which uses
520 |       TELNET, defines the character string STOP analogously to the
521 |       TELNET command AO.  Imagine that a user of this protocol wishes a
522 |       server to process the STOP string, but the connection is blocked
523 |       because the server is processing other commands.  The user should
524 |       instruct his system to:
525 | 
526 |          1. Send the TELNET IP character;
527 | 
528 |          2. Send the TELNET SYNCH sequence, that is:
529 | 
530 |             Send the Data Mark (DM) as the only character
531 |             in a TCP urgent mode send operation.
532 | 
533 |          3. Send the character string STOP; and
534 | 
535 |          4. Send the other protocol's analog of the TELNET DM, if any.
536 | 
537 |       The user (or process acting on his behalf) must transmit the
538 |       TELNET SYNCH sequence of step 2 above to ensure that the TELNET IP
539 |       gets through to the server's TELNET interpreter.
540 | 
541 |          The Urgent should wake up the TELNET process; the IP should
542 |          wake up the next higher level process.
543 | 
544 |    THE NVT PRINTER AND KEYBOARD
545 | 
546 |       The NVT printer has an unspecified carriage width and page length
547 |       and can produce representations of all 95 USASCII graphics (codes
548 |       32 through 126).  Of the 33 USASCII control codes (0 through 31
549 |       and 127), and the 128 uncovered codes (128 through 255), the
550 |       following have specified meaning to the NVT printer:
551 | 
552 |          NAME                  CODE         MEANING
553 | 
554 |          NULL (NUL)              0      No Operation
555 |          Line Feed (LF)         10      Moves the printer to the
556 |                                         next print line, keeping the
557 |                                         same horizontal position.
558 |          Carriage Return (CR)   13      Moves the printer to the left
559 |                                         margin of the current line.
560 | 
561 | 
562 | 
563 | 
564 | 
565 | 
566 | 
567 | 
568 | Postel & Reynolds                                              [Page 10]
569 | 
570 | 
571 | 
572 | RFC 854                                                         May 1983
573 | 
574 | 
575 |          In addition, the following codes shall have defined, but not
576 |          required, effects on the NVT printer.  Neither end of a TELNET
577 |          connection may assume that the other party will take, or will
578 |          have taken, any particular action upon receipt or transmission
579 |          of these:
580 | 
581 |          BELL (BEL)              7      Produces an audible or
582 |                                         visible signal (which does
583 |                                         NOT move the print head).
584 |          Back Space (BS)         8      Moves the print head one
585 |                                         character position towards
586 |                                         the left margin.
587 |          Horizontal Tab (HT)     9      Moves the printer to the
588 |                                         next horizontal tab stop.
589 |                                         It remains unspecified how
590 |                                         either party determines or
591 |                                         establishes where such tab
592 |                                         stops are located.
593 |          Vertical Tab (VT)       11     Moves the printer to the
594 |                                         next vertical tab stop.  It
595 |                                         remains unspecified how
596 |                                         either party determines or
597 |                                         establishes where such tab
598 |                                         stops are located.
599 |          Form Feed (FF)          12     Moves the printer to the top
600 |                                         of the next page, keeping
601 |                                         the same horizontal position.
602 | 
603 |       All remaining codes do not cause the NVT printer to take any
604 |       action.
605 | 
606 |       The sequence "CR LF", as defined, will cause the NVT to be
607 |       positioned at the left margin of the next print line (as would,
608 |       for example, the sequence "LF CR").  However, many systems and
609 |       terminals do not treat CR and LF independently, and will have to
610 |       go to some effort to simulate their effect.  (For example, some
611 |       terminals do not have a CR independent of the LF, but on such
612 |       terminals it may be possible to simulate a CR by backspacing.)
613 |       Therefore, the sequence "CR LF" must be treated as a single "new
614 |       line" character and used whenever their combined action is
615 |       intended; the sequence "CR NUL" must be used where a carriage
616 |       return alone is actually desired; and the CR character must be
617 |       avoided in other contexts.  This rule gives assurance to systems
618 |       which must decide whether to perform a "new line" function or a
619 |       multiple-backspace that the TELNET stream contains a character
620 |       following a CR that will allow a rational decision.
621 | 
622 |          Note that "CR LF" or "CR NUL" is required in both directions
623 | 
624 | 
625 | Postel & Reynolds                                              [Page 11]
626 | 
627 | 
628 | 
629 | RFC 854                                                         May 1983
630 | 
631 | 
632 |          (in the default ASCII mode), to preserve the symmetry of the
633 |          NVT model.  Even though it may be known in some situations
634 |          (e.g., with remote echo and suppress go ahead options in
635 |          effect) that characters are not being sent to an actual
636 |          printer, nonetheless, for the sake of consistency, the protocol
637 |          requires that a NUL be inserted following a CR not followed by
638 |          a LF in the data stream.  The converse of this is that a NUL
639 |          received in the data stream after a CR (in the absence of
640 |          options negotiations which explicitly specify otherwise) should
641 |          be stripped out prior to applying the NVT to local character
642 |          set mapping.
643 | 
644 |       The NVT keyboard has keys, or key combinations, or key sequences,
645 |       for generating all 128 USASCII codes.  Note that although many
646 |       have no effect on the NVT printer, the NVT keyboard is capable of
647 |       generating them.
648 | 
649 |       In addition to these codes, the NVT keyboard shall be capable of
650 |       generating the following additional codes which, except as noted,
651 |       have defined, but not required, meanings.  The actual code
652 |       assignments for these "characters" are in the TELNET Command
653 |       section, because they are viewed as being, in some sense, generic
654 |       and should be available even when the data stream is interpreted
655 |       as being some other character set.
656 | 
657 |       Synch
658 | 
659 |          This key allows the user to clear his data path to the other
660 |          party.  The activation of this key causes a DM (see command
661 |          section) to be sent in the data stream and a TCP Urgent
662 |          notification is associated with it.  The pair DM-Urgent is to
663 |          have required meaning as defined previously.
664 | 
665 |       Break (BRK)
666 | 
667 |          This code is provided because it is a signal outside the
668 |          USASCII set which is currently given local meaning within many
669 |          systems.  It is intended to indicate that the Break Key or the
670 |          Attention Key was hit.  Note, however, that this is intended to
671 |          provide a 129th code for systems which require it, not as a
672 |          synonym for the IP standard representation.
673 | 
674 |       Interrupt Process (IP)
675 | 
676 |          Suspend, interrupt, abort or terminate the process to which the
677 |          NVT is connected.  Also, part of the out-of-band signal for
678 |          other protocols which use TELNET.
679 | 
680 | 
681 | 
682 | Postel & Reynolds                                              [Page 12]
683 | 
684 | 
685 | 
686 | RFC 854                                                         May 1983
687 | 
688 | 
689 |       Abort Output (AO)
690 | 
691 |          Allow the current process to (appear to) run to completion, but
692 |          do not send its output to the user.  Also, send a Synch to the
693 |          user.
694 | 
695 |       Are You There (AYT)
696 | 
697 |          Send back to the NVT some visible (i.e., printable) evidence
698 |          that the AYT was received.
699 | 
700 |       Erase Character (EC)
701 | 
702 |          The recipient should delete the last preceding undeleted
703 |          character or "print position" from the data stream.
704 | 
705 |       Erase Line (EL)
706 | 
707 |          The recipient should delete characters from the data stream
708 |          back to, but not including, the last "CR LF" sequence sent over
709 |          the TELNET connection.
710 | 
711 |       The spirit of these "extra" keys, and also the printer format
712 |       effectors, is that they should represent a natural extension of
713 |       the mapping that already must be done from "NVT" into "local".
714 |       Just as the NVT data byte 68 (104 octal) should be mapped into
715 |       whatever the local code for "uppercase D" is, so the EC character
716 |       should be mapped into whatever the local "Erase Character"
717 |       function is.  Further, just as the mapping for 124 (174 octal) is
718 |       somewhat arbitrary in an environment that has no "vertical bar"
719 |       character, the EL character may have a somewhat arbitrary mapping
720 |       (or none at all) if there is no local "Erase Line" facility.
721 |       Similarly for format effectors:  if the terminal actually does
722 |       have a "Vertical Tab", then the mapping for VT is obvious, and
723 |       only when the terminal does not have a vertical tab should the
724 |       effect of VT be unpredictable.
725 | 
726 | TELNET COMMAND STRUCTURE
727 | 
728 |    All TELNET commands consist of at least a two byte sequence:  the
729 |    "Interpret as Command" (IAC) escape character followed by the code
730 |    for the command.  The commands dealing with option negotiation are
731 |    three byte sequences, the third byte being the code for the option
732 |    referenced.  This format was chosen so that as more comprehensive use
733 |    of the "data space" is made -- by negotiations from the basic NVT, of
734 |    course -- collisions of data bytes with reserved command values will
735 |    be minimized, all such collisions requiring the inconvenience, and
736 | 
737 | 
738 | 
739 | Postel & Reynolds                                              [Page 13]
740 | 
741 | 
742 | 
743 | RFC 854                                                         May 1983
744 | 
745 | 
746 |    inefficiency, of "escaping" the data bytes into the stream.  With the
747 |    current set-up, only the IAC need be doubled to be sent as data, and
748 |    the other 255 codes may be passed transparently.
749 | 
750 |    The following are the defined TELNET commands.  Note that these codes
751 |    and code sequences have the indicated meaning only when immediately
752 |    preceded by an IAC.
753 | 
754 |       NAME               CODE              MEANING
755 | 
756 |       SE                  240    End of subnegotiation parameters.
757 |       NOP                 241    No operation.
758 |       Data Mark           242    The data stream portion of a Synch.
759 |                                  This should always be accompanied
760 |                                  by a TCP Urgent notification.
761 |       Break               243    NVT character BRK.
762 |       Interrupt Process   244    The function IP.
763 |       Abort output        245    The function AO.
764 |       Are You There       246    The function AYT.
765 |       Erase character     247    The function EC.
766 |       Erase Line          248    The function EL.
767 |       Go ahead            249    The GA signal.
768 |       SB                  250    Indicates that what follows is
769 |                                  subnegotiation of the indicated
770 |                                  option.
771 |       WILL (option code)  251    Indicates the desire to begin
772 |                                  performing, or confirmation that
773 |                                  you are now performing, the
774 |                                  indicated option.
775 |       WON'T (option code) 252    Indicates the refusal to perform,
776 |                                  or continue performing, the
777 |                                  indicated option.
778 |       DO (option code)    253    Indicates the request that the
779 |                                  other party perform, or
780 |                                  confirmation that you are expecting
781 |                                  the other party to perform, the
782 |                                  indicated option.
783 |       DON'T (option code) 254    Indicates the demand that the
784 |                                  other party stop performing,
785 |                                  or confirmation that you are no
786 |                                  longer expecting the other party
787 |                                  to perform, the indicated option.
788 |       IAC                 255    Data Byte 255.
789 | 
790 | 
791 | 
792 | 
793 | 
794 | 
795 | 
796 | Postel & Reynolds                                              [Page 14]
797 | 
798 | 
799 | 
800 | RFC 854                                                         May 1983
801 | 
802 | 
803 | CONNECTION ESTABLISHMENT
804 | 
805 |    The TELNET TCP connection is established between the user's port U
806 |    and the server's port L.  The server listens on its well known port L
807 |    for such connections.  Since a TCP connection is full duplex and
808 |    identified by the pair of ports, the server can engage in many
809 |    simultaneous connections involving its port L and different user
810 |    ports U.
811 | 
812 |    Port Assignment
813 | 
814 |       When used for remote user access to service hosts (i.e., remote
815 |       terminal access) this protocol is assigned server port 23
816 |       (27 octal).  That is L=23.
817 | 
818 | 
819 | 
820 | 
821 | 
822 | 
823 | 
824 | 
825 | 
826 | 
827 | 
828 | 
829 | 
830 | 
831 | 
832 | 
833 | 
834 | 
835 | 
836 | 
837 | 
838 | 
839 | 
840 | 
841 | 
842 | 
843 | 
844 | 
845 | 
846 | 
847 | 
848 | 
849 | 
850 | 
851 | 
852 | 
853 | Postel & Reynolds                                              [Page 15]
854 | 
855 | 


--------------------------------------------------------------------------------
/rfc/rfc855.txt:
--------------------------------------------------------------------------------
  1 | Network Working Group                                          J. Postel
  2 | Request for Comments: 855                                    J. Reynolds
  3 |                                                                      ISI
  4 | Obsoletes: NIC 18640                                            May 1983
  5 | 
  6 |                       TELNET OPTION SPECIFICATIONS
  7 | 
  8 | 
  9 | This RFC specifies a standard for the ARPA Internet community.  Hosts on
 10 | the ARPA Internet are expected to adopt and implement this standard.
 11 | 
 12 | The intent of providing for options in the TELNET Protocol is to permit
 13 | hosts to obtain more elegant solutions to the problems of communication
 14 | between dissimilar devices than is possible within the framework
 15 | provided by the Network Virtual Terminal (NVT).  It should be possible
 16 | for hosts to invent, test, or discard options at will.  Nevertheless, it
 17 | is envisioned that options which prove to be generally useful will
 18 | eventually be supported by many hosts; therefore it is desirable that
 19 | options should be carefully documented and well publicized.  In
 20 | addition, it is necessary to insure that a single option code is not
 21 | used for several different options.
 22 | 
 23 | This document specifies a method of option code assignment and standards
 24 | for documentation of options.  The individual responsible for assignment
 25 | of option codes may waive the requirement for complete documentation for
 26 | some cases of experimentation, but in general documentation will be
 27 | required prior to code assignment.  Options will be publicized by
 28 | publishing their documentation as RFCs; inventors of options may, of
 29 | course, publicize them in other ways as well.
 30 | 
 31 |    Option codes will be assigned by:
 32 | 
 33 |       Jonathan B. Postel
 34 |       University of Southern California
 35 |       Information Sciences Institute (USC-ISI)
 36 |       4676 Admiralty Way
 37 |       Marina Del Rey, California 90291
 38 |       (213) 822-1511
 39 | 
 40 |       Mailbox = POSTEL@USC-ISIF
 41 | 
 42 | Documentation of options should contain at least the following sections:
 43 | 
 44 |    Section 1 - Command Name and Option Code
 45 | 
 46 |    Section 2 - Command Meanings
 47 | 
 48 |       The meaning of each possible TELNET command relevant to this
 49 |       option should be described.  Note that for complex options, where
 50 | 
 51 | 
 52 | 
 53 | 
 54 | Postel & Reynolds                                               [Page 1]
 55 | 
 56 | 
 57 | 
 58 | RFC 855                                                         May 1983
 59 | 
 60 | 
 61 |       "subnegotiation" is required, there may be a larger number of
 62 |       possible commands.  The concept of "subnegotiation" is described
 63 |       in more detail below.
 64 | 
 65 |    Section 3 - Default Specification
 66 | 
 67 |       The default assumptions for hosts which do not implement, or use,
 68 |       the option must be described.
 69 | 
 70 |    Section 4 - Motivation
 71 | 
 72 |       A detailed explanation of the motivation for inventing a
 73 |       particular option, or for choosing a particular form for the
 74 |       option, is extremely helpful to those who are not faced (or don't
 75 |       realize that they are faced) by the problem that the option is
 76 |       designed to solve.
 77 | 
 78 |    Section 5 - Description (or Implementation Rules)
 79 | 
 80 |       Merely defining the command meanings and providing a statement of
 81 |       motivation are not always sufficient to insure that two
 82 |       implementations of an option will be able to communicate.
 83 |       Therefore, a more complete description should be furnished in most
 84 |       cases.  This description might take the form of text, a sample
 85 |       implementation, hints to implementers, etc.
 86 | 
 87 | A Note on "Subnegotiation"
 88 | 
 89 |    Some options will require more information to be passed between hosts
 90 |    than a single option code.  For example, any option which requires a
 91 |    parameter is such a case.  The strategy to be used consists of two
 92 |    steps:  first, both parties agree to "discuss" the parameter(s) and,
 93 |    second, the "discussion" takes place.
 94 | 
 95 |    The first step, agreeing to discuss the parameters, takes place in
 96 |    the normal manner; one party proposes use of the option by sending a
 97 |    DO (or WILL) followed by the option code, and the other party accepts
 98 |    by returning a WILL (or DO) followed by the option code.  Once both
 99 |    parties have agreed to use the option, subnegotiation takes place by
100 |    using the command SB, followed by the option code, followed by the
101 |    parameter(s), followed by the command SE.  Each party is presumed to
102 |    be able to parse the parameter(s), since each has indicated that the
103 |    option is supported (via the initial exchange of WILL and DO).  On
104 |    the other hand, the receiver may locate the end of a parameter string
105 |    by searching for the SE command (i.e., the string IAC SE), even if
106 |    the receiver is unable to parse the parameters.  Of course, either
107 |    party may refuse to pursue further subnegotiation at any time by
108 |    sending a WON'T or DON'T to the other party.
109 | 
110 | 
111 | Postel & Reynolds                                               [Page 2]
112 | 
113 | 
114 | 
115 | RFC 855                                                         May 1983
116 | 
117 | 
118 |    Thus, for option "ABC", which requires subnegotiation, the formats of
119 |    the TELNET commands are:
120 | 
121 |       IAC WILL ABC
122 | 
123 |          Offer to use option ABC (or favorable acknowledgment of other
124 |          party's request)
125 | 
126 |       IAC DO ABC
127 | 
128 |          Request for other party to use option ABC (or favorable
129 |          acknowledgment of other party's offer)
130 | 
131 |       IAC SB ABC <parameters> IAC SE
132 | 
133 |          One step of subnegotiation, used by either party.
134 | 
135 |    Designers of options requiring "subnegotiation" must take great care
136 |    to avoid unending loops in the subnegotiation process.  For example,
137 |    if each party can accept any value of a parameter, and both parties
138 |    suggest parameters with different values, then one is likely to have
139 |    an infinite oscillation of "acknowledgments" (where each receiver
140 |    believes it is only acknowledging the new proposals of the other).
141 |    Finally, if parameters in an option "subnegotiation" include a byte
142 |    with a value of 255, it is necessary to double this byte in
143 |    accordance with the general TELNET rules.
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 
160 | 
161 | 
162 | 
163 | 
164 | 
165 | 
166 | 
167 | 
168 | Postel & Reynolds                                               [Page 3]
169 | 
170 | 


--------------------------------------------------------------------------------
/rfc/rfc856.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | Network Working Group                                          J. Postel
  3 | Request for Comments: 856                                    J. Reynolds
  4 |                                                                      ISI
  5 | Obsoletes: NIC 15389                                            May 1983
  6 | 
  7 |                        TELNET BINARY TRANSMISSION
  8 | 
  9 | 
 10 | This RFC specifies a standard for the ARPA Internet community.  Hosts on
 11 | the ARPA Internet are expected to adopt and implement this standard.
 12 | 
 13 | 1.  Command Name and Code
 14 | 
 15 |    TRANSMIT-BINARY      0
 16 | 
 17 | 2.  Command Meanings
 18 | 
 19 |    IAC WILL TRANSMIT-BINARY
 20 | 
 21 |       The sender of this command REQUESTS permission to begin
 22 |       transmitting, or confirms that it will now begin transmitting
 23 |       characters which are to be interpreted as 8 bits of binary data by
 24 |       the receiver of the data.
 25 | 
 26 |    IAC WON'T TRANSMIT-BINARY
 27 | 
 28 |       If the connection is already being operated in binary transmission
 29 |       mode, the sender of this command DEMANDS to begin transmitting
 30 |       data characters which are to be interpreted as standard NVT ASCII
 31 |       characters by the receiver of the data.  If the connection is not
 32 |       already being operated in binary transmission mode, the sender of
 33 |       this command REFUSES to begin transmitting characters which are to
 34 |       be interpreted as binary characters by the receiver of the data
 35 |       (i.e., the sender of the data demands to continue transmitting
 36 |       characters in its present mode).
 37 | 
 38 |       A connection is being operated in binary transmission mode only
 39 |       when one party has requested it and the other has acknowledged it.
 40 | 
 41 |    IAC DO TRANSMIT-BINARY
 42 | 
 43 |       The sender of this command REQUESTS that the sender of the data
 44 |       start transmitting, or confirms that the sender of data is
 45 |       expected to transmit, characters which are to be interpreted as 8
 46 |       bits of binary data (i.e., by the party sending this command).
 47 | 
 48 |    IAC DON'T TRANSMIT-BINARY
 49 | 
 50 |       If the connection is already being operated in binary transmission
 51 |       mode, the sender of this command DEMANDS that the sender of the
 52 |       data start transmitting characters which are to be interpreted as
 53 | 
 54 | 
 55 | Postel & Reynolds                                               [Page 1]
 56 | 
 57 | 
 58 | 
 59 | RFC 856                                                         May 1983
 60 | 
 61 | 
 62 |       standard NVT ASCII characters by the receiver of the data (i.e.,
 63 |       the party sending this command).  If the connection is not already
 64 |       being operated in binary transmission mode, the sender of this
 65 |       command DEMANDS that the sender of data continue transmitting
 66 |       characters which are to be interpreted in the present mode.
 67 | 
 68 |       A connection is being operated in binary transmission mode only
 69 |       when one party has requested it and the other has acknowledged it.
 70 | 
 71 | 3.  Default
 72 | 
 73 |    WON'T TRANSMIT-BINARY
 74 | 
 75 |    DON'T TRANSMIT-BINARY
 76 | 
 77 |       The connection is not operated in binary mode.
 78 | 
 79 | 4.  Motivation for the Option
 80 | 
 81 |    It is sometimes useful to have available a binary transmission path
 82 |    within TELNET without having to utilize one of the more efficient,
 83 |    higher level protocols providing binary transmission (such as the
 84 |    File Transfer Protocol).  The use of the IAC prefix within the basic
 85 |    TELNET protocol provides the option of binary transmission in a
 86 |    natural way, requiring only the addition of a mechanism by which the
 87 |    parties involved can agree to INTERPRET the characters transmitted
 88 |    over a TELNET connection as binary data.
 89 | 
 90 | 5.  Description of the Option
 91 | 
 92 |    With the binary transmission option in effect, the receiver should
 93 |    interpret characters received from the transmitter which are not
 94 |    preceded with IAC as 8 bit binary data, with the exception of IAC
 95 |    followed by IAC which stands for the 8 bit binary data with the
 96 |    decimal value 255.  IAC followed by an effective TELNET command (plus
 97 |    any additional characters required to complete the command) is still
 98 |    the command even with the binary transmission option in effect.  IAC
 99 |    followed by a character which is not a defined TELNET command has the
100 |    same meaning as IAC followed by NOP, although an IAC followed by an
101 |    undefined command should not normally be sent in this mode.
102 | 
103 | 6.  Implementation Suggestions
104 | 
105 |    It is foreseen that implementations of the binary transmission option
106 |    will choose to refuse some other options (such as the EBCDIC
107 |    transmission option) while the binary transmission option is in
108 | 
109 | 
110 | 
111 | 
112 | Postel & Reynolds                                               [Page 2]
113 | 
114 | 
115 | 
116 | RFC 856                                                         May 1983
117 | 
118 | 
119 |    effect.  However, if a pair of hosts can understand being in binary
120 |    transmission mode simultaneous with being in, for example, echo mode,
121 |    then it is all right if they negotiate that combination.
122 | 
123 |    It should be mentioned that the meanings of WON'T and DON'T are
124 |    dependent upon whether the connection is presently being operated in
125 |    binary mode or not.  Consider a connection operating in, say, EBCDIC
126 |    mode which involves a system which has chosen not to implement any
127 |    knowledge of the binary command.  If this system were to receive a DO
128 |    TRANSMIT-BINARY, it would not recognize the TRANSMIT-BINARY option
129 |    and therefore would return a WON'T TRANSMIT-BINARY.  If the default
130 |    for the WON'T TRANSMIT-BINARY were always NVT ASCII, the sender of
131 |    the DO TRANSMIT-BINARY would expect the recipient to have switched to
132 |    NVT ASCII, whereas the receiver of the DO TRANSMIT-BINARY would not
133 |    make this interpretation.
134 | 
135 |    Thus, we have the rule that when a connection is not presently
136 |    operating in binary mode, the default (i.e., the interpretation of
137 |    WON'T and DON'T) is to continue operating in the current mode,
138 |    whether that is NVT ASCII, EBCDIC, or some other mode.  This rule,
139 |    however, is not applied once a connection is operating in a binary
140 |    mode (as agreed to by both ends); this would require each end of the
141 |    connection to maintain a stack, containing all of the encoding-method
142 |    transitions which had previously occurred on the connection, in order
143 |    to properly interpret a WON'T or DON'T.  Thus, a WON'T or DON'T
144 |    received after the connection is operating in binary mode causes the
145 |    encoding method to revert to NVT ASCII.
146 | 
147 |    It should be remembered that a TELNET connection is a two way
148 |    communication channel.  The binary transmission mode must be
149 |    negotiated separately for each direction of data flow, if that is
150 |    desired.
151 | 
152 |    Implementation of the binary transmission option, as is the case with
153 |    implementations of all other TELNET options, must follow the loop
154 |    preventing rules given in the General Considerations section of the
155 |    TELNET Protocol Specification.
156 | 
157 |    Consider now some issues of binary transmission both to and from
158 |    both a process and a terminal:
159 | 
160 |       a. Binary transmission from a terminal.
161 | 
162 |          The implementer of the binary transmission option should
163 |          consider how (or whether) a terminal transmitting over a TELNET
164 |          connection with binary transmission in effect is allowed to
165 |          generate all eight bit characters, ignoring parity
166 |          considerations, etc., on input from the terminal.
167 | 
168 | 
169 | Postel & Reynolds                                               [Page 3]
170 | 
171 | 
172 | 
173 | RFC 856                                                         May 1983
174 | 
175 | 
176 |       b. Binary transmission to a process.
177 | 
178 |          The implementer of the binary transmission option should
179 |          consider how (or whether) all characters are passed to a
180 |          process receiving over a connection with binary transmission in
181 |          effect.  As an example of the possible problem, TOPS-20
182 |          intercepts certain characters (e.g., ETX, the terminal
183 |          control-C) at monitor level and does not pass them to the
184 |          process.
185 | 
186 |       c. Binary transmission from a process.
187 | 
188 |          The implementer of the binary transmission option should
189 |          consider how (or whether) a process transmitting over a
190 |          connection with binary transmission in effect is allowed to
191 |          send all eight bit characters with no characters intercepted by
192 |          the monitor and changed to other characters.  An example of
193 |          such a conversion may be found in the TOPS-20 system where
194 |          certain non-printing characters are normally converted to a
195 |          Circumflex (up-arrow) followed by a printing character.
196 | 
197 |       d. Binary transmission to a terminal.
198 | 
199 |          The implementer of the binary transmission option should
200 |          consider how (or whether) all characters received over a
201 |          connection with binary transmission in effect are sent to a
202 |          local terminal.  At issue may be the addition of timing
203 |          characters normally inserted locally, parity calculations, and
204 |          any normal code conversion.
205 | 
206 | 
207 | 
208 | 
209 | 
210 | 
211 | 
212 | 
213 | 
214 | 
215 | 
216 | 
217 | 
218 | 
219 | 
220 | 
221 | 
222 | 
223 | 
224 | 
225 | 
226 | Postel & Reynolds                                               [Page 4]
227 | 
228 | 


--------------------------------------------------------------------------------
/rfc/rfc857.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | Network Working Group                                          J. Postel
  3 | Request for Comments: 857                                    J. Reynolds
  4 |                                                                      ISI
  5 | Obsoletes: NIC 15390                                            May 1983
  6 | 
  7 |                            TELNET ECHO OPTION
  8 | 
  9 | 
 10 | This RFC specifies a standard for the ARPA Internet community.  Hosts on
 11 | the ARPA Internet are expected to adopt and implement this standard.
 12 | 
 13 | 1. Command Name and Code
 14 | 
 15 |    ECHO       1
 16 | 
 17 | 2. Command Meanings
 18 | 
 19 |    IAC WILL ECHO
 20 | 
 21 |       The sender of this command REQUESTS to begin, or confirms that it
 22 |       will now begin, echoing data characters it receives over the
 23 |       TELNET connection back to the sender of the data characters.
 24 | 
 25 |    IAC WON'T ECHO
 26 | 
 27 |       The sender of this command DEMANDS to stop, or refuses to start,
 28 |       echoing the data characters it receives over the TELNET connection
 29 |       back to the sender of the data characters.
 30 | 
 31 |    IAC DO ECHO
 32 | 
 33 |       The sender of this command REQUESTS that the receiver of this
 34 |       command begin echoing, or confirms that the receiver of this
 35 |       command is expected to echo, data characters it receives over the
 36 |       TELNET connection back to the sender.
 37 | 
 38 |    IAC DON'T ECHO
 39 | 
 40 |       The sender of this command DEMANDS the receiver of this command
 41 |       stop, or not start, echoing data characters it receives over the
 42 |       TELNET connection.
 43 | 
 44 | 3. Default
 45 | 
 46 |    WON'T ECHO
 47 | 
 48 |    DON'T ECHO
 49 | 
 50 |       No echoing is done over the TELNET connection.
 51 | 
 52 | 4. Motivation for the Option
 53 | 
 54 | 
 55 | Postel & Reynolds                                               [Page 1]
 56 | 
 57 | 
 58 | 
 59 | RFC 857                                                         May 1983
 60 | 
 61 | 
 62 |    The NVT has a printer and a keyboard which are nominally
 63 |    interconnected so that "echoes" need never traverse the network; that
 64 |    is to say, the NVT nominally operates in a mode where characters
 65 |    typed on the keyboard are (by some means) locally turned around and
 66 |    printed on the printer.  In highly interactive situations it is
 67 |    appropriate for the remote process (command language interpreter,
 68 |    etc.) to which the characters are being sent to control the way they
 69 |    are echoed on the printer.  In order to support such interactive
 70 |    situations, it is necessary that there be a TELNET option to allow
 71 |    the parties at the two ends of the TELNET connection to agree that
 72 |    characters typed on an NVT keyboard are to be echoed by the party at
 73 |    the other end of the TELNET connection.
 74 | 
 75 | 5. Description of the Option
 76 | 
 77 |    When the echoing option is in effect, the party at the end performing
 78 |    the echoing is expected to transmit (echo) data characters it
 79 |    receives back to the sender of the data characters.  The option does
 80 |    not require that the characters echoed be exactly the characters
 81 |    received (for example, a number of systems echo the ASCII ESC
 82 |    character with something other than the ESC character).  When the
 83 |    echoing option is not in effect, the receiver of data characters
 84 |    should not echo them back to the sender; this, of course, does not
 85 |    prevent the receiver from responding to data characters received.
 86 | 
 87 |    The normal TELNET connection is two way.  That is, data flows in each
 88 |    direction on the connection independently; and neither, either, or
 89 |    both directions may be operating simultaneously in echo mode.  There
 90 |    are five reasonable modes of operation for echoing on a connection
 91 |    pair:
 92 | 
 93 |       
 94 |                 <----------------
 95 |       
 96 |       Process 1                   Process 2
 97 |                 ---------------->
 98 |                  Neither end echoes
 99 | 
100 |       
101 |                 <----------------
102 |                    \
103 |       Process 1    /              Process 2
104 |                 ---------------->
105 |              One end echoes for itself
106 | 
107 | 
108 | 
109 | 
110 | 
111 | 
112 | Postel & Reynolds                                               [Page 2]
113 | 
114 | 
115 | 
116 | RFC 857                                                         May 1983
117 | 
118 | 
119 |       
120 |                 <----------------
121 |                              \
122 |       Process 1              /    Process 2
123 |                 ---------------->
124 |           One end echoes for the other
125 | 
126 |       
127 |                 <----------------
128 |                    \         /
129 |       Process 1    /         \    Process 2
130 |                 ---------------->
131 |           Both ends echo for themselves
132 | 
133 |       
134 |                 <----------------
135 |                    \ /
136 |       Process 1    / \            Process 2
137 |                 ---------------->
138 |            One end echoes for both ends
139 | 
140 |    This option provides the capability to decide on whether or not
141 |    either end will echo for the other.  It does not, however, provide
142 |    any control over whether or not an end echoes for itself;  this
143 |    decision must be left to the sole discretion of the systems at each
144 |    end (although they may use information regarding the state of
145 |    "remote" echoing negotiations in making this decision).
146 | 
147 |    It should be noted that if BOTH hosts enter the mode of echoing
148 |    characters transmitted by the other host, then any character
149 |    transmitted in either direction will be "echoed" back and forth
150 |    indefinitely.  Therefore, care should be taken in each implementation
151 |    that if one site is echoing, echoing is not permitted to be turned on
152 |    at the other.
153 | 
154 |    As discussed in the TELNET Protocol Specification, both parties to a
155 |    full-duplex TELNET connection initially assume each direction of the
156 |    connection is being operated in the default mode which is non-echo
157 |    (non-echo is not using this option, and the same as DON'T ECHO, WON'T
158 |    ECHO).
159 | 
160 |    If either party desires himself to echo characters to the other party
161 |    or for the other party to echo characters to him, that party gives
162 |    the appropriate command (WILL ECHO or DO ECHO) and waits (and hopes)
163 |    for acceptance of the option.  If the request to operate the
164 |    connection in echo mode is refused, then the connection continues to
165 |    operate in non-echo mode.  If the request to operate the connection
166 |    in echo mode is accepted, the connection is operated in echo mode.
167 | 
168 | 
169 | Postel & Reynolds                                               [Page 3]
170 | 
171 | 
172 | 
173 | RFC 857                                                         May 1983
174 | 
175 | 
176 |    After a connection has been changed to echo mode, either party may
177 |    demand that it revert to non-echo mode by giving the appropriate
178 |    DON'T ECHO or WON'T ECHO command (which the other party must confirm
179 |    thereby allowing the connection to operate in non-echo mode).  Just
180 |    as each direction of the TELNET connection may be put in remote
181 |    echoing mode independently, each direction of the TELNET connection
182 |    must be removed from remote echoing mode separately.
183 | 
184 |    Implementations of the echo option, as implementations of all other
185 |    TELNET options, must follow the loop preventing rules given in the
186 |    General Considerations section of the TELNET Protocol Specification.
187 |    Also, so that switches between echo and non-echo mode can be made
188 |    with minimal confusion (momentary double echoing, etc.), switches in
189 |    mode of operation should be made at times precisely coordinated with
190 |    the reception and transmission of echo requests and demands.  For
191 |    instance, if one party responds to a DO ECHO with a WILL ECHO, all
192 |    data characters received after the DO ECHO should be echoed and the
193 |    WILL ECHO should immediately precede the first of the echoed
194 |    characters.
195 | 
196 |    The echoing option alone will normally not be sufficient to effect
197 |    what is commonly understood to be remote computer echoing of
198 |    characters typed on a terminal keyboard--the SUPPRESS-GO AHEAD option
199 |    will normally have to be invoked in conjunction with the ECHO option
200 |    to effect character-at-a-time remote echoing.
201 | 
202 | 6. A Sample Implementation of the Option
203 | 
204 |    The following is a description of a possible implementation for a
205 |    simple user system called "UHOST".
206 | 
207 |    A possible implementation could be that for each user terminal, the
208 |    UHOST would keep three state bits: whether the terminal echoes for
209 |    itself (UHOST ECHO always) or not (ECHO mode possible), whether the
210 |    (human) user prefers to operate in ECHO mode or in non-ECHO mode, and
211 |    whether the connection from this terminal to the server is in ECHO or
212 |    non-ECHO mode.  We will call these three bits P(hysical), D(esired),
213 |    and A(ctual).
214 | 
215 |    When a terminal dials up the UHOST the P-bit is set appropriately,
216 |    the D-bit is set equal to it, and the A-bit is set to non-ECHO.  The
217 |    P-bit and D-bit may be manually reset by direct commands if the user
218 |    so desires.  For example, a user in Hawaii on a "full-duplex"
219 |    terminal, would choose not to operate in ECHO mode, regardless of the
220 |    preference of a mainland server.  He should direct the UHOST to
221 |    change his D-bit from ECHO to non-ECHO.
222 | 
223 |    When a connection is opened from the UHOST terminal to a server, the
224 | 
225 | 
226 | Postel & Reynolds                                               [Page 4]
227 | 
228 | 
229 | 
230 | RFC 857                                                         May 1983
231 | 
232 | 
233 |    UHOST would send the server a DO ECHO command if the MIN (with
234 |    non-ECHO less than ECHO) of the P- and D-bits is different from the
235 |    A-bit.  If a WON'T ECHO or WILL ECHO arrives from the server, the
236 |    UHOST will set the A-bit to the MIN of the received request, the
237 |    P-bit, and the D-bit.  If this changes the state of the A-bit, the
238 |    UHOST will send off the appropriate acknowledgment; if it does not,
239 |    then the UHOST will send off the appropriate refusal if not changing
240 |    meant that it had to deny the request (i.e., the MIN of the P- and
241 |    D-bits was less than the received A-request).
242 | 
243 |    If while a connection is open, the UHOST terminal user changes either
244 |    the P-bit or D-bit, the UHOST will repeat the above tests and send
245 |    off a DO ECHO or DON'T ECHO, if necessary.  When the connection is
246 |    closed, the UHOST would reset the A-bit to indicate UHOST echoing.
247 | 
248 |    While the UHOST's implementation would not involve DO ECHO or DON'T
249 |    ECHO commands being sent to the server except when the connection is
250 |    opened or the user explicitly changes his echoing mode, bigger hosts
251 |    might invoke such mode switches quite frequently.  For instance,
252 |    while a line-at-a-time system were running, the server might attempt
253 |    to put the user in local echo mode by sending the WON'T ECHO command
254 |    to the user; but while a character-at-a-time system were running, the
255 |    server might attempt to invoke remote echoing for the user by sending
256 |    the WILL ECHO command to the user.  Furthermore, while the UHOST will
257 |    never send a WILL ECHO command and will only send a WON'T ECHO to
258 |    refuse a server sent DO ECHO command, a server host might often send
259 |    the WILL and WON'T ECHO commands.
260 | 
261 | 
262 | 
263 | 
264 | 
265 | 
266 | 
267 | 
268 | 
269 | 
270 | 
271 | 
272 | 
273 | 
274 | 
275 | 
276 | 
277 | 
278 | 
279 | 
280 | 
281 | 
282 | 
283 | Postel & Reynolds                                               [Page 5]
284 | 
285 | 


--------------------------------------------------------------------------------
/src/server.ml:
--------------------------------------------------------------------------------
  1 | open Wire
  2 | 
  3 | type cmd = [ `WILL | `WILL_NOT | `DO | `DO_NOT ] (* option-negotiation verbs; emit_cmd maps each one to the Wire constructor of the same name *)
  4 | 
  5 | let cmd_to_string verb = (* lower-case English name of a negotiation verb, for log lines *)
  6 |   match verb with
  7 |   | `DO -> "do"
  8 |   | `DO_NOT -> "do not"
  9 |   | `WILL -> "will" | `WILL_NOT -> "will not"
 10 | 
 11 | type status = (* position of the byte-at-a-time parser implemented by handle_main *)
 12 |   | Data (* plain payload; a 0xFF byte switches to Command *)
 13 |   | Command (* previous byte was 0xFF; the next byte is a command code *)
 14 |   | Option of cmd (* a negotiation verb was read; the next byte is the option code *)
 15 |   | Suboption (* SUBNEG was read; the next byte names the option being subnegotiated *)
 16 |   | EatSub of int * telnet_option * int list (* payload bytes still expected, the option, bytes collected so far (newest first) *)
 17 |   | SubDone (* the expected subnegotiation payload has been consumed *)
 18 | 
 19 | let status_to_string st = (* short label for a parser status, for debug output *)
 20 |   match st with
 21 |   | SubDone -> "sub done"
 22 |   | Suboption -> "sub"
 23 |   | EatSub (left, _, seen) -> Printf.sprintf "eatsub %d left, %d collected" left (List.length seen)
 24 |   | Option verb -> "option " ^ cmd_to_string verb
 25 |   | Command -> "cmd" | Data -> "data"
 26 | 
 27 | type option_state = [ (* negotiation progress of one option, as stored in state.client_options *)
 28 |   | `Requested (* we requested; no answer recorded yet *)
 29 |   | `Denied (* other side denied *)
 30 |   | `Accepted (* other side accepted *)
 31 | ]
 32 | 
 33 | let option_state_to_string progress = (* lower-case label for a negotiation state *)
 34 |   match progress with
 35 |   | `Accepted -> "accepted"
 36 |   | `Denied -> "denied" | `Requested -> "requested"
 37 | 
 38 | type state = { (* connection state threaded through handle *)
 39 |   machina : status ; (* current parser position *)
 40 |   server_config : (cmd * telnet_option) list ; (* the requests init encodes and sends *)
 41 |   client_options : (option_state * telnet_option) list ; (* negotiation progress per option *)
 42 | }
 43 | 
 44 | let option_to_string (progress, opt) = (* renders one client_options entry: state label, a space, option name *)
 45 |   option_state_to_string progress ^ " " ^ telnet_option_to_string opt
 46 | 
 47 | let state_to_string s = (* one-line dump of parser status and option table, newline-terminated *)
 48 |   let machina = status_to_string s.machina in
 49 |   let opts = s.client_options |> List.map option_to_string |> String.concat ", " in
 50 |   Printf.sprintf "state %s options %s\n" machina opts
 51 | 
 52 | let client_option_state s option = (* state of the first client_options entry for [option], if any *)
 53 |   let rec find = function [] -> None | (st, o) :: tl -> if o = option then Some st else find tl in
 54 |   find s.client_options
 55 | 
 56 | (* Two-byte wire prefix for a negotiation verb: 0xFF followed by the
 57 |    verb's command code. *)
 58 | let emit_cmd cmd =
 59 |   let wire_cmd = match cmd with
 60 |     | `DO_NOT -> DO_NOT
 61 |     | `DO -> DO
 62 |     | `WILL_NOT -> WILL_NOT
 63 |     | `WILL -> WILL
 64 |   in
 65 |   let buf = Cstruct.create 2 in
 66 |   Cstruct.set_uint8 buf 0 0xFF ; Cstruct.set_uint8 buf 1 (telnet_command_to_int wire_cmd) ;
 67 |   buf
 68 | 
 69 | let emit_option cmd opt = (* three-byte negotiation message: 0xFF, verb code, option code *)
 70 |   let tail = Cstruct.create 1 in
 71 |   Cstruct.set_uint8 tail 0 (telnet_option_to_int opt) ;
 72 |   let head = emit_cmd cmd in
 73 |   Cstruct.concat [ head ; tail ]
 74 | 
 75 | (* Handle the option byte after a verb from the peer: returns the state (parser back in Data, option table updated) and the `Option replies to send. *)
 76 | let handle_option state cmd what =
 77 |   let not_option = List.filter (fun (_, opt) -> opt <> what) state.client_options in
 78 |   let client_options, out = match cmd, client_option_state state what with
 79 |     | `WILL, Some `Requested -> (`Accepted, what) :: not_option, []
 80 |     | (`WILL_NOT | `DO_NOT), Some `Requested -> (`Denied, what) :: not_option, []
 81 |     | `DO, _ -> state.client_options, [] (*`Option (`WILL_NOT, what)] XXX depends on state *)
 82 |     | `WILL, Some `Accepted -> state.client_options, [] (* option already in effect: a repeated offer is not acknowledged *)
 83 |     | `WILL, _ -> (* offer we did not ask for: refuse with DO_NOT (WILL_NOT would describe our own side) *)
 84 |       Printf.printf "requested, but won't %s\n%!" (telnet_option_to_string what) ;
 85 |       state.client_options, [`Option (`DO_NOT, what)]
 86 |     | cmd, _ ->
 87 |       Printf.printf "ignoring unknown request %s %s\n%!" (cmd_to_string cmd) (telnet_option_to_string what) ;
 88 |       state.client_options, []
 89 |   in { state with machina = Data ; client_options }, out
 90 | 
 91 | let handle_sub _state opt = (* parser status after SUBNEG and an option code: only window size has a payload that is collected (4 bytes) *)
 92 |   match opt with
 93 |   | Negotiate_About_Window_Size -> EatSub (4, opt, []) | _ -> Data
 94 | 
 95 | (* Pack a list of byte values into a freshly allocated Cstruct.
 96 |    List order is kept: element i of the list is written at offset i.
 97 |    An empty list gives a zero-length buffer. *)
 98 | let of_list ints =
 99 |   let size = List.length ints in
100 |   let buf = Cstruct.create size in
101 |   let store pos byte = Cstruct.set_uint8 buf pos byte in
102 |   List.iteri store ints ;
103 |   buf
104 | 
105 | (* Decode a collected subnegotiation payload: window size yields one `Resize (width, height) read as two big-endian 16-bit values. *)
106 | let handle_subcommand _state cs opt =
107 |   match opt with
108 |   | Negotiate_About_Window_Size ->
109 |     let dim off : int = Cstruct.BE.get_uint16 cs off in
110 |     [ `Resize (dim 0, dim 2) ]
111 |   | _ -> []
112 | 
113 | (* Act on the command that followed 0xFF: an escaped 0xFF is data, verbs and SUBNEG advance the parser, the rest returns to Data. *)
114 | let handle_command state command =
115 |   let goto machina out = { state with machina }, out in
116 |   match command with
117 |   | IAC -> goto Data [`Data 0xFF]
118 |   | SUBNEG_END | NOP -> goto Data []
119 |   | WILL -> goto (Option `WILL) []
120 |   | WILL_NOT -> goto (Option `WILL_NOT) []
121 |   | DO -> goto (Option `DO) []
122 |   | DO_NOT -> goto (Option `DO_NOT) []
123 |   | SUBNEG -> goto Suboption []
124 |   | other ->
125 |     Printf.printf "received %s\n%!" (telnet_command_to_string other) ;
126 |     goto Data [] (* this is incorrect *)
127 | 
128 | let handle_main state data = (* feed one input byte to the parser; returns the new state and the events that byte produced *)
129 |   match state.machina, data with
130 |   | Data, 0xFF -> { state with machina = Command }, [] (* 0xFF introduces a command *)
131 |   | Data, c -> state, [`Data c]
132 |   | Command, cmd -> (match int_to_telnet_command cmd with
133 |       | None -> Printf.printf "unknown command %x\n%!" data ; { state with machina = Data }, []
134 |       | Some x -> handle_command state x)
135 |   | Option cmd, data -> (match int_to_telnet_option data with
136 |       | None -> Printf.printf "unknown option %x\n%!" data ; { state with machina = Data }, []
137 |       | Some x -> handle_option state cmd x)
138 |   | Suboption, x -> (match int_to_telnet_option x with
139 |       | None -> Printf.printf "unknown suboption %x\n%!" data ; { state with machina = Data }, []
140 |       | Some x -> let machina = handle_sub state x in { state with machina }, [])
141 |   | EatSub (1, opt, xs), x -> (* last expected payload byte: decode the collected bytes in arrival order *)
142 |     let out = handle_subcommand state (of_list (List.rev (x::xs))) opt in
143 |     { state with machina = SubDone }, out
144 |   | EatSub (x, opt, xs), c -> { state with machina = EatSub (pred x, opt, c :: xs) }, [] (* NOTE(review): every byte is counted, so a doubled 0xFF inside the payload is not collapsed - confirm against the option spec *)
145 |   | SubDone, 0xFF -> { state with machina = Command }, [] (* presumably the 0xFF of the closing SUBNEG_END *)
146 |   | SubDone, x -> { state with machina = Data }, [`Data x] (* any other byte is passed through as data *)
147 | 
148 | let ev_s event = match event with (* debug helper: print one event to stdout *)
149 |   | `Resize (w, h) -> Printf.printf "resize %d, %d\n%!" w h
150 |   | `Data buf -> Printf.printf "data %d:" (Cstruct.len buf) ; Cstruct.hexdump buf
151 | 
152 | (* Parse [buf]. Returns the new state, the application events in arrival order (runs of
153 |    data bytes merged into one `Data buffer) and the reply bytes to send to the peer. *)
154 | let handle state buf =
155 |   (* Printf.printf "state in %s" (state_to_string state) ; *)
156 |   let l = Cstruct.len buf in
157 |   let rec go state idx acc =
158 |     if idx >= l then state, List.flatten (List.rev acc)
159 |     else
160 |       let state, outs = handle_main state (Cstruct.get_uint8 buf idx) in
161 |       go state (succ idx) (outs :: acc)
162 |   in
163 |   let state, out = go state 0 [] in
164 |   let maybe_data e = function (* flush pending bytes (newest first) as one `Data onto e *)
165 |     | [] -> e
166 |     | xs -> `Data (of_list (List.rev xs)) :: e
167 |   in
168 |   let data, options, events = List.fold_left ( fun (d, o, e) ev ->
169 |       match ev with (* d, o and e are all accumulated newest first *)
170 |       | `Option (cmd, x) -> ([], emit_option cmd x :: o, maybe_data e d)
171 |       | `Data x -> (x :: d, o, e)
172 |       | `Resize (w, h) -> ([], o, `Resize (w, h) :: maybe_data e d) ) (* data seen before the resize is flushed first *)
173 |       ([], [], []) out
174 |   in
175 |   let events = List.rev (maybe_data events data) in (* back to arrival order *)
176 |   let options = Cstruct.concat (List.rev options) in
177 |   (* Printf.printf "state out %s" (state_to_string state) ; *)
178 |   (state, events, options)
179 | 
180 | let init () = (* initial state (every configured option marked `Requested) plus the encoded requests for server_config *)
181 |   let server_config = [
182 |     `DO, Negotiate_About_Window_Size ;
183 |     `DO, Binary_Transmission ;
184 |     `WILL, Binary_Transmission ;
185 |     (* `DO, Remote_Controlled_Trans_and_Echo ; *)
186 |     (* `DO, Telnet_Suppress_Local_Echo ; *)
187 |     `WILL, Echo ;
188 |     (* `DO, Linemode ; *)
189 |     (* `DO_NOT, Echo ; *)
190 |     (*    `DO, Suppress_Go_Ahead ; *)
191 |     (*    `WILL, Suppress_Go_Ahead ; *)
192 |   ] in
193 |   let client_options = List.map (fun entry -> (`Requested, snd entry)) server_config in
194 |   let requests = List.map (fun (verb, opt) -> emit_option verb opt) server_config in
195 |   { machina = Data ; server_config ; client_options }, Cstruct.concat requests
196 | 
197 | (* Escape application data for the wire: every 0xFF byte is written twice,
198 |    all other bytes are copied unchanged. *)
199 | let encode cs =
200 |   let n = Cstruct.len cs in
201 |   let out = Cstruct.create (2 * n) in (* worst case: every byte doubles *)
202 |   let rec copy src dst =
203 |     if src >= n then dst
204 |     else match Cstruct.get_uint8 cs src with
205 |       | 0xFF -> Cstruct.BE.set_uint16 out dst 0xFFFF ; copy (succ src) (dst + 2)
206 |       | byte -> Cstruct.set_uint8 out dst byte ; copy (succ src) (succ dst)
207 |   in
208 |   Cstruct.sub out 0 (copy 0 0)
209 | 


--------------------------------------------------------------------------------
/src/telnet.ml:
--------------------------------------------------------------------------------
1 | module Server = Server  (* re-export the Server module under Telnet.Server *)
2 | 


--------------------------------------------------------------------------------
/src/telnet.mli:
--------------------------------------------------------------------------------
 1 | (* Public interface of the telnet library; the only module exposed is Server. *)
 2 | module Server : sig
 3 |   type state  (* abstract; obtained from init and threaded through handle *)
 4 |   (* handle st buf: next state, the `Data / `Resize events found in buf, and the option bytes emitted in reply. *)
 5 |   val handle : state -> Cstruct.t ->
 6 |     (state * [ `Data of Cstruct.t | `Resize of int * int ] list * Cstruct.t)
 7 |   (* init (): a fresh state paired with the encoded options of the server configuration. *)
 8 |   val init : unit -> (state * Cstruct.t)
 9 |   (* encode buf: copy of buf in which every 0xFF byte has been doubled. *)
10 |   val encode : Cstruct.t -> Cstruct.t
11 | end
12 | 


--------------------------------------------------------------------------------
/src/telnet.mllib:
--------------------------------------------------------------------------------
1 | Wire
2 | Server
3 | Telnet
4 | 


--------------------------------------------------------------------------------
/src/wire.ml:
--------------------------------------------------------------------------------
 1 | (* Telnet command bytes (240-255); trailing comments give the RFC 854 mnemonic. *)
 2 | [%%cenum
 3 | type telnet_command =
 4 |   | SUBNEG_END [@id 240] (* SE *)
 5 |   | NOP [@id 241]
 6 |   | DATA_MARK [@id 242] (* The data stream portion of a Synch. This should always be accompanied by a TCP Urgent notification. *)
 7 |   | BREAK [@id 243] (* BRK *)
 8 |   | INTERRUPT_PROCESS [@id 244] (* IP *)
 9 |   | ABORT_OUTPUT [@id 245] (* AO *)
10 |   | ARE_YOU_THERE [@id 246] (* AYT *)
11 |   | ERASE_CHARACTER [@id 247] (* EC *)
12 |   | ERASE_LINE [@id 248] (* EL *)
13 |   | GO_AHEAD [@id 249] (* GA *)
14 |   | SUBNEG [@id 250] (* SB; opens a subnegotiation that SUBNEG_END closes *)
15 |   | WILL [@id 251] (* followed by an option code *)
16 |   | WILL_NOT [@id 252] (* WONT; followed by an option code *)
17 |   | DO [@id 253] (* followed by an option code *)
18 |   | DO_NOT [@id 254] (* DONT; followed by an option code *)
19 |   | IAC [@id 255] (* Interpret As Command; sent twice it stands for data byte 255 *)
20 |   [@@uint8_t] [@@sexp]
21 | ]
22 | (* Telnet option codes; ids are implicit, counting up from 0 unless an [@id] resets them. *)
23 | [%%cenum
24 | type telnet_option =
25 |   | Binary_Transmission
26 |   | Echo
27 |   | Reconnection
28 |   | Suppress_Go_Ahead
29 |   | Approx_Message_Size_Negotiation
30 |   | Status
31 |   | Timing_Mark
32 |   | Remote_Controlled_Trans_and_Echo
33 |   | Output_Line_Width
34 |   | Output_Page_Size
35 |   | Output_Carriage_Return_Disposition
36 |   | Output_Horizontal_Tab_Stops
37 |   | Output_Horizontal_Tab_Disposition
38 |   | Output_Formfeed_Disposition
39 |   | Output_Vertical_Tabstops
40 |   | Output_Vertical_Tab_Disposition
41 |   | Output_Linefeed_Disposition
42 |   | Extended_ASCII
43 |   | Logout
44 |   | Byte_Macro
45 |   | Data_Entry_Terminal
46 |   | SUPDUP
47 |   | SUPDUP_Output
48 |   | Send_Location
49 |   | Terminal_Type
50 |   | End_of_Record
51 |   | TACACS_User_Identification
52 |   | Output_Marking
53 |   | Terminal_Location_Number
54 |   | Telnet_3270_Regime
55 |   | X_3_PAD
56 |   | Negotiate_About_Window_Size
57 |   | Terminal_Speed
58 |   | Remote_Flow_Control
59 |   | Linemode
60 |   | X_Display_Location
61 |   | Environment_Option
62 |   | Authentication_Option
63 |   | Encryption_Option
64 |   | New_Environment_Option
65 |   | TN3270E
66 |   | XAUTH
67 |   | CHARSET
68 |   | Telnet_Remote_Serial_Port
69 |   | Com_Port_Control_Option
70 |   | Telnet_Suppress_Local_Echo
71 |   | Telnet_Start_TLS
72 |   | KERMIT
73 |   | SEND_URL
74 |   | FORWARD_X
75 |   (* 50-137: unassigned *)
76 |   | TELOPT_PRAGMA_LOGON [@id 138]
77 |   | TELOPT_SSPI_LOGON
78 |   | TELOPT_PRAGMA_HEARTBEAT
79 |   (* 141-254: unassigned *)
80 |   | Extended_Options_List [@id 255]
81 |   [@@uint8_t] [@@sexp]
82 | ]
83 | 


--------------------------------------------------------------------------------