From 947e6667063b05f75649984c6b4296b8159b29e7 Mon Sep 17 00:00:00 2001 From: atagen Date: Tue, 26 Nov 2024 14:48:39 +1100 Subject: [PATCH] parser, nix, project setup --- .envrc | 1 + .gitignore | 5 ++ bin/.ocamlformat | 0 bin/dune | 5 ++ bin/main.ml | 5 ++ culr.opam | 31 ++++++++++++ dune-project | 26 ++++++++++ flake.lock | 117 ++++++++++++++++++++++++++++++++++++++++++ flake.nix | 94 ++++++++++++++++++++++++++++++++++ lib/.ocamlformat | 0 lib/ansi.ml | 120 ++++++++++++++++++++++++++++++++++++++++++++ lib/cli.ml | 1 + lib/dune | 5 ++ lib/emit.ml | 1 + lib/parse.ml | 84 +++++++++++++++++++++++++++++++ lib/pipes.ml | 89 ++++++++++++++++++++++++++++++++ lib/types.ml | 13 +++++ nix/culr-module.nix | 31 ++++++++++++ nix/default.nix | 15 ++++++ nix/module.nix | 6 +++ test/dune | 2 + test/test_culr.ml | 0 22 files changed, 651 insertions(+) create mode 100644 .envrc create mode 100644 .gitignore create mode 100644 bin/.ocamlformat create mode 100644 bin/dune create mode 100644 bin/main.ml create mode 100644 culr.opam create mode 100644 dune-project create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 lib/.ocamlformat create mode 100644 lib/ansi.ml create mode 100644 lib/cli.ml create mode 100644 lib/dune create mode 100644 lib/emit.ml create mode 100644 lib/parse.ml create mode 100644 lib/pipes.ml create mode 100644 lib/types.ml create mode 100644 nix/culr-module.nix create mode 100644 nix/default.nix create mode 100644 nix/module.nix create mode 100644 test/dune create mode 100644 test/test_culr.ml diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f7937c5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.direnv +_build/ +.vscode +mobydick.txt +_build/ diff --git a/bin/.ocamlformat b/bin/.ocamlformat new file mode 100644 index 0000000..e69de29 diff --git a/bin/dune b/bin/dune new file mode 100644 index 0000000..5d8f690 --- /dev/null +++ b/bin/dune @@ -0,0 +1,5 @@ +(executable + (public_name culr) + (name main) + (modes byte exe) + (libraries culr)) diff --git a/bin/main.ml b/bin/main.ml new file mode 100644 index 0000000..f2020cf --- /dev/null +++ b/bin/main.ml @@ -0,0 +1,5 @@ +open! Culr + + + +let () = Culr.Pipes.read_all () diff --git a/culr.opam b/culr.opam new file mode 100644 index 0000000..b296c11 --- /dev/null +++ b/culr.opam @@ -0,0 +1,31 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A short synopsis" +description: "A longer description" +maintainer: ["Maintainer Name"] +authors: ["Author Name"] +license: "LICENSE" +tags: ["topics" "to describe" "your" "project"] +homepage: "https://github.com/username/reponame" +doc: "https://url/to/documentation" +bug-reports: "https://github.com/username/reponame/issues" +depends: [ + "ocaml" + "dune" {>= "3.16"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/username/reponame.git" diff --git a/dune-project b/dune-project new file mode 100644 index 0000000..b8ffa54 --- /dev/null +++ b/dune-project @@ -0,0 +1,26 @@ +(lang dune 3.16) + +(name culr) + +(generate_opam_files true) + +(source + (github username/reponame)) + +(authors "Author Name") + +(maintainers "Maintainer Name") + +(license LICENSE) + +(documentation https://url/to/documentation) + +(package + (name culr) + (synopsis "A short synopsis") + (description "A longer description") + (depends ocaml dune) + (tags + (topics "to describe" your project))) + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/reference/dune-project/index.html diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..297d893 --- /dev/null +++ b/flake.lock @@ -0,0 +1,117 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1726560853, + "narHash": "sha256-X6rJYSESBVr3hBoH0WbKE5KvhPU5bloyZ2L4K60/fPQ=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "c1dfcf08411b08f6b8615f7d8971a2bfa81d5e8a", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "flake-utils_2": { + "inputs": { + "systems": "systems_2" + }, + "locked": { + "lastModified": 1726560853, + "narHash": "sha256-X6rJYSESBVr3hBoH0WbKE5KvhPU5bloyZ2L4K60/fPQ=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "c1dfcf08411b08f6b8615f7d8971a2bfa81d5e8a", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1730359060, + "narHash": "sha256-Hkk0mf4pgvX9Ut0YA397nsFqMLhzFVBdFHc4PhBrxYE=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "e19cfce6f3f08d07653157d8826f5c920c770d7b", + "type": "github" + }, + "original": { + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "e19cfce6f3f08d07653157d8826f5c920c770d7b", + "type": "github" + } + }, + "ocaml-overlay": { + "inputs": { + "flake-utils": "flake-utils_2", + "nixpkgs": "nixpkgs" + }, + "locked": { + "lastModified": 1730414122, + "narHash": "sha256-eAfo1XsQMdKuiOOhqCuai7vpIBH8S4ll7Sm4BP/M58c=", + "owner": "nix-ocaml", + "repo": "nix-overlays", + "rev": "1de1cabdb68cbc667dd48da2f128c2df6d5fe604", + "type": "github" + }, + "original": { + "owner": "nix-ocaml", + "repo": "nix-overlays", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": [ + "ocaml-overlay", + "nixpkgs" + ], + "ocaml-overlay": "ocaml-overlay" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + }, + "systems_2": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..ed428e3 --- /dev/null +++ b/flake.nix @@ -0,0 +1,94 @@ +{ + inputs = { + # nixpkgs.url = "github:nixOS/nixpkgs"; + nixpkgs.follows = "ocaml-overlay/nixpkgs"; + ocaml-overlay = { + url = "github:nix-ocaml/nix-overlays"; + }; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { + self, + nixpkgs, + flake-utils, + ocaml-overlay, + }: let + version = builtins.toString self.lastModified; + in + flake-utils.lib.eachDefaultSystem ( + system: let + pkgs = import nixpkgs { + inherit system; + overlays = [ + ocaml-overlay.overlays.default + ]; + extra-substituters = "https://anmonteiro.nix-cache.workers.dev"; + extra-trusted-public-keys = "ocaml.nix-cache.com-1:/xI2h2+56rwFfKyyFVbkJSeGqSIYMC/Je+7XXqGKDIY="; + }; + inherit (pkgs) mkShell ocaml-ng; + inherit + (ocaml-ng.ocamlPackages_5_2) + dune_3 + ocaml + utop + ocaml-lsp + ocamlformat + ocamlformat-rpc-lib + angstrom + angstrom-unix + faraday + ppx_inline_test + ; + minimal = [ + dune_3 + ocaml + angstrom + angstrom-unix + faraday + ppx_inline_test + ]; + dev = [ + utop + ocaml-lsp + ocamlformat + ocamlformat-rpc-lib + ]; + in { + devShells.default = mkShell { + buildInputs = minimal ++ dev; + }; + + packages.default = pkgs.callPackage ./nix/default.nix {ocaml-deps = minimal; inherit version;}; + + defaultPackage = self.packages.${system}.default; + } + ) + // { + nixosModules.culr = import ./nix/module.nix {overlay = self.overlays.culr;}; + + overlays.culr = final: prev: let + inherit + (prev.ocaml-ng) + dune_3 + ocaml + angstrom + angstrom-unix + faraday + ppx_inline_test + ; + ocaml-deps = [ + dune_3 + ocaml + angstrom + angstrom-unix + faraday + ppx_inline_test + ]; + in { + meat = final.callPackage ./nix/default.nix { + inherit ocaml-deps version; + }; + }; + }; +} diff --git a/lib/.ocamlformat b/lib/.ocamlformat new file mode 100644 index 0000000..e69de29 diff --git a/lib/ansi.ml b/lib/ansi.ml new file mode 100644 index 0000000..f167c3b --- /dev/null +++ b/lib/ansi.ml @@ -0,0 +1,120 @@ +open Angstrom +open Types + +let is_digit = function '0' .. '9' -> true | _ -> false +let digit = take_while is_digit >>| int_of_string +let is_semi c = c == ';' +let skip_semi = skip is_semi +let ansi_escape = string "\x1b[" +let ansi_values = sep_by skip_semi digit + +let ansi_sgr = + let valid = + [ + 'm'; + 'A'; + 'B'; + 'C'; + 'D'; + 'E'; + 'F'; + 'G'; + 'H'; + 'J'; + 'K'; + 'S'; + 'T'; + 'F'; + 'i'; + 'n'; + 's'; + 'u'; + 'h'; + 'l'; + ] + in + peek_char >>= function + | Some c -> + if List.mem c valid then advance 1 *> return (Some c) else return None + | None -> return None + +let capture_ansi = both ansi_values ansi_sgr + +let ansi_parse = + ansi_escape *> capture_ansi + +let%test "ansi_parse" = + parse_string ~consume:Angstrom.Consume.Prefix ansi_parse "\x1b[35;23;12m" + |> function + | Ok (d, t) -> d = [ 35; 23; 12 ] && t = Some 'm' + | _ -> false + +type parser_state_colour = ParsingFg | ParsingBg + +type parser_state = + | ParsingRgb of int list * parser_state_colour + | ParsingSimple of parser_state_colour + | ParsingColour of parser_state_colour + | Done + +let parse_ansi_codes = + let regular code parsed = + match code with + | 0 -> (Done, parsed @ [ Reset ]) + | n when n >= 30 && n <= 37 -> + (Done, parsed @ [ Fg (Intrinsic (code - 30)) ]) + | 38 -> (ParsingColour ParsingFg, parsed) + | n when n >= 40 && n <= 47 -> + (Done, parsed @ [ Bg (Intrinsic (code - 40)) ]) + | 48 -> (ParsingColour ParsingBg, parsed) + | _ -> (Done, parsed @ [ Other code ]) + in + let waste_state p = + Other (match p with ParsingFg -> 38 | ParsingBg -> 48) + in + let colour code parsed t = + match code with + | 5 -> (ParsingRgb ([], t), parsed) + | 2 -> (ParsingSimple t, parsed) + | n -> (Done, parsed @ [ waste_state t; Other n ]) + (* malformed, just reproduce it *) + in + let wrap a t = match t with ParsingBg -> Bg a | ParsingFg -> Fg a in + let rgb code parsed (triple : int list) t : parser_state * ansi_gfx list = + match List.length triple with + | 2 -> + let r, g = match triple with [ r; g ] -> (r, g) | _ -> (0, 0) in + (Done, parsed @ [ wrap (RGB (r, g, code)) t ]) + | _ -> (ParsingRgb (triple @ [ code ], t), parsed) + in + fun ((parsing : parser_state), (parsed : ansi_gfx list)) (code : int) : + (parser_state * ansi_gfx list) -> + match parsing with + | Done -> regular code parsed + | ParsingColour t -> colour code parsed t + | ParsingRgb (triple, t) -> rgb code parsed triple t + | ParsingSimple t -> (Done, parsed @ [ wrap (Simple code) t ]) + +let parse_ansi_intermediate (codes, terminator) : chunk = + let (_state, final) : parser_state * ansi_gfx list = + match terminator with + | Some 'm' -> List.fold_left parse_ansi_codes (Done, []) codes + | _ -> (Done, List.map (fun i -> Other i) codes) + in + Ansi final + +let%test "ansi_intermediate_parse" = + let result = + parse_ansi_intermediate ([ 38; 5; 25; 24; 23; 1; 0 ], Some 'm') + in + result = Ansi [ Fg (RGB (25, 24, 23)); Other 1; Reset ] + +let%test "ansi_full" = + let final = + parse_string ~consume:Angstrom.Consume.Prefix ansi_parse + "\x1b[38;5;25;24;23;1;0m" + |> function + | Ok (raw, term) -> parse_ansi_intermediate (raw, term) + | Error e -> failwith ("parsing failed: " ^ e) + in + final = Ansi [ Fg (RGB (25, 24, 23)); Other 1; Reset ] diff --git a/lib/cli.ml b/lib/cli.ml new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/lib/cli.ml @@ -0,0 +1 @@ + diff --git a/lib/dune b/lib/dune new file mode 100644 index 0000000..4487bc2 --- /dev/null +++ b/lib/dune @@ -0,0 +1,5 @@ +(library + (name culr) + (libraries angstrom angstrom-unix faraday ) + (inline_tests) + (preprocess (pps ppx_inline_test))) diff --git a/lib/emit.ml b/lib/emit.ml new file mode 100644 index 0000000..29c89a2 --- /dev/null +++ b/lib/emit.ml @@ -0,0 +1 @@ +open! Faraday diff --git a/lib/parse.ml b/lib/parse.ml new file mode 100644 index 0000000..7a649cd --- /dev/null +++ b/lib/parse.ml @@ -0,0 +1,84 @@ +(* open Ansi *) +open Types +open Angstrom + +let is_sep = function '\x20' | '\x09' -> true | _ -> false +let is_delim = function '\x0a' | '\x0d' -> true | _ -> false +let is_text c = not (is_sep c || is_delim c || c = '\x1b') +let sep = take_while is_sep >>| fun s -> Separator s +let delim = take_while is_delim >>| fun s -> Delimiter s +let text = take_while is_text >>| fun s -> Text s +let ansi = Ansi.ansi_parse >>| Ansi.parse_ansi_intermediate + +let culr_parse = + peek_char_fail >>= function + | n when is_sep n -> sep + | n when is_delim n -> delim + | n when n = '\x1b' -> ansi + | _n -> text + +let debug_print = + let print_colour = + List.fold_left + (fun _acc el -> + match el with + | Fg c -> ( + match c with + | Simple n | Intrinsic n -> + print_endline ("fg simple/intrinsic " ^ string_of_int n) + | RGB (r, g, b) -> + print_endline + ("fg rgb: " + ^ List.fold_left + (fun acc el -> acc ^ ", " ^ string_of_int el) + "" [ r; g; b ])) + | Bg c -> ( + match c with + | Simple n | Intrinsic n -> + print_endline ("bg simple/intrinsic " ^ string_of_int n) + | RGB (r, g, b) -> + print_endline + ("bg rgb: " + ^ List.fold_left + (fun acc el -> acc ^ ", " ^ string_of_int el) + "" [ r; g; b ])) + | Reset -> print_endline "ansi reset" + | Other n -> print_endline ("other ansi: " ^ string_of_int n)) + () + in + function + | Text s | Separator s | Delimiter s -> print_endline ("parsed '" ^ s ^ "'") + | Ansi a -> print_colour a + +let%test "sep_parse" = + Angstrom.parse_string ~consume:Consume.All sep " " |> function + | Ok (Separator " ") -> true + | _ -> false + +let%test "delim_parse" = + Angstrom.parse_string ~consume:Consume.All delim "\n\n\n" |> function + | Ok (Delimiter "\n\n\n") -> true + | _ -> false + +let%test "text_parse" = + Angstrom.parse_string ~consume:Consume.All text "okokyeahok" |> function + | Ok (Text "okokyeahok") -> true + | _ -> false + +let%test "culr_parse" = + Angstrom.parse_string ~consume:Consume.Prefix (many1 culr_parse) + "sometext \n\n\n \x1b[38;2;2m" + |> function + | Ok + [ + Text "sometext"; + Separator " "; + Delimiter "\n\n\n"; + Separator " "; + Ansi [ Fg (Simple 2) ]; + ] -> + true + | Ok n -> + n |> List.fold_left (fun _acc el -> el |> debug_print) (); + false + | _ -> false diff --git a/lib/pipes.ml b/lib/pipes.ml new file mode 100644 index 0000000..22bcfbe --- /dev/null +++ b/lib/pipes.ml @@ -0,0 +1,89 @@ +open Angstrom.Buffered +open Parse + +let print_buf buf = print_endline ("buffer has " ^ Bigstringaf.to_string buf) + +(* let read_all () = + let parser = Angstrom.Unbuffered.parse culr_parse + and buf = Bigstringaf.create 4096 in + In_channel.set_binary_mode stdin true; + print_endline "doing it"; + let rec loop parser = + parser |> function + (* START CONDITION *) + | Partial { committed = 0; continue } -> ( + match In_channel.input_bigarray stdin buf 0 4096 with + | 0 -> + print_endline "got 0 end"; + raise End_of_file + | _ -> + print_buf buf; + print_endline "got partial 0"; + loop (continue buf ~off:0 ~len:4096 Incomplete) (* CONTINUE *)) + | Partial { continue; _ } -> ( + match In_channel.input_bigarray stdin buf 0 4096 with + | 0 -> + print_endline "got + end"; + raise End_of_file + | _ -> + print_buf buf; + print_endline "got partial +"; + loop (continue buf ~off:0 ~len:4096 Incomplete)) + | Done (_, _) -> + print_endline "got done"; + raise End_of_file + | Fail (_, _, _) -> print_endline "got fail" + in + try loop parser + with End_of_file -> ( + print_endline "eof"; + parser |> state_to_option |> function + | Some chunks -> + print_endline "chunks:"; + List.iter (fun x -> debug_print x) chunks + | None -> + print_endline "no chunks"; + ()) *) + +(* let read_all_3 () = + let parser = parse ~initial_buffer_size:4096 culr_parse + and buf = Bigstringaf.create 4096 in + In_channel.set_binary_mode stdin true; + print_endline "doing it"; + let rec loop parser = + Bigarray.Array1.fill buf '\x00'; + match In_channel.input_bigarray stdin buf 0 4096 with + | 0 -> ( + print_endline "got eof"; + parser + |> (function Partial cont -> cont `Eof | _ -> parser) + |> state_to_option + |> function + | Some chunks -> List.iter debug_print chunks + | None -> print_endline "nothing") + | _ -> ( + print_endline "got buf:"; + print_buf buf; + parser |> function + | Partial cont -> loop (cont (`Bigstring buf)) + | Done (_, chunks) -> + print_endline "got done"; + List.iter debug_print chunks + | Fail (_, _, _) -> print_endline "got fail") + in + loop parser *) + +(* let rec loop state = + state |> function + | Partial {committed; continue} -> continue + | Done(_,_) -> () + | Fail (_,_,_) -> () *) + +let read_all () = + In_channel.set_binary_mode stdin true; + Angstrom_unix.parse_many ~buf_size:4096 culr_parse (fun c -> debug_print c) In_channel.stdin + |> function + | unparsed, Ok _ -> + print_endline ("didn't parse this much: " ^ string_of_int unparsed.len ^ "\nunparsed:\n" ^ (Bigstringaf.substring unparsed.buf ~off:unparsed.off ~len:unparsed.len) ); + () + | _ -> () diff --git a/lib/types.ml b/lib/types.ml new file mode 100644 index 0000000..cec2fdd --- /dev/null +++ b/lib/types.ml @@ -0,0 +1,13 @@ + +type colour = + | Intrinsic of int (* _0-_7 *) + | Simple of int (* _8;5;n *) + | RGB of int * int * int (* _8;2;r;g;b *) + +type ansi_gfx = Bg of colour | Fg of colour | Reset | Other of int + +type chunk = + | Text of string + | Separator of string + | Delimiter of string + | Ansi of ansi_gfx list diff --git a/nix/culr-module.nix b/nix/culr-module.nix new file mode 100644 index 0000000..9fcbfc3 --- /dev/null +++ b/nix/culr-module.nix @@ -0,0 +1,31 @@ +{ + pkgs, + lib, + config, + ... +}: let + inherit (lib) mkIf mkEnableOption mkOption types; + cfg = config.programs.culr; +in { + options.programs.meat = { + enable = mkEnableOption "culr"; + pattern = mkOption { + type = with types; nullOr str; + default = null; + description = "colourising pattern"; + }; + palette = mkOption { + type = with types; nullOr str; + default = null; + description = "palette to use in comma separated RGB hex eg. #0f0f0f"; + }; + }; + + config = mkIf cfg.enable { + environment.systemPackages = let inherit (pkgs) culr; in [culr]; + environment.sessionVariables = { + CULR_PATTERN = mkIf cfg.pattern cfg.pattern; + CULR_PALETTE = mkIf cfg.palette cfg.palette; + }; + }; +} diff --git a/nix/default.nix b/nix/default.nix new file mode 100644 index 0000000..aa2db52 --- /dev/null +++ b/nix/default.nix @@ -0,0 +1,15 @@ +{ + ocaml-deps, + ocamlPackages, + version, + ... +}: +ocamlPackages.buildDunePackage { + pname = "culr"; + version = "0.1-${version}"; + + minimalOCamlVersion = "5.2"; + + src = ./..; + buildInputs = ocaml-deps; +} diff --git a/nix/module.nix b/nix/module.nix new file mode 100644 index 0000000..991afdb --- /dev/null +++ b/nix/module.nix @@ -0,0 +1,6 @@ +{ + overlay +}: { + imports = [./culr-module.nix]; + nixpkgs.overlays = overlay; +} diff --git a/test/dune b/test/dune new file mode 100644 index 0000000..497db94 --- /dev/null +++ b/test/dune @@ -0,0 +1,2 @@ +(test + (name test_culr)) diff --git a/test/test_culr.ml b/test/test_culr.ml new file mode 100644 index 0000000..e69de29