From 106f8e76e41ae7093b198feeb278e347c53a58c4 Mon Sep 17 00:00:00 2001 From: ArthurW Date: Tue, 16 Jan 2024 20:34:56 +0100 Subject: [PATCH] performances and bugs fix thanks to voodoo testing --- README.md | 191 +++-------- cli/dune | 1 + cli/search.ml | 47 ++- db/entry.ml | 159 ++++----- db/entry.mli | 13 - db/string_automata.ml | 95 +++--- db/string_automata.mli | 10 +- db/type_polarity.ml | 6 +- db/type_polarity.mli | 5 +- dune-project | 19 +- dune-workspace | 3 + index/db_writer.ml | 47 ++- index/db_writer.mli | 3 +- index/index.ml | 28 +- index/load_doc.ml | 29 +- index/suffix_tree.ml | 134 ++++++-- index/suffix_tree.mli | 2 +- jsoo/dune | 8 - jsoo/main.ml | 18 +- query/dune | 2 +- query/dynamic_cost.ml | 6 +- query/io.ml | 40 +++ query/name_cost.ml | 4 +- query/priority_queue.ml | 83 +++-- query/priority_queue.mli | 4 +- query/query.ml | 52 +-- query/query.mli | 48 +-- query/query_parser.ml | 9 +- query/succ.ml | 178 +++++----- query/succ.mli | 1 + query/top_results.ml | 36 ++- query/top_results.mli | 10 +- query/type_distance.ml | 222 ++++++------- query/type_distance.mli | 8 +- sherlodoc.opam | 17 +- store/db_store.default.ml | 4 +- test/cram/base_cli.t | 502 ++++++++++++++--------------- test/cram/base_web.t | 8 +- test/cram/cli.t/run.t | 38 +-- test/cram/cli_small.t/run.t | 10 +- test/cram/module_type_cost.t/run.t | 10 +- test/cram/simple.t/run.t | 2 +- www/dune | 3 +- www/static/style.css | 42 ++- www/ui.ml | 26 +- www/www.ml | 15 +- 46 files changed, 1200 insertions(+), 998 deletions(-) create mode 100644 dune-workspace create mode 100644 query/io.ml diff --git a/README.md b/README.md index d01fd458..240aaca1 100644 --- a/README.md +++ b/README.md @@ -1,182 +1,91 @@ **Try it online at [doc.sherlocode.com](https://doc.sherlocode.com) !** -A Hoogle-like search engine for OCaml documentation. It can be used in -differents ways, [online](https://doc.sherlocode.com), or offline with -the dev version of odoc. +Sherlodoc is a search engine for OCaml documentation (inspired by [Hoogle](https://hoogle.haskell.org/)), which allows you to search through OCaml libraries by names and approximate type signatures: -It has fuzzy type search supported by a polarity search. As an example, the type -`string -> int -> char` gets simplified to `{ -string, -int, +char }` which -means that it consumes a `string` and an `int` and produces a `char` -(irrespective of the order of the arguments). This polarity search is fast -enough and yields good candidates which are then sorted by similarity with the -query. The sort is slower but the number of candidates is small. +- Search by name: [`list map`](https://doc.sherlocode.com/?q=list%20map) +- Search inside documentation comments: [`raise Not_found`](https://doc.sherlocode.com/?q=raise%20Not_found) +- Fuzzy type search is introduced with a colon, e.g. [`: map -> list`](https://doc.sherlocode.com/?q=%3A%20map%20-%3E%20list) +- Search by name and type with a colon separator [`Bogue : Button.t`](https://doc.sherlocode.com/?q=Bogue%20%3A%20Button.t) +- An underscore `_` can be used as a wildcard in type queries: [`(int -> _) -> list -> _`](https://doc.sherlocode.com/?q=(int%20-%3E%20_)%20-%3E%20list%20-%3E%20_) +- Type search supports products and reordering of function arguments: [`array -> ('a * int -> bool) -> array`](https://doc.sherlocode.com/?q=%3A%20array%20-%3E%20(%27a%20*%20int%20-%3E%20bool)%20-%3E%20array) -You can search for anything that can exists in an MLI files : values, types, -modules, exceptions, constructors etc... +## Local usage -Fuzzy type search is available for values, sum-types constructors, exceptions, -and record fields. - -# Usage - -First, install sherlodoc : +First, install sherlodoc and odig: ```bash -opam pin add https://github.com/art-w/sherlodoc.git#jsoo -opam install sherlodoc -``` +$ opam pin add 'https://github.com/art-w/sherlodoc.git' # optional -## Generating a search-database - -The first step to using sherlodoc is generating a search-database. You do this -with the command `sherlodoc index` : - -```bash -sherlodoc index --format=marshal -o db.marshal a.odocl b.odocl +$ opam install sherlodoc odig ``` -The `--format` option determines in which format the database is outputted. The -available format are `marshal`, `js`. The `js` format, for -javascript, is the one compatible with odoc, and the `marshal` for most other -uses. - -There is a third format : `ancient`, that is only available if the package - `ancient` is installed. It is more complicated than the other two, you can read -on it [here](https://github.com/UnixJunkie/ocaml-ancient). It is used for the -[online](https://doc.sherlocode.com) version of sherlodoc, and is an optional -dependency of the `sherlodoc` package. - -The `-o` option is the filename of the output. - -Then you need to provide a list of .odocl files that contains the signatures -items that are going to be searchable. They are build artifacts of odoc. - -There are others options that are documented by `sherlodoc index --help`. - -## Queries +[Odig](https://erratique.ch/software/odig) can generate the odoc documentation of your current switch with: -To query sherlodoc, be it on the command-line or in a web interface, you need -to input a string query. A query is a list of words, separated by spaces. -Results will be entries that have every word of the list present in them. - -``` -"list map" -``` - -The above query will return entries that have both `list` and `map` in them. - -You can also add `: ` at the end of your query, and in that case, results -will only be results whose type match . This can only be a value, an -exception, a constructor or a record field. - -Matching a type is fuzzy, if you do the following query : - -``` -"blabla : string" +```bash +$ odig odoc # followed by `odig doc` to browse your switch documentation ``` -It could return `val blablabla : int -> string` and `val blabla2 : string`. +Which sherlodoc can then index to create a search database: -You can have just the type-part of the query : `": string -> int"` is a valid -query. +```bash +# name your sherlodoc database +$ export SHERLODOC_DB=/tmp/sherlodoc.marshal -You can use wildcards : +# if you are using OCaml 4, we recommend the `ancient` database format: +$ opam install ancient +$ export SHERLODOC_DB=/tmp/sherlodoc.ancient +# index all odoc files generated by odig for your current switch: +$ sherlodoc index $(find $OPAM_SWITCH_PREFIX/var/cache/odig/odoc -name '*.odocl') ``` -": string -> _" -``` - -will only return functions that take a string a argument, no matter what they -return. - -There is limited support for polymorphism : you cannot search for `'a -> 'a` and -get every function `int -> int`, `string -> string` etc. However it will return -a function whose literal type is `'a -> 'a`. Having the first behaviour would -be a lot harder to program, and probably not a good idea, as it would be -impossible to search for polymorphic functions. -## Searching on the command line - -If you have a search database in `marshal` format, you can search on the command -line : +Enjoy searching from the command-line or run the webserver: ```bash -sherlodoc --db=db.marshal "blabla : int -> string" -``` - -`--db` is the filename of the search database. If absent, the environment -variable `SHERLODOC_DB` will be used instead. +$ sherlodoc search "map : list" +$ sherlodoc search # interactice cli -In my example, I gave a query, but if you give none, sherlodoc enter an -interactive mode where you can enter queries until you decide to quit. +$ opam install dream +$ sherlodoc serve # webserver at http://localhost:1234 +``` -There are more option documented by `sherlodoc --help`, some of them are for -debugging/testing purposes, others might be useful. +The different commands support a `--help` argument for more details/options. -### Search your switch +In particular, sherlodoc supports three different file formats for its database, which can be specified either in the filename extension or through the `--db-format=` flag: +- `ancient` for fast database loading using mmap, but is only compatible with OCaml 4. +- `marshal` for when ancient is unavailable, with slower database opening. +- `js` for integration with odoc static html documentation for client-side search without a server. -A reasonable use of sherlodoc on the cli is to search for signatures items from -your whole switch. Since odig can generate the documentation of the switch, we -can get the .odocl files with it : +## Integration with Odoc -Generate the documentation of your switch : +Odoc 2.4.0 adds a search bar inside the statically generated html documentation. [Integration with dune is in progress](https://github.com/ocaml/dune/pull/9772), you can try it inside a fresh opam switch with: (warning! this will recompile any installed package that depends on dune!) ```bash -odig odoc -``` - -Generate the search database : +$ opam pin https://github.com/emileTrotignon/dune.git#search-odoc-new -```bash -sherlodoc index --format=marshal -o db.marshal $(find $OPAM_SWITCH_PREFIX/var/cache/odig/odoc -name "*.odocl") +$ dune build @doc # in your favorite project ``` -Enjoy searching : +Otherwise, manual integration with odoc requires to add to every call of `odoc html-generate` the flags `--search-uri sherlodoc.js --search-uri db.js` to activate the search bar. You'll also need to generate a search database `db.js` and provide the `sherlodoc.js` dependency (a version of the sherlodoc search engine with odoc support, compiled to javascript): ```bash -sherlodoc search --db=db.marshal -``` +$ sherlodoc index --db=_build/default/_doc/_html/YOUR_LIB/db.js \ + $(find _build/default/_doc/_odocls/YOUR_LIB -name '*.odocl') -## Searching from an odoc search bar - -The latest unreleased version of odoc is compatible with sherlodoc. This allows -you to upload the documentation of a package with a search for this package -embedded. - -For this to work, you need to generate a search database with format `js`, and -then add to every call of `odoc html-generate` the flags `--search-uri -sherlodoc.js --search-uri db.js`. - -Be sure to copy the two js files in the output directory given to the -html-generate command : - -```bash -sherlodoc js html_output/sherlodoc.js ; -cp db.js html_output/db.js ; +$ sherlodoc js > _build/default/_doc/_html/sherlodoc.js ``` -Obviously, most people use dune, and do not call `odoc html-generate`. A patch -for dune is being [worked on](https://github.com/emileTrotignon/dune/tree/search-odoc-new). -If you want to, you can test it, it should work. It is still work in progress. +## How it works -## Sherlodoc online +The sherlodoc database uses [Suffix Trees](https://en.wikipedia.org/wiki/Suffix_tree) to search for substrings in value names, documentation and types. During indexation, the suffix trees are compressed to state machine automatas. The children of every node are also sorted, such that a sub-tree can be used as a priority queue during search enumeration. -If you want to use sherlodoc as a server, like on -[doc.sherlocode.com](https://doc.sherlocode.com) it is also possible. +To rank the search results, sherlodoc computes a static evaluation of each candidate during indexation. This static scoring biases the search to favor short names, short types, the presence of documentation, etc. When searching, a dynamic evaluation dependent on the user query is used to adjust the static ordering of the results: -As usual, generate your search database : +- How similar is the result name to the search query? (to e.g. prefer results which respect the case: [`map`](https://doc.sherlocode.com/?q=map) vs [`Map`](https://doc.sherlocode.com/?q=Map)) +- How similar are the types? (using a tree diff algorithm, as for example [`('a -> 'b -> 'a) -> 'a -> 'b list -> 'a`](https://doc.sherlocode.com/?q=(%27a%20-%3E%20%27b%20-%3E%20%27a)%20-%3E%20%27a%20-%3E%20%27b%20list%20-%3E%20%27a) and [`('a -> 'b -> 'b) -> 'a list -> 'b -> 'b`](https://doc.sherlocode.com/?q=(%27a%20-%3E%20%27b%20-%3E%20%27b)%20-%3E%20%27a%20list%20-%3E%20%27b%20-%3E%20%27b) are isomorphic yet point to `fold_left` and `fold_right` respectively) -```bash -sherlodoc index --format=ancient -o db.ancient $(find /path/to/doc -name "*.odocl") -``` - -Then you can run the website : +For fuzzy type search, sherlodoc aims to provide good results without requiring a precise search query, on the basis that the user doesn't know the exact type of the things they are looking for (e.g. [`string -> file_descr`](https://doc.sherlocode.com/?q=string%20-%3E%20file_descr) is incomplete but should still point in the right direction). In particular when exploring a package documentation, the common question "how do I produce a value of type `foo`" can be answered with the query `: foo` (and "which functions consume a value of type `bar`" with `: bar -> _`). This should also work when the type can only be produced indirectly through a callback (for example [`: Eio.Switch.t`](https://doc.sherlocode.com/?q=%3A%20Eio.Switch.t) has no direct constructor). To achieve this, sherlodoc performs a type decomposition based on the polarity of each term: A value produced by a function is said to be positive, while an argument consumed by a function is negative. This simplifies away the tree shape of types, allowing their indexation in the suffix trees. The cardinality of each value type is also indexed, to e.g. differentiate between [`list -> list`](https://doc.sherlocode.com/?q=list%20-%3E%20list) and [`list -> list -> list`](https://doc.sherlocode.com/?q=list%20-%3E%20list%20-%3E%20list). -```bash -sherlodoc serve db.ancient -``` +While the polarity search results are satisfying, sherlodoc offers very limited support for polymorphic variables, type aliases and true type isomorphisms. You should check out the extraordinary [Dowsing](https://github.com/Drup/dowsing) project for this! -The real magic for [doc.sherlocode.com](https://doc.sherlocode.com) is all the -.odocl artifacts of the package documentation generated for -[`ocaml.org/packages`](https://ocaml.org/packages), which I got my hands on -thanks to insider trading (but don't have the bandwidth to share back... sorry!) +And if you speak French, a more detailed [presentation of Sherlodoc](https://www.irill.org/videos/OUPS/2023-03/wendling.html) (and [Sherlocode](https://sherlocode.com)) was given at the [OCaml Users in PariS (OUPS)](https://oups.frama.io/) in March 2023. diff --git a/cli/dune b/cli/dune index 778a0838..2b1d3ffa 100644 --- a/cli/dune +++ b/cli/dune @@ -9,6 +9,7 @@ index query db_store + unix (select serve.ml from diff --git a/cli/search.ml b/cli/search.ml index 95629857..46a52632 100644 --- a/cli/search.ml +++ b/cli/search.ml @@ -36,31 +36,46 @@ let print_result ~print_cost ~no_rhs (elt : Db.Entry.t) = in Format.printf "%s%s %s%s%a@." cost kind typedecl_params name pp_rhs elt.rhs -let search ~print_cost ~static_sort ~limit ~db ~no_rhs ~pretty_query query = +let search ~print_cost ~static_sort ~limit ~db ~no_rhs ~pretty_query ~time query = let query = Query.{ query; packages = []; limit } in if pretty_query then print_endline (Query.pretty query) ; - match Query.search ~shards:db ~dynamic_sort:(not static_sort) query with + let t0 = Unix.gettimeofday () in + let r = Query.Blocking.search ~shards:db ~dynamic_sort:(not static_sort) query in + let t1 = Unix.gettimeofday () in + match r with | [] -> print_endline "[No results]" | _ :: _ as results -> List.iter (print_result ~print_cost ~no_rhs) results ; - flush stdout + flush stdout ; + if time then Format.printf "Search in %f@." (t1 -. t0) -let rec search_loop ~print_cost ~no_rhs ~pretty_query ~static_sort ~limit ~db = +let rec search_loop ~print_cost ~no_rhs ~pretty_query ~static_sort ~limit ~time ~db = Printf.printf "%ssearch>%s %!" "\027[0;36m" "\027[0;0m" ; match Stdlib.input_line stdin with | query -> - search ~print_cost ~static_sort ~limit ~db ~no_rhs ~pretty_query query ; - search_loop ~print_cost ~no_rhs ~pretty_query ~static_sort ~limit ~db + search ~print_cost ~static_sort ~limit ~db ~no_rhs ~pretty_query ~time query ; + search_loop ~print_cost ~no_rhs ~pretty_query ~static_sort ~limit ~time ~db | exception End_of_file -> Printf.printf "\n%!" -let search query print_cost no_rhs static_sort limit pretty_query db_format db_filename = +let search + query + print_cost + no_rhs + static_sort + limit + pretty_query + time + db_format + db_filename + = let module Storage = (val Db_store.storage_module db_format) in let db = Storage.load db_filename in match query with | None -> print_endline header ; - search_loop ~print_cost ~no_rhs ~pretty_query ~static_sort ~limit ~db - | Some query -> search ~print_cost ~no_rhs ~pretty_query ~static_sort ~limit ~db query + search_loop ~print_cost ~no_rhs ~pretty_query ~static_sort ~limit ~time ~db + | Some query -> + search ~print_cost ~no_rhs ~pretty_query ~static_sort ~limit ~time ~db query open Cmdliner @@ -76,6 +91,10 @@ let print_cost = let doc = "For debugging purposes: prints the cost of each result" in Arg.(value & flag & info [ "print-cost" ] ~doc) +let print_time = + let doc = "For debugging purposes: prints the search time" in + Arg.(value & flag & info [ "print-time" ] ~doc) + let static_sort = let doc = "Sort the results without looking at the query.\n\ @@ -93,4 +112,12 @@ let pretty_query = Arg.(value & flag & info [ "pretty-query" ] ~doc) let term = - Term.(const search $ query $ print_cost $ no_rhs $ static_sort $ limit $ pretty_query) + Term.( + const search + $ query + $ print_cost + $ no_rhs + $ static_sort + $ limit + $ pretty_query + $ print_time) diff --git a/db/entry.ml b/db/entry.ml index 33d44ac5..17c9c1c9 100644 --- a/db/entry.ml +++ b/db/entry.ml @@ -1,5 +1,8 @@ let empty_string = String.make 0 '_' -let non_empty_string s = if s = "" then empty_string else s + +let non_empty_string s = + (* to protect against `ancient` segfaulting on statically allocated values *) + if s = "" then empty_string else s module Kind = struct type t = @@ -38,91 +41,101 @@ module Package = struct { name = non_empty_string name; version = non_empty_string version } let compare a b = String.compare a.name b.name - let link { name; version } = Printf.sprintf "https://ocaml.org/p/%s/%s" name version + let link { name; version } = "https://ocaml.org/p/" ^ name ^ "/" ^ version end -module T = struct - type t = - { name : string - ; rhs : string option - ; url : string - ; kind : Kind.t - ; cost : int - ; doc_html : string - ; pkg : Package.t - } +type t = + { name : string + ; rhs : string option + ; url : string + ; kind : Kind.t + ; cost : int + ; doc_html : string + ; pkg : Package.t + } - let string_compare_shorter a b = - match Int.compare (String.length a) (String.length b) with - | 0 -> String.compare a b - | c -> c +let string_compare_shorter a b = + match Int.compare (String.length a) (String.length b) with + | 0 -> String.compare a b + | c -> c - let structural_compare a b = - match string_compare_shorter a.name b.name with +let structural_compare a b = + match string_compare_shorter a.name b.name with + | 0 -> begin + match Package.compare a.pkg b.pkg with | 0 -> begin - match Package.compare a.pkg b.pkg with + match Stdlib.compare a.kind b.kind with | 0 -> begin - match Stdlib.compare a.kind b.kind with - | 0 -> begin - match string_compare_shorter a.doc_html b.doc_html with - | 0 -> String.compare a.url b.url - | c -> c - end + match string_compare_shorter a.doc_html b.doc_html with + | 0 -> String.compare a.url b.url | c -> c end | c -> c end | c -> c - - let compare a b = - if a == b - then 0 - else begin - match Int.compare a.cost b.cost with - | 0 -> structural_compare a b - | cmp -> cmp - end - - let equal a b = compare a b = 0 -end - -include T -module Set = Set.Make (T) - -(** Array of elts. For use in functors that require a type [t] and not ['a t].*) -module Array = struct - type elt = t - type t = elt array option - - let is_empty = function - | None -> true - | Some arr -> - assert (Array.length arr > 0) ; - false - - let empty = None - - let minimum = function - | None -> None - | Some arr -> Some arr.(0) - - let of_list arr = - let arr = Array.of_list arr in - Array.sort compare arr ; - if Array.length arr = 0 then empty else Some arr - - let equal_elt = T.equal - let compare_elt = T.compare -end + end + | c -> c + +let compare a b = + if a == b + then 0 + else begin + match Int.compare a.cost b.cost with + | 0 -> structural_compare a b + | cmp -> cmp + end + +let equal a b = compare a b = 0 + +let stdlib_link ~name t = + let path, hashref = + match List.rev name, String.index_opt t.url '#' with + | _ :: path, Some idx -> + let idx = idx + 1 in + let tgt = + match String.index_from_opt t.url idx '-' with + | None -> String.sub t.url idx (String.length t.url - idx) + | Some jdx -> + let kind = String.sub t.url idx (jdx - idx) in + let jdx = jdx + 1 in + let target = String.sub t.url jdx (String.length t.url - jdx) in + String.uppercase_ascii kind ^ target + in + path, "#" ^ tgt + | path, _ -> path, "" + in + let path = String.concat "." (List.rev path) in + "https://v2.ocaml.org/releases/5.1/api/" ^ path ^ ".html" ^ hashref let link t = - let pkg_link = Package.link t.pkg in - let name, path = - match List.rev (String.split_on_char '.' t.name) with - | name :: path -> name, String.concat "/" (List.rev path) - | _ -> "", "" - in - pkg_link ^ "/doc/" ^ path ^ "/index.html#val-" ^ name + let fullname = String.split_on_char '.' t.name in + match fullname with + | "Stdlib" :: name -> stdlib_link ~name t + | _ -> + let pkg_link = Package.link t.pkg in + let rec align n ys = + match ys with + | _ when n = 0 -> [] + | [] -> [] + | y :: ys -> y :: align (n - 1) ys + in + let length = List.length fullname in + let length = + match String.index_opt t.url '#' with + | None -> length + 1 + | Some idx -> + let tgt = String.sub t.url idx (String.length t.url - idx) in + let count = ref 0 in + String.iter + (function + | '.' -> incr count + | _ -> ()) + tgt ; + length - !count + in + let path = align length (List.rev (String.split_on_char '/' t.url)) in + let path = String.concat "/" (List.rev path) in + pkg_link ^ "/doc/" ^ path let v ~name ~kind ~cost ~rhs ~doc_html ~url ~pkg () = { name = non_empty_string name diff --git a/db/entry.mli b/db/entry.mli index 53856413..4cc1904e 100644 --- a/db/entry.mli +++ b/db/entry.mli @@ -52,16 +52,3 @@ val v val link : t -> string val compare : t -> t -> int val equal : t -> t -> bool - -module Set : Set.S with type elt = t - -module Array : sig - type elt = t - type t = elt array option - - val of_list : elt list -> t - val is_empty : t -> bool - val minimum : t -> elt option - val equal_elt : elt -> elt -> bool - val compare_elt : elt -> elt -> int -end diff --git a/db/string_automata.ml b/db/string_automata.ml index a0047a20..5866aed5 100644 --- a/db/string_automata.ml +++ b/db/string_automata.ml @@ -1,7 +1,13 @@ +type terminals = + | Empty + | Terminals of Entry.t array + | Summary of Entry.t array + type node = { start : int ; len : int - ; terminals : Entry.Array.t + ; size : int + ; terminals : terminals ; children : node array option } @@ -10,7 +16,12 @@ type t = ; t : node } -let empty = { str = ""; t = { start = 0; len = 0; terminals = None; children = None } } +let size t = t.t.size + +let minimum { t; _ } = + match t.terminals with + | Empty -> assert false + | Terminals arr | Summary arr -> arr.(0) let array_find ~str chr arr = let rec go i = @@ -32,9 +43,10 @@ let lcp i_str i j_str j j_len = let rec go_lcp i j = if i >= String.length i_str || j >= j_stop then i - else ( + else begin let i_chr, j_chr = i_str.[i], j_str.[j] in - if i_chr <> j_chr then i else go_lcp (i + 1) (j + 1)) + if i_chr <> j_chr then i else go_lcp (i + 1) (j + 1) + end in let i' = go_lcp i j in i' - i @@ -69,77 +81,54 @@ let stepback node = assert (node.len >= 0) ; { node with start = node.start - 1; len = node.len + 1 } -let rec find_skip ~spaces t pattern = +let rec find_skip ~spaces t pattern yield = let skip () = let node = t.t in if node.len >= 1 then begin let spaces = spaces + if t.str.[node.start] = ' ' then 1 else 0 in - if spaces > 1 then [] else find_skip ~spaces { t with t = advance t.t } pattern + if spaces > 1 + then () + else find_skip ~spaces { t with t = advance t.t } pattern yield end else begin match node.children with - | None -> [] + | None -> () | Some children -> - snd - @@ List.fold_left - (fun (i, acc) child -> - let xs = find_skip ~spaces { t with t = stepback child } pattern in - i + 1, List.rev_append xs acc) - (0, []) - @@ Array.to_list children + Array.iter + (fun child -> find_skip ~spaces { t with t = stepback child } pattern yield) + children end in if spaces = 0 then skip () + else if spaces = 1 && pattern = Type_polarity.poly + then begin + match find t pattern with + | None -> () + | Some here -> yield here + end else begin - let skip = skip () in + skip () ; match find t pattern with - | Some here -> here :: skip - | None -> skip + | None -> () + | Some here -> yield here end -let find_star t pattern = +let find_star t pattern yield = let rec go t = function - | [] -> [ t ] - | p :: ps -> begin - let ts = find_skip ~spaces:0 t p in - List.fold_left - (fun acc t -> - let xs = go t ps in - List.rev_append xs acc) - [] - ts - end + | [] -> yield t + | p :: ps -> find_skip ~spaces:0 t p @@ fun t -> go t ps in match String.split_on_char ' ' pattern with - | [] -> [] + | [] -> () | p :: ps -> begin match find t p with - | None -> [] + | None -> () | Some t -> go t ps end -let min_opt a b = - match a, b with - | Some x, Some y -> Some (if Entry.compare x y <= 0 then x else y) - | Some x, None | None, Some x -> Some x - | None, None -> None - -let rec minimum t = - let min_terminal = - match t.terminals with - | None -> None - | Some arr -> Some arr.(0) - in - let min_child = - match t.children with - | None -> None - | Some children -> minimum children.(0) - in - min_opt min_terminal min_child - -let minimum { t; _ } = - match minimum t with - | None -> assert false - | Some elt -> elt +let find_star t pattern = + let found = ref [] in + find_star t pattern (fun t -> found := t :: !found) ; + !found diff --git a/db/string_automata.mli b/db/string_automata.mli index 4b4ad60a..7e3bce78 100644 --- a/db/string_automata.mli +++ b/db/string_automata.mli @@ -1,10 +1,16 @@ (* A string automata, constructed from a suffix tree and optimized for fast queries and small serialization. *) +type terminals = + | Empty + | Terminals of Entry.t array + | Summary of Entry.t array + type node = { start : int ; len : int - ; terminals : Entry.Array.t + ; size : int + ; terminals : terminals ; children : node array option } @@ -13,7 +19,7 @@ type t = ; t : node } -val empty : t val find : t -> string -> t option val find_star : t -> string -> t list val minimum : t -> Entry.t +val size : t -> int diff --git a/db/type_polarity.ml b/db/type_polarity.ml index 61067bcf..47bcec1c 100644 --- a/db/type_polarity.ml +++ b/db/type_polarity.ml @@ -18,9 +18,11 @@ let rev_concat lst = List.fold_left (fun acc xs -> List.rev_append xs acc) [] ls type t = string * int * Sign.t +let poly = "@" + let rec of_typ ~any_is_poly ~prefix ~sgn = function - | Poly _ -> [ sgn, "POLY" :: prefix ] - | Any -> if any_is_poly then [ sgn, "POLY" :: prefix ] else [ sgn, prefix ] + | Poly _ -> [ sgn, poly :: prefix ] + | Any -> if any_is_poly then [ sgn, poly :: prefix ] else [ sgn, prefix ] | Arrow (a, b) -> List.rev_append (of_typ ~any_is_poly ~prefix ~sgn:(Sign.not sgn) a) diff --git a/db/type_polarity.mli b/db/type_polarity.mli index f199d8b5..0bbcac31 100644 --- a/db/type_polarity.mli +++ b/db/type_polarity.mli @@ -76,5 +76,6 @@ val of_typ : any_is_poly:bool -> Typexpr.t -> t Seq.t corresponding to [typ]. - If [any_is_poly] is true, the type [_] will be treated like a type variable - ['a], otherwise it will be represented solely by its sign ("+" or "-"). - *) + ['a], otherwise it will be represented solely by its sign ("+" or "-"). *) + +val poly : string diff --git a/dune-project b/dune-project index ddae2e11..fd2418af 100644 --- a/dune-project +++ b/dune-project @@ -10,7 +10,7 @@ (source (github art-w/sherlodoc)) -(authors "Arthur Wendling") +(authors "Arthur Wendling" "Emile Trotignon") (maintainers "art.wendling@gmail.com") @@ -18,23 +18,24 @@ (package (name sherlodoc) - (synopsis "Fuzzy search in OCaml documentation") + (synopsis "Search engine for OCaml documentation") (depends (ocaml (>= 4.0.8)) - (cmdliner (>= 1.2.0)) + (odoc (>= 2.4.0)) + (base64 (>= 3.5.1)) (bigstringaf (>= 0.9.1)) + (js_of_ocaml (>= 5.6.0)) + (brr (>= 0.0.6)) + (cmdliner (>= 1.2.0)) (decompress (>= 1.5.3)) - (base64 (>= 3.5.1)) (fpath (>= 0.7.3)) (lwt (>= 5.7.0)) (menhir (>= 20230608)) - (odoc (>= 2.4.0)) - (tyxml (>= 4.6.0)) - (brr (>= 0.0.6)) (ppx_blob (>= 0.7.2)) - (alcotest :with-test) + (tyxml (>= 4.6.0)) (odig :with-test) - (base (and :with-test (= v0.16.3)))) + (base (and :with-test (= v0.16.3))) + (alcotest :with-test)) (depopts (dream (>= 1.0.0~alpha5)) (ancient (>= 0.9.1)))) diff --git a/dune-workspace b/dune-workspace new file mode 100644 index 00000000..7d2408da --- /dev/null +++ b/dune-workspace @@ -0,0 +1,3 @@ +(lang dune 3.5) + +(profile release) diff --git a/index/db_writer.ml b/index/db_writer.ml index e69c9dcb..e847421e 100644 --- a/index/db_writer.ml +++ b/index/db_writer.ml @@ -1,30 +1,48 @@ open Db -type t = - { writer_names : Suffix_tree.t +type s = + { mutable load : int + ; writer_names : Suffix_tree.t ; buffer_types : Suffix_tree.Buf.t ; mutable writer_pos_types : Suffix_tree.t Occurences.t ; mutable writer_neg_types : Suffix_tree.t Occurences.t ; type_cache : Type_cache.t } -let make () = +type t = s ref + +let load t = !t.load + +let make_empty () = let buffer_names = Suffix_tree.Buf.make () in let buffer_types = Suffix_tree.Buf.make () in - { writer_names = Suffix_tree.make buffer_names + { load = 0 + ; writer_names = Suffix_tree.make buffer_names ; buffer_types ; writer_pos_types = Occurences.empty ; writer_neg_types = Occurences.empty ; type_cache = Type_cache.make () } -let export db = - { Storage.db_names = Suffix_tree.export db.writer_names - ; db_pos_types = Occurences.map Suffix_tree.export db.writer_pos_types - ; db_neg_types = Occurences.map Suffix_tree.export db.writer_neg_types - } +let make () = ref (make_empty ()) + +let export ~summarize db = + let shard = + let db = !db in + let db_names = Suffix_tree.export ~summarize db.writer_names in + let db_pos_types = + Occurences.map (Suffix_tree.export ~summarize) db.writer_pos_types + in + let db_neg_types = + Occurences.map (Suffix_tree.export ~summarize) db.writer_neg_types + in + { Storage.db_names; db_pos_types; db_neg_types } + in + db := make_empty () ; + shard let store db name elt ~count ~polarity = + db.load <- db.load + 1 ; let st = match polarity with | Type_polarity.Sign.Pos -> begin @@ -45,7 +63,14 @@ let store db name elt ~count ~polarity = Suffix_tree.add_suffixes st name elt let store_type_polarities db elt polarities = + let db = !db in Seq.iter (fun (path, count, polarity) -> store db ~count ~polarity path elt) polarities -let store_word db word elt = Suffix_tree.add_suffixes db.writer_names word elt -let type_of_odoc ~db ty = Type_cache.of_odoc ~cache:db.type_cache ty +let store_word db word elt = + let db = !db in + db.load <- db.load + 1 ; + Suffix_tree.add_suffixes db.writer_names word elt + +let type_of_odoc ~db ty = + let db = !db in + Type_cache.of_odoc ~cache:db.type_cache ty diff --git a/index/db_writer.mli b/index/db_writer.mli index f3a221ac..746626a5 100644 --- a/index/db_writer.mli +++ b/index/db_writer.mli @@ -2,11 +2,12 @@ type t (** The type that builds a database. You can use it to add things to it, but you cannot make queries on it. *) -val export : t -> Db.t +val export : summarize:bool -> t -> Db.t val make : unit -> t (** [make ()] returns an empty search database. *) +val load : t -> int val type_of_odoc : db:t -> Odoc_model.Lang.TypeExpr.t -> Db.Typexpr.t val store_type_polarities : t -> Db.Entry.t -> Db.Type_polarity.t Seq.t -> unit val store_word : t -> string -> Db.Entry.t -> unit diff --git a/index/index.ml b/index/index.ml index ecb3cfd7..0da9a9c9 100644 --- a/index/index.ml +++ b/index/index.ml @@ -18,13 +18,12 @@ let index_file register filename = let main files file_list index_docstring index_name type_search db_format db_filename = let module Storage = (val Db_store.storage_module db_format) in let db = Db_writer.make () in - let pkg = Db.Entry.Package.v ~name:"" ~version:"" in - let register id () item = + let no_pkg = Db.Entry.Package.v ~name:"" ~version:"" in + let register ~pkg id () item = List.iter (Load_doc.register_entry ~db ~index_docstring ~index_name ~type_search ~pkg) (Odoc_search.Entry.entries_of_item id item) in - let h = Storage.open_out db_filename in let files = match file_list with | None -> files @@ -39,9 +38,24 @@ let main files file_list index_docstring index_name type_search db_format db_fil close_in h ; files @ other_files in - List.iter (index_file register) files ; - let t = Db_writer.export db in - Storage.save ~db:h t ; + let h = Storage.open_out db_filename in + let flush () = + let t = Db_writer.export ~summarize:(db_format = `ancient) db in + Storage.save ~db:h t + in + List.iter + (fun odoc -> + let pkg, odoc = + match String.split_on_char '\t' odoc with + | [ filename ] -> no_pkg, filename + | [ name; filename ] -> Db.Entry.Package.v ~name ~version:"", filename + | [ name; version; filename ] -> Db.Entry.Package.v ~name ~version, filename + | _ -> failwith ("invalid line: " ^ odoc) + in + index_file (register ~pkg) odoc ; + if db_format = `ancient && Db_writer.load db > 1_000_000 then flush ()) + files ; + flush () ; Storage.close_out h open Cmdliner @@ -67,7 +81,7 @@ let file_list = let odoc_files = let doc = "Path to a .odocl file" in - Arg.(non_empty & (pos_all file [] @@ info ~doc ~docv:"ODOCL_FILE" [])) + Arg.(value & (pos_all file [] @@ info ~doc ~docv:"ODOCL_FILE" [])) let term = Term.(const main $ odoc_files $ file_list $ index_docstring $ index_name $ type_search) diff --git a/index/load_doc.ml b/index/load_doc.ml index f00da850..5cbb9748 100644 --- a/index/load_doc.ml +++ b/index/load_doc.ml @@ -18,19 +18,30 @@ let path_length str = let kind_cost = function | Entry.Kind.Constructor _ | Entry.Kind.Exception _ | Entry.Kind.Extension_constructor _ - | Entry.Kind.Field _ | Entry.Kind.Type_decl _ | Entry.Kind.Type_extension - | Entry.Kind.Val _ -> + | Entry.Kind.Field _ | Entry.Kind.Module | Entry.Kind.Type_decl _ + | Entry.Kind.Type_extension | Entry.Kind.Val _ -> 0 | _ -> 50 +let rhs_cost = function + | Some str -> String.length str + | None -> 20 + +let cost_doc = function + | Entry.Kind.Constructor _ | Entry.Kind.Exception _ | Entry.Kind.Extension_constructor _ + | Entry.Kind.Field _ | Entry.Kind.Module | Entry.Kind.Module_type + | Entry.Kind.Type_decl _ | Entry.Kind.Type_extension -> + 0 + | _ -> 100 + let cost ~name ~kind ~doc_html ~rhs ~cat = String.length name + (5 * path_length name) - + (if string_starts_with ~prefix:"Stdlib." name then 0 else 20) - + String.length (Option.value ~default:"" rhs) + + (if string_starts_with ~prefix:"Stdlib." name then 0 else 50) + + rhs_cost rhs + kind_cost kind + (if cat = `definition then 0 else 100) - + if doc_html <> "" then 0 else 100 + + if doc_html <> "" then 0 else cost_doc kind let string_of_html = Format.asprintf "%a" (Tyxml.Html.pp_elt ()) @@ -125,13 +136,15 @@ let register_kind ~db elt = | None -> () | Some typ -> register_type_expr ~db elt typ -let rec categorize (id : Odoc_model.Paths.Identifier.Any.t) = +let rec categorize id = let open Odoc_model.Paths in - match id.iv with + match id.Identifier.iv with | `CoreType _ | `CoreException _ | `Root _ | `Page _ | `LeafPage _ -> `definition | `ModuleType _ -> `declaration | `Parameter _ -> `ignore (* redundant with indexed signature *) - | #Identifier.NonSrc.t_pv as x -> + | ( `InstanceVariable _ | `Method _ | `Field _ | `Result _ | `Label _ | `Type _ + | `Exception _ | `Class _ | `ClassType _ | `Value _ | `Constructor _ | `Extension _ + | `ExtensionDecl _ | `Module _ ) as x -> let parent = Identifier.label_parent { id with iv = x } in categorize (parent :> Identifier.Any.t) | `AssetFile _ | `SourceDir _ | `SourceLocationMod _ | `SourceLocation _ | `SourcePage _ diff --git a/index/suffix_tree.ml b/index/suffix_tree.ml index b6b12356..34f455a4 100644 --- a/index/suffix_tree.ml +++ b/index/suffix_tree.ml @@ -63,6 +63,17 @@ end module Entry = Db.Entry +module Uid = struct + type t = int + + let gen = ref 0 + + let make () = + let u = !gen in + gen := u + 1 ; + u +end + module Terminals = struct type t = Entry.t list @@ -79,12 +90,23 @@ module Terminals = struct let rec equal xs ys = match xs, ys with | [], [] -> true - | x :: xs, y :: ys when Entry.equal x y -> equal xs ys + | x :: xs, y :: ys when x == y -> equal xs ys | _ -> false + let equal xs ys = xs == ys || equal xs ys + let mem x = function | y :: _ -> Entry.equal x y | _ -> false + + let minimum = function + | [] -> None + | x :: xs -> + Some + (List.fold_left + (fun found elt -> if Entry.compare found elt <= 0 then found else elt) + x + xs) end module Char_map = Map.Make (Char) @@ -303,64 +325,110 @@ let add_document trie doc = let add_suffixes t text elt = add_document t { Doc.text; uid = elt } -module Uid = struct - let gen = ref 0 - - let make () = - let u = !gen in - gen := u + 1 ; - u -end - module Terminals_cache = Hashtbl.Make (Terminals) +module Seen = Set.Make (Db.Entry) -let export_terminals ~cache_term ts = +let export_terminals ~cache_term ~is_summary ts = try Terminals_cache.find cache_term ts with | Not_found -> - let result = Uid.make (), Entry.Array.of_list ts in + let terminals = + if ts = [] + then Db.String_automata.Empty + else if is_summary + then Db.String_automata.Summary (Array.of_list ts) + else Db.String_automata.Terminals (Array.of_list ts) + in + let result = Uid.make (), terminals in Terminals_cache.add cache_term ts result ; result -let rec export ~cache ~cache_term node = - let terminals_uid, terminals = export_terminals ~cache_term node.terminals in +type result = + { uid : Uid.t + ; t : Db.String_automata.node + ; min : Entry.t + ; seen : Seen.t + } + +let size_of_terminals = function + | Db.String_automata.Empty -> 1 + | Summary arr | Terminals arr -> Array.length arr + +let rec export ~cache ~cache_term ~summarize ~is_root node = + let is_summary = summarize && not is_root in let children = - Char_map.bindings @@ Char_map.map (export ~cache ~cache_term) node.children + Char_map.bindings + @@ Char_map.map (export ~cache ~cache_term ~summarize ~is_root:false) node.children in let children = List.sort - (fun (a_chr, (_, _, a)) (b_chr, (_, _, b)) -> + (fun (a_chr, { min = a; _ }) (b_chr, { min = b; _ }) -> match Entry.compare a b with | 0 -> Char.compare a_chr b_chr | c -> c) children in - let min_terminal = Entry.Array.minimum terminals in + let children_seen = + List.fold_left (fun acc (_, child) -> Seen.union acc child.seen) Seen.empty children + in + let seen = List.fold_left (fun acc e -> Seen.add e acc) children_seen node.terminals in + let children_uids = List.map (fun (chr, { uid; _ }) -> chr, uid) children in + let terminals = + if is_summary + then List.of_seq (Seen.to_seq seen) + else List.filter (fun e -> not (Seen.mem e children_seen)) node.terminals + in let min_child = - match min_terminal, children with - | Some a, (_, (_, _, b)) :: _ -> if Entry.compare a b <= 0 then a else b - | Some a, [] -> a - | None, (_, (_, _, b)) :: _ -> b - | None, [] -> assert false + match children with + | [] -> None + | (_, { min = elt; _ }) :: _ -> Some elt + in + let min_terminal = Terminals.minimum terminals in + let min_child, terminals = + match min_child, min_terminal with + | None, None -> failwith "suffix_tree: empty node" + | None, Some min_terminal -> min_terminal, terminals + | Some min_child, None -> min_child, min_child :: terminals + | Some min_child, Some min_terminal -> + if Db.Entry.compare min_child min_terminal < 0 + then min_child, min_child :: terminals + else min_terminal, terminals in - let children_uids = List.map (fun (chr, (uid, _, _)) -> chr, uid) children in + assert (terminals <> []) ; + let terminals_uid, terminals = export_terminals ~cache_term ~is_summary terminals in let key = node.start, node.len, terminals_uid, children_uids in try Hashtbl.find cache key with | Not_found -> - let children = Array.of_list @@ List.map (fun (_, (_, child, _)) -> child) children in + let children = + Array.of_list @@ List.map (fun (_, { t = child; _ }) -> child) children + in + let size = size_of_terminals terminals in + let size = + if is_summary + then size + else + Array.fold_left + (fun acc child -> acc + child.Db.String_automata.size) + size + children + in let children = if Array.length children = 0 then None else Some children in let node = - { Db.String_automata.start = node.start; len = node.len; terminals; children } + { Db.String_automata.start = node.start; len = node.len; size; terminals; children } in - let result = Uid.make (), node, min_child in + let result = { uid = Uid.make (); t = node; min = min_child; seen } in Hashtbl.add cache key result ; result -let export { buffer; root = t } = - if Char_map.is_empty t.children - then Db.String_automata.empty - else ( - let str = Buf.contents buffer in +let export ~summarize { buffer; root = t } = + let str = Buf.contents buffer in + if String.length str = 0 + then + { Db.String_automata.str + ; t = { start = 0; len = 0; size = 0; children = None; terminals = Empty } + } + else begin let cache = Hashtbl.create 16 in let cache_term = Terminals_cache.create 16 in - let _, t, _ = export ~cache ~cache_term t in - { Db.String_automata.str; t }) + let { t; _ } = export ~cache ~cache_term ~summarize ~is_root:true t in + { Db.String_automata.str; t } + end diff --git a/index/suffix_tree.mli b/index/suffix_tree.mli index 986f145d..0ff6a5a2 100644 --- a/index/suffix_tree.mli +++ b/index/suffix_tree.mli @@ -8,4 +8,4 @@ type t val make : Buf.t -> t val add_suffixes : t -> string -> Db.Entry.t -> unit -val export : t -> Db.String_automata.t +val export : summarize:bool -> t -> Db.String_automata.t diff --git a/jsoo/dune b/jsoo/dune index b9ac614a..eb4be501 100644 --- a/jsoo/dune +++ b/jsoo/dune @@ -7,11 +7,3 @@ (alias all) (action (copy main.bc.js sherlodoc.js))) - -(install - (files sherlodoc.js) - ; (section share) - ; This ought to be in share, but for now I can only make it work in bin : I did - ; not manage to fetch sherlodoc.js from share in the dune rules. - (section bin) - (package sherlodoc)) diff --git a/jsoo/main.ml b/jsoo/main.ml index ce037300..d14fb229 100644 --- a/jsoo/main.ml +++ b/jsoo/main.ml @@ -29,8 +29,6 @@ let stream_of_string str = in stream -let don't_wait_for fut = Fut.await fut Fun.id - module Decompress_browser = struct (** This module contains binding to the browser string compression api. It is much faster than using an OCaml library, and does not require sending code @@ -99,7 +97,9 @@ let string_of_kind = let search message db = let query = Jv.get message "data" in let query = query |> Jv.to_jstr |> Jstr.to_string in - let results = Query.(search ~shards:db { query; packages = []; limit = 50 }) in + let results = + Query.Blocking.search ~shards:db { Query.query; packages = []; limit = 50 } + in let _ = Jv.(apply (get global "postMessage")) [| Jv.of_list @@ -112,10 +112,12 @@ let search message db = let prefix_name, name = match kind with | Db.Entry.Kind.Doc -> None, None - | _ -> - let rev_name = name |> String.split_on_char '.' |> List.rev in - ( rev_name |> List.tl |> List.rev |> String.concat "." |> Option.some - , rev_name |> List.hd |> Option.some ) + | _ -> begin + match List.rev (String.split_on_char '.' name) with + | [] -> None, None + | [ hd ] -> None, Some hd + | hd :: tl -> Some (String.concat "." (List.rev tl)), Some hd + end in let kind = string_of_kind kind in let html = @@ -134,6 +136,8 @@ let search message db = in () +let don't_wait_for fut = Fut.await fut Fun.id + let search message = don't_wait_for @@ diff --git a/query/dune b/query/dune index eec425d7..c9f0de2a 100644 --- a/query/dune +++ b/query/dune @@ -1,6 +1,6 @@ (library (name query) - (libraries lwt db)) + (libraries db)) (menhir (modules type_parser) diff --git a/query/dynamic_cost.ml b/query/dynamic_cost.ml index 9c19d3e8..e3fac28a 100644 --- a/query/dynamic_cost.ml +++ b/query/dynamic_cost.ml @@ -2,13 +2,13 @@ module Entry = Db.Entry type query = { name : string list - ; type_paths : Type_distance.Type_path.t option + ; type_paths : Type_distance.t option } let of_query { Query_parser.name; typ } = let type_paths = match typ with - | `typ t -> Some (Type_distance.Type_path.of_typ ~ignore_any:true t) + | `typ t -> Some (Type_distance.paths_of_type t) | _ -> None in { name; type_paths } @@ -27,4 +27,4 @@ let score query entry = | Some cost -> cost | None -> 0 in - 10 * (name_matches + type_cost) + 5 * (name_matches + type_cost) diff --git a/query/io.ml b/query/io.ml new file mode 100644 index 00000000..a7bc5330 --- /dev/null +++ b/query/io.ml @@ -0,0 +1,40 @@ +module type S = sig + (* avoids a dependency on lwt for sherlodoc.js *) + + type 'a t + + val return : 'a -> 'a t + val map : 'a t -> ('a -> 'b) -> 'b t + val bind : 'a t -> ('a -> 'b t) -> 'b t +end + +module Seq (Io : S) = struct + type 'a t = unit -> 'a node Io.t + + and 'a node = + | Nil + | Cons of 'a * 'a t + + let rec of_seq s () = + match s () with + | Seq.Nil -> Io.return Nil + | Cons (x, xs) -> Io.return (Cons (x, of_seq xs)) + + let rec take n xs () = + if n = 0 + then Io.return Nil + else begin + Io.map (xs ()) + @@ function + | Nil -> Nil + | Cons (x, xs) -> Cons (x, take (n - 1) xs) + end + + let rec to_list acc s = + Io.bind (s ()) + @@ function + | Nil -> Io.return (List.rev acc) + | Cons (x, xs) -> to_list (x :: acc) xs + + let to_list s = to_list [] s +end diff --git a/query/name_cost.ml b/query/name_cost.ml index 650a1240..3062473d 100644 --- a/query/name_cost.ml +++ b/query/name_cost.ml @@ -4,7 +4,7 @@ let rec prefix_at ~case ~sub i s j = else if sub.[i] = s.[j] then prefix_at ~case ~sub (i + 1) s (j + 1) else if sub.[i] = Char.lowercase_ascii s.[j] - then prefix_at ~case:(case + 5) ~sub (i + 1) s (j + 1) + then prefix_at ~case:(case + 3) ~sub (i + 1) s (j + 1) else if Char.lowercase_ascii sub.[i] = s.[j] then prefix_at ~case:(case + 10) ~sub (i + 1) s (j + 1) else None @@ -41,7 +41,7 @@ let best_match ?(after = 0) ~sub str = List.fold_left (fun acc (i, case_cost) -> let left = word_boundary str (i - 1) in - let right = word_boundary str (i + String.length sub) in + let right = word_boundary str (i + String.length sub) / 3 in let is_after = if i >= after then 0 else 10 in let cost = case_cost + left + right + is_after in match acc with diff --git a/query/priority_queue.ml b/query/priority_queue.ml index 8579199e..6c38416a 100644 --- a/query/priority_queue.ml +++ b/query/priority_queue.ml @@ -9,34 +9,50 @@ type t = | All of elt * String_automata.t | Union of elt * t list +let rec size = function + | Empty -> 0 + | Array (i, arr) -> Array.length arr - i + | All (_, s) -> String_automata.size s + | Union (_, xs) -> List.fold_left (fun acc x -> acc + size x) 0 xs + let minimum = function | Empty -> None | Array (i, arr) -> Some arr.(i) | All (elt, _) | Union (elt, _) -> Some elt -let of_sorted_array = function - | None -> Empty - | Some arr -> Array (0, arr) +let of_sorted_array arr = Array (0, arr) let of_automata s = let elt = String_automata.minimum s in All (elt, s) +let of_list lst = + let lst = List.filter (( <> ) Empty) lst in + let min x = + match minimum x with + | None -> assert false + | Some elt -> elt + in + let compare a b = Entry.compare (min a) (min b) in + match List.sort compare lst with + | [] -> Empty + | hd :: _ as lst -> Union (min hd, lst) + let insert_sort x lst = match minimum x with | None -> lst | Some min_elt -> - let rec go lst = + let rec insert lst = match lst with | [] -> [ x ] | y :: ys -> begin match minimum y with - | None -> go ys + | None -> insert ys | Some min_y when Entry.compare min_elt min_y <= 0 -> x :: lst - | _ -> y :: go ys + | _ -> y :: insert ys end in - go lst + insert lst let union_with ~min_elt lst = match List.filter (( <> ) Empty) lst with @@ -54,6 +70,23 @@ let rec union_sorted lst = | Some min_elt -> Union (min_elt, lst) end +let expand_automata ~min_elt ({ String_automata.t; _ } as automata) = + match t.terminals with + | String_automata.Summary arr -> Array (0, arr) + | terminals -> + let terminals = + match terminals with + | String_automata.Empty -> Empty + | Terminals terminals -> Array (0, terminals) + | _ -> assert false + in + let lift child = of_automata { automata with String_automata.t = child } in + let children = + Array.to_list @@ Array.map lift @@ Option.value ~default:[||] t.children + in + let all = insert_sort terminals children in + union_with ~min_elt all + let rec pop_until cond = function | Empty -> Empty | Array (i, arr) as t -> @@ -63,7 +96,7 @@ let rec pop_until cond = function let m = (i + j) / 2 in if i = m then Array (j, arr) else if cond arr.(m) then search i m else search m j in - let rec go j step = + let rec search_from j step = if j >= Array.length arr then begin let last = Array.length arr - 1 in @@ -72,29 +105,31 @@ let rec pop_until cond = function end else if cond arr.(j) then if i = j then t else search (j - (step / 2)) j - else go (j + step) (step * 2) + else search_from (j + step) (step * 2) in - go i 1 + search_from i 1 | All (min_elt, _) as t when cond min_elt -> t - | All (min_elt, ({ String_automata.t; _ } as automata)) -> - let terminals = of_sorted_array t.terminals in - let children = - Array.to_list - @@ Array.map (fun child -> of_automata { automata with t = child }) - @@ Option.value ~default:[||] t.children - in - let all = insert_sort terminals children in - pop_until cond (union_with ~min_elt all) + | All (min_elt, automata) -> pop_until cond (expand_automata ~min_elt automata) | Union (min_elt, _) as t when cond min_elt -> t | Union (_, lst) -> - let rec go = function + let rec pop_union i = function | [] -> [] | x :: xs -> let x' = pop_until cond x in - if x == x' then x :: xs else insert_sort x' (go xs) + if x == x' + then begin + assert (i > 0) ; + x :: xs + end + else insert_sort x' (pop_union (i + 1) xs) in - let lst = go lst in + let lst = pop_union 0 lst in union_sorted lst -let pop_lt elt t = pop_until (fun x -> Entry.compare x elt >= 0) t -let pop_lte elt t = pop_until (fun x -> Entry.compare x elt > 0) t +let pop_lt elt t = + let cmp_lt x = Entry.compare x elt >= 0 in + pop_until cmp_lt t + +let pop_lte elt t = + let cmp_lte x = Entry.compare x elt > 0 in + pop_until cmp_lte t diff --git a/query/priority_queue.mli b/query/priority_queue.mli index 40134bc6..24f42d41 100644 --- a/query/priority_queue.mli +++ b/query/priority_queue.mli @@ -3,6 +3,8 @@ type t val minimum : t -> elt option val of_automata : Db.String_automata.t -> t -val of_sorted_array : elt array option -> t +val of_sorted_array : elt array -> t +val of_list : t list -> t val pop_lt : elt -> t -> t val pop_lte : elt -> t -> t +val size : t -> int diff --git a/query/query.ml b/query/query.ml index 134b6be1..e6830711 100644 --- a/query/query.ml +++ b/query/query.ml @@ -29,15 +29,14 @@ let find_types ~shard typ = | Db.Type_polarity.Sign.Pos -> shard.Db.db_pos_types | Neg -> shard.Db.db_neg_types in - Succ.union_of_list + Succ.of_automatas @@ Db.Occurences.fold (fun occurrences st acc -> if occurrences < count then acc else begin let ts = Tree.find_star st name in - let ss = List.map Succ.of_automata ts in - List.rev_append ss acc + List.rev_append ts acc end) st_occ []) @@ -75,6 +74,8 @@ type t = ; limit : int } +let pretty params = Parser.(to_string @@ of_string params.query) + let match_packages ~packages { Db.Entry.pkg; _ } = List.exists (String.equal pkg.name) packages @@ -83,26 +84,33 @@ let match_packages ~packages results = | [] -> results | _ -> Seq.filter (match_packages ~packages) results -let rec seq_take n xs () = - if n = 0 - then Seq.Nil - else begin - match xs () with - | Seq.Nil -> Seq.Nil - | Seq.Cons (x, xs) -> Seq.Cons (x, seq_take (n - 1) xs) - end - -let search ~shards ?(dynamic_sort = true) params = - let limit = params.limit in +let search ~shards params = let query = Parser.of_string params.query in let results = search ~shards query in let results = Succ.to_seq results in - let results = match_packages ~packages:params.packages results in - if dynamic_sort - then begin - let query = Dynamic_cost.of_query query in - List.of_seq @@ Top_results.of_seq ~query ~limit results - end - else List.of_seq @@ seq_take params.limit results + query, match_packages ~packages:params.packages results -let pretty params = Parser.(to_string @@ of_string params.query) +module type IO = Io.S + +module Make (Io : IO) = struct + module Tr = Top_results.Make (Io) + + let search ~shards ?(dynamic_sort = true) params = + let limit = params.limit in + let query, results = search ~shards params in + let results = Tr.Seq.of_seq results in + if dynamic_sort + then begin + let query = Dynamic_cost.of_query query in + Tr.of_seq ~query ~limit results + end + else Tr.Seq.to_list @@ Tr.Seq.take limit results +end + +module Blocking = Make (struct + type 'a t = 'a + + let return x = x + let map x f = f x + let bind x f = f x + end) diff --git a/query/query.mli b/query/query.mli index 7f944241..f7f7d78a 100644 --- a/query/query.mli +++ b/query/query.mli @@ -4,33 +4,43 @@ type t = ; limit : int } -val search : shards:Db.t list -> ?dynamic_sort:bool -> t -> Db.Entry.t list -(** [search ~shard ~dynamic_sort {query; packages; limit}] returns [(pretty_query, +val pretty : t -> string + +module type IO = Io.S + +module Make (Io : IO) : sig + val search : shards:Db.t list -> ?dynamic_sort:bool -> t -> Db.Entry.t list Io.t + (** [search ~shard ~dynamic_sort {query; packages; limit}] returns [(pretty_query, results)] where [pretty_query] is a re-printed version of [query] and - [results] is the list of results corresponding to the query and the - various parameters. + [results] is the list of results corresponding to the query and the + various parameters. - - [shards] is a list of databases. [results] is the union of the results of - each database of the list [shards]. If [shards] is a very long list, [api] - might be slow to return, but in some cases you do not have a choice. - Currently, [index] generates only one shard, but it used to generate many - to be able to handle the sheer size of the opam repository. + - [shards] is a list of databases. [results] is the union of the results of + each database of the list [shards]. If [shards] is a very long list, [api] + might be slow to return, but in some cases you do not have a choice. + Currently, [index] generates only one shard, but it used to generate many + to be able to handle the sheer size of the opam repository. - - [~dynamic_sort] changes the order of [results]. It is [true] by default, - and is only set to [false] for debugging purposes. + - [~dynamic_sort] changes the order of [results]. It is [true] by default, + and is only set to [false] for debugging purposes. - - [query] is the query string whose shape is a list of space-separated - words, followed by an optionnal [: ...] type annotation that filters the - results by type. The type annotation accepts [_] as a wildcard : [: string + - [query] is the query string whose shape is a list of space-separated + words, followed by an optionnal [: ...] type annotation that filters the + results by type. The type annotation accepts [_] as a wildcard : [: string -> _] will return entries that take a [string] as argument, but returns - anything. + anything. - - [limit] is the maximum length of [results]. Having a very large number - might be an issue. + - [limit] is the maximum length of [results]. Having a very large number + might be an issue. - - [packages] is not function, use [[]] for this argument. *) + - [packages] is not function, use [[]] for this argument. *) +end -val pretty : t -> string +module Blocking : sig + val search : shards:Db.t list -> ?dynamic_sort:bool -> t -> Db.Entry.t list +end + +(* val search_lwt : shards:Db.t list -> ?dynamic_sort:bool -> t -> Db.Entry.t list Lwt.t *) (** For testing *) module Private : sig diff --git a/query/query_parser.ml b/query/query_parser.ml index 46d6b63c..0283842d 100644 --- a/query/query_parser.ml +++ b/query/query_parser.ml @@ -15,8 +15,8 @@ let balance_parens str = let type_of_string str = let str = balance_parens str in let lexbuf = Lexing.from_string str in - try Ok (Type_parser.main Type_lexer.token lexbuf) with - | Type_parser.Error -> Error "parse error" + try `typ (Type_parser.main Type_lexer.token lexbuf) with + | _ -> `parse_error let naive_of_string str = List.filter (fun s -> String.length s > 0) (String.split_on_char ' ' str) @@ -30,11 +30,6 @@ type t = ; typ : [ `typ of Db.Typexpr.t | `no_typ | `parse_error ] } -let type_of_string str_typ = - match type_of_string str_typ with - | Ok typ -> `typ typ - | Error _ -> `parse_error - let of_string str = let query_name, typ = match String.index_opt str ':' with diff --git a/query/succ.ml b/query/succ.ml index 084e7ad8..d258e045 100644 --- a/query/succ.ml +++ b/query/succ.ml @@ -1,102 +1,134 @@ module Entry = Db.Entry -type t = +type elt = Entry.t + +type s = | Empty + | All | Pq of Priority_queue.t - | Inter of t * t - | Union of t * t + | Inter of s * s + | Union of s * s -let empty = Empty -let of_automata t = Pq (Priority_queue.of_automata t) -let of_array arr = Pq (Priority_queue.of_sorted_array (Some arr)) +type t = + { s : s + ; size : int + } + +let all = { s = All; size = 0 } +let empty = { s = Empty; size = 0 } +let make_pq t = { s = Pq t; size = Priority_queue.size t } +let of_automata t = make_pq (Priority_queue.of_automata t) +let of_automatas ts = make_pq Priority_queue.(of_list (List.map of_automata ts)) +let of_array arr = make_pq (Priority_queue.of_sorted_array arr) let inter a b = - match a, b with + match a.s, b.s with | Empty, _ | _, Empty -> empty + | _, All -> a + | All, _ -> b | x, y when x == y -> a - | x, y -> Inter (x, y) + | x, y -> + let s = if a.size <= b.size then Inter (x, y) else Inter (y, x) in + { s; size = min a.size b.size } let union a b = - match a, b with - | Empty, _ -> b + match a.s, b.s with + | All, _ | _, All -> all | _, Empty -> a + | Empty, _ -> b | x, y when x == y -> a - | x, y -> Union (x, y) + | x, y -> + let s = if a.size >= b.size then Union (x, y) else Union (y, x) in + { s; size = a.size + b.size } let rec join_with fn = function | [] -> [] | [ x ] -> [ x ] | a :: b :: xs -> fn a b :: join_with fn xs -let rec perfect fn = function - | [] -> Empty +let rec perfect ~default fn = function + | [] -> default | [ x ] -> x - | xs -> perfect fn (join_with fn xs) + | xs -> perfect ~default fn (join_with fn xs) -let inter_of_list xs = perfect inter xs -let union_of_list xs = perfect union xs -let best x y = if Entry.compare x y <= 0 then x else y +let inter_of_list xs = + let xs = List.sort (fun a b -> Int.compare a.size b.size) xs in + perfect ~default:all inter xs -let best_opt old_cand new_cand = - match old_cand, new_cand with - | None, None -> None - | None, Some z | Some z, None -> Some z - | Some x, Some y -> Some (best x y) +let union_of_list xs = + let xs = List.sort (fun a b -> Int.compare b.size a.size) xs in + perfect ~default:empty union xs type strictness = - | Gt - | Ge + | First + | Ge of elt + | Gt of elt + +type result = + | Is_empty + | Is_all + | Found_eq of s + | Found_gt of elt * s -let rec succ ~strictness t elt = +let rec succ ~strictness t = match t with - | Empty -> None, t - | Pq pqueue -> - let pqueue = + | Empty -> Is_empty + | All -> begin + match strictness with + | First -> Is_all + | Gt _ -> Is_all + | Ge _ -> Found_eq All + end + | Pq pqueue -> begin + let pqueue' = match strictness with - | Gt -> Priority_queue.pop_lte elt pqueue - | Ge -> Priority_queue.pop_lt elt pqueue + | First -> pqueue + | Ge elt -> Priority_queue.pop_lt elt pqueue + | Gt elt -> Priority_queue.pop_lte elt pqueue in - begin - match Priority_queue.minimum pqueue with - | None -> () - | Some e -> assert (Entry.compare elt e <= 0) - end ; - Priority_queue.minimum pqueue, Pq pqueue - | Union (l, r) -> begin - match succ ~strictness l elt with - | None, _ -> succ ~strictness r elt - | Some elt_l, l when strictness = Ge && Entry.equal elt elt_l -> Some elt, Union (l, r) - | elt_l, l -> - let elt_r, r = succ ~strictness r elt in - best_opt elt_l elt_r, Union (l, r) + match strictness, Priority_queue.minimum pqueue' with + | _, None -> Is_empty + | Ge elt, Some e when Db.Entry.equal e elt -> Found_eq (Pq pqueue') + | _, Some e -> Found_gt (e, Pq pqueue') end - | Inter (l, r) -> - let rec loop elt l r = - match succ ~strictness:Ge l elt with - | None, _ -> None, Empty - | Some elt', l -> - assert (Entry.compare elt elt' <= 0) ; - if Entry.equal elt elt' then Some elt, Inter (l, r) else loop elt' r l - in - begin - match succ ~strictness l elt with - | None, _ -> None, Empty - | Some elt_l, l -> loop elt_l r l + | Union (l, r) -> begin + match succ ~strictness l with + | Is_empty -> succ ~strictness r + | Is_all -> failwith "union all" + | Found_eq l -> Found_eq (Union (l, r)) + | Found_gt (elt_l, l') -> begin + match succ ~strictness r with + | Is_empty -> Found_gt (elt_l, l') + | Is_all -> failwith "union all" + | Found_eq r' -> Found_eq (Union (l', r')) + | Found_gt (elt_r, r') when Db.Entry.compare elt_l elt_r <= 0 -> + Found_gt (elt_l, Union (l', r')) + | Found_gt (elt_r, r') -> Found_gt (elt_r, Union (l', r')) end - -let rec first t = - match t with - | Empty -> None, Empty - | Pq pqueue -> Priority_queue.minimum pqueue, t + end | Inter (l, r) -> begin - match first l with - | None, _ -> None, Empty - | Some elt, l -> succ ~strictness:Ge (Inter (l, r)) elt + match succ ~strictness l with + | Is_empty -> Is_empty + | Is_all -> failwith "inter all" + | Found_eq l' -> begin + match succ ~strictness r with + | Is_empty -> Is_empty + | Is_all -> failwith "inter all" + | Found_eq r' -> Found_eq (Inter (l', r')) + | Found_gt (elt, r') -> Found_gt (elt, Inter (l', r')) + end + | Found_gt (elt, l') -> Found_gt (elt, Inter (l', r)) end - | Union (l, r) -> - let elt_l, l = first l in - let elt_r, r = first r in - best_opt elt_l elt_r, Union (l, r) + +let rec succ_loop ?(count = 0) ~strictness t = + match strictness, succ ~strictness t with + | _, Is_empty -> None + | _, Is_all -> None + | Ge elt, Found_eq t -> Some (elt, t) + | _, Found_gt (elt, t) -> succ_loop ~count:(count + 1) ~strictness:(Ge elt) t + | _ -> assert false + +let first t = succ_loop ~strictness:First t let seq_of_dispenser fn = let rec go () = @@ -106,18 +138,18 @@ let seq_of_dispenser fn = in go -let to_seq t = +let to_seq { s = t; _ } = let state = ref None in let loop () = - let elt, t = + let result = match !state with | None -> first t - | Some (previous_elt, t) -> succ ~strictness:Gt t previous_elt + | Some (previous_elt, t) -> succ_loop ~strictness:(Gt previous_elt) t in - match elt with + match result with | None -> None - | Some elt -> - state := Some (elt, t) ; + | Some (elt, _) -> + state := result ; Some elt in seq_of_dispenser loop diff --git a/query/succ.mli b/query/succ.mli index c0041cd6..cfd9df70 100644 --- a/query/succ.mli +++ b/query/succ.mli @@ -6,6 +6,7 @@ type t val to_seq : t -> Db.Entry.t Seq.t val empty : t val of_automata : Db.String_automata.t -> t +val of_automatas : Db.String_automata.t list -> t val inter : t -> t -> t val union : t -> t -> t val inter_of_list : t list -> t diff --git a/query/top_results.ml b/query/top_results.ml index deac4a96..60287b35 100644 --- a/query/top_results.ml +++ b/query/top_results.ml @@ -33,21 +33,25 @@ let add ~query ~limit elt t = end end -let max_seek = 500 +let max_seek = 10 -let of_seq ~query ~limit seq = - let rec go total_seen t seq = - if total_seen >= limit + max_seek - then t - else begin - match seq () with - | Seq.Nil -> t - | Cons (x, xs) -> begin - match add ~query ~limit x t with - | Stop t -> t - | Continue t -> go (total_seen + 1) t xs +module Make (IO : Io.S) = struct + module Seq = Io.Seq (IO) + + let of_seq ~query ~limit seq = + let rec go total_seen t seq = + if total_seen >= limit + max_seek + then IO.return t + else begin + IO.bind (seq ()) + @@ function + | Seq.Nil -> IO.return t + | Cons (x, xs) -> begin + match add ~query ~limit x t with + | Stop t -> IO.return t + | Continue t -> go (total_seen + 1) t xs + end end - end - in - let t = go 0 empty seq in - Bests.to_seq t.bests + in + IO.map (go 0 empty seq) @@ fun t -> List.of_seq @@ Bests.to_seq t.bests +end diff --git a/query/top_results.mli b/query/top_results.mli index c8c33d0e..a1533763 100644 --- a/query/top_results.mli +++ b/query/top_results.mli @@ -1 +1,9 @@ -val of_seq : query:Dynamic_cost.query -> limit:int -> Db.Entry.t Seq.t -> Db.Entry.t Seq.t +module Make (IO : Io.S) : sig + module Seq : module type of Io.Seq (IO) + + val of_seq + : query:Dynamic_cost.query + -> limit:int + -> Db.Entry.t Seq.t + -> Db.Entry.t list IO.t +end diff --git a/query/type_distance.ml b/query/type_distance.ml index a8f32164..72e414e7 100644 --- a/query/type_distance.ml +++ b/query/type_distance.ml @@ -1,125 +1,107 @@ -module Type_path : sig - (** This module contains the transformation that make it possible to compute the - distance between types.. +type step = + | Type of string + | Poly + | Any + | Arrow_left + | Arrow_right + | Product of + { pos : int + ; length : int + } + | Argument of + { pos : int + ; length : int + } - A type can viewed as a tree. [a -> b -> c * d] is the following tree : - {[ - -> - |- a - |- -> - |- b - |- * - |- c - |- d - ]} - We consider the list of paths from root to leaf in the tree of the type. +module Sign = Db.Type_polarity.Sign - Here the paths would be : [ [[-> a]; [-> -> b]; [-> -> * c ]; [-> -> * d]] ] +type t = step list list - We encode slightly more information than that. In the above, it not possible by - looking at a type path to know the child position relative to its parent : In - the path [[-> a]]; [a] is the first child of [->], and in [[-> -> b]]; [[-> b]] - is the second child of [->]. This information is not possible to recover without - the whole tree, so we add it in the list, ass a number after the arrow. +let rev_concat lst = List.fold_left (fun acc xs -> List.rev_append xs acc) [] lst - This makes the type path of the example type look like this : - - {[ - [[-> 1 a]; [-> 2 -> 1 b]; [-> 2 -> 2 * 1 c ]; [-> 2 -> 2 * 2 d]] - ]} *) - - type t = string list list - - val of_typ : ignore_any:bool -> Db.Typexpr.t -> t - (* [of_typ ~ignore_any typ] is the list of type path associated to [typ]. - If [ignore_any] is true, [Any] constructors in [typ] will be ignored, - if it is false, they will be treated like a polymorphic variable. *) -end = struct - module Sign = Db.Type_polarity.Sign - - type t = string list list - - let rev_concat lst = List.fold_left (fun acc xs -> List.rev_append xs acc) [] lst +let rec paths_of_type ~prefix t = + match t with + | Db.Typexpr.Poly _ -> [ Poly :: prefix ] + | Any -> [ Any :: prefix ] + | Arrow (a, b) -> + let prefix_left = Arrow_left :: prefix in + let prefix_right = Arrow_right :: prefix in + List.rev_append + (paths_of_type ~prefix:prefix_left a) + (paths_of_type ~prefix:prefix_right b) + | Constr (name, args) -> + let prefix = Type name :: prefix in + begin + match args with + | [] -> [ prefix ] + | _ -> + let length = List.length args in + rev_concat + @@ List.mapi + (fun i arg -> + let prefix = Argument { pos = i; length } :: prefix in + paths_of_type ~prefix arg) + args + end + | Tuple args -> + let length = List.length args in + rev_concat + @@ List.mapi (fun i arg -> + let prefix = Product { pos = i; length } :: prefix in + paths_of_type ~prefix arg) + @@ args + | Unhandled -> [] - let rec of_typ ~ignore_any ~prefix t = - match t with - | Db.Typexpr.Poly _ -> - let poly = "POLY" in - [ poly :: prefix ] - | Any -> - if ignore_any - then [ "_" :: prefix ] - else ( - let poly = "POLY" in - [ poly :: prefix ]) - | Arrow (a, b) -> - let prefix_left = "->0" :: prefix in - let prefix_right = "->1" :: prefix in - List.rev_append - (of_typ ~ignore_any ~prefix:prefix_left a) - (of_typ ~ignore_any ~prefix:prefix_right b) - | Constr (name, args) -> - let prefix = name :: prefix in - begin - match args with - | [] -> [ prefix ] - | _ -> - rev_concat - @@ List.mapi - (fun i arg -> - let prefix = string_of_int i :: prefix in - of_typ ~ignore_any ~prefix arg) - args - end - | Tuple args -> - rev_concat - @@ List.mapi (fun i arg -> - let prefix = (string_of_int i ^ "*") :: prefix in - of_typ ~ignore_any ~prefix arg) - @@ args - | Unhandled -> [] +let paths_of_type t = List.map List.rev @@ paths_of_type ~prefix:[] t - let of_typ ~ignore_any t = List.map List.rev @@ of_typ ~ignore_any ~prefix:[] t -end +(* *) -let skip_query x = 10 * String.length x -let skip_entry _ = 15 +let skip_entry _ = 10 let distance xs ys = let len_xs = List.length xs in let len_ys = List.length ys in let cache = Array.make_matrix (1 + len_xs) (1 + len_ys) (-1) in - let rec memo i j xs ys = + let inv = Db.Type_polarity.Sign.not in + let rec memo ~xsgn ~ysgn i j xs ys = let r = cache.(i).(j) in if r >= 0 then r else begin - let r = go i j xs ys in + let r = go ~xsgn ~ysgn i j xs ys in cache.(i).(j) <- r ; r end - and go i j xs ys = + and go ~xsgn ~ysgn i j xs ys = match xs, ys with | [], [] -> 0 | [], _ -> 0 - | [ "_" ], _ -> 0 - | x :: xs, y :: ys when x = y -> memo (i + 1) (j + 1) xs ys - | _, "->1" :: ys -> memo i (j + 1) xs ys - | "->1" :: xs, _ -> 1 + memo (i + 1) j xs ys - | xs, [] -> List.fold_left (fun acc x -> acc + skip_query x) 0 xs - | x :: xs', y :: ys' -> - let skip_x = skip_query x in + | [ Any ], _ when xsgn = ysgn -> 0 + | [ Poly ], [ (Any | Poly) ] when xsgn = ysgn -> 0 + | Arrow_left :: xs, Arrow_left :: ys -> + memo ~xsgn:(inv xsgn) ~ysgn:(inv ysgn) (i + 1) (j + 1) xs ys + | x :: xs, y :: ys when x = y && xsgn = ysgn -> memo ~xsgn ~ysgn (i + 1) (j + 1) xs ys + | _, Arrow_left :: ys -> 1 + memo ~xsgn ~ysgn:(inv ysgn) i (j + 1) xs ys + | Arrow_left :: xs, _ -> 1 + memo ~xsgn:(inv xsgn) ~ysgn (i + 1) j xs ys + | _, Arrow_right :: ys -> memo ~xsgn ~ysgn i (j + 1) xs ys + | Arrow_right :: xs, _ -> memo ~xsgn ~ysgn (i + 1) j xs ys + | _, [] -> 10_000 + | Product _ :: xs, Product _ :: ys -> 1 + memo ~xsgn ~ysgn (i + 1) (j + 1) xs ys + | Argument _ :: xs, Argument _ :: ys -> 1 + memo ~xsgn ~ysgn (i + 1) (j + 1) xs ys + | Product _ :: xs, ys -> 1 + memo ~xsgn ~ysgn (i + 1) j xs ys + | xs, Product _ :: ys -> 1 + memo ~xsgn ~ysgn i (j + 1) xs ys + | Type x :: xs', Type y :: ys' when xsgn = ysgn -> begin let skip_y = skip_entry y in - let cost = - match Name_cost.best_match ~sub:x y with - | None -> skip_x + skip_y - | Some (_, cost) -> cost - in - min - (cost + memo (i + 1) (j + 1) xs' ys') - (min (skip_x + memo (i + 1) j xs' ys) (skip_y + memo i (j + 1) xs ys')) + match Name_cost.best_match ~sub:x y with + | None -> skip_y + memo ~xsgn ~ysgn i (j + 1) xs ys' + | Some (_, cost) -> (cost / 3) + memo ~xsgn ~ysgn (i + 1) (j + 1) xs' ys' + end + | xs, Type y :: ys' -> skip_entry y + memo ~xsgn ~ysgn i (j + 1) xs ys' + | xs, Argument _ :: ys' -> memo ~xsgn ~ysgn i (j + 1) xs ys' + | _, (Any | Poly) :: _ -> 10_000 in - go 0 0 xs ys + let pos = Db.Type_polarity.Sign.Pos in + go ~xsgn:pos ~ysgn:pos 0 0 xs ys let minimize = function | [] -> 0 @@ -127,7 +109,7 @@ let minimize = function let used = Array.make (List.length (List.hd arr)) false in let arr = Array.map (fun lst -> - let lst = (1, None) :: List.mapi (fun i x -> x, Some i) lst in + let lst = List.mapi (fun i x -> x, i) lst in List.sort Stdlib.compare lst) @@ Array.of_list arr in @@ -145,48 +127,46 @@ let minimize = function then false else if rem <= 0 then begin - let score = acc + (1 * (Array.length arr - i)) in + (* entry type is smaller than query type *) + let score = acc + (1000 * (Array.length arr - i)) in best := min score !best ; true end else if i >= Array.length arr then begin - best := min !best (acc + (100 * rem)) ; + (* query type is smaller than entry type *) + let score = acc + (5 * rem) in + best := min score !best ; true end else if acc + heuristics.(i) >= !best then true - else ( + else begin let rec find = function | [] -> true | (cost, j) :: rest -> - let ok = - match j with - | None -> - go rem (acc + cost + if rem > Array.length arr - i then 100 else 0) (i + 1) - | Some j -> - if used.(j) - then true - else begin - used.(j) <- true ; - let ok = go (rem - 1) (acc + cost) (i + 1) in - used.(j) <- false ; - ok - end + let continue = + if used.(j) + then true + else begin + used.(j) <- true ; + let continue = go (rem - 1) (acc + cost) (i + 1) in + used.(j) <- false ; + continue + end in - if ok then find rest else false + if continue then find rest else false in - find arr.(i)) + find arr.(i) + end in let _ = go (Array.length used) 0 0 in !best let v ~query_paths ~entry = - let entry_paths = Type_path.of_typ ~ignore_any:false entry in + let entry_paths = paths_of_type entry in match entry_paths, query_paths with | _, [] | [], _ -> 0 | _ -> - let arr = - List.map (fun p -> List.map (fun q -> distance q p) query_paths) entry_paths - in + let arr = List.map (fun p -> List.map (distance p) entry_paths) query_paths in minimize arr diff --git a/query/type_distance.mli b/query/type_distance.mli index 3d73b7ea..ab97edef 100644 --- a/query/type_distance.mli +++ b/query/type_distance.mli @@ -1,10 +1,8 @@ -module Type_path : sig - type t +type t - val of_typ : ignore_any:bool -> Db.Typexpr.t -> t -end +val paths_of_type : Db.Typexpr.t -> t -val v : query_paths:Type_path.t -> entry:Db.Typexpr.t -> int +val v : query_paths:t -> entry:Db.Typexpr.t -> int (** [Type_distance.v ~query_paths ~entry] is an integer representing a notion of distance between two types. [query_paths] is a type from a query, and [entry] is the type of a possible candidate for this query. *) diff --git a/sherlodoc.opam b/sherlodoc.opam index 27831cef..6bd90ee2 100644 --- a/sherlodoc.opam +++ b/sherlodoc.opam @@ -1,28 +1,29 @@ # This file is generated by dune, edit dune-project instead opam-version: "2.0" -synopsis: "Fuzzy search in OCaml documentation" +synopsis: "Search engine for OCaml documentation" maintainer: ["art.wendling@gmail.com"] -authors: ["Arthur Wendling"] +authors: ["Arthur Wendling" "Emile Trotignon"] license: "MIT" homepage: "https://github.com/art-w/sherlodoc" bug-reports: "https://github.com/art-w/sherlodoc/issues" depends: [ "dune" {>= "3.5"} "ocaml" {>= "4.0.8"} - "cmdliner" {>= "1.2.0"} + "odoc" {>= "2.4.0"} + "base64" {>= "3.5.1"} "bigstringaf" {>= "0.9.1"} + "js_of_ocaml" {>= "5.6.0"} + "brr" {>= "0.0.6"} + "cmdliner" {>= "1.2.0"} "decompress" {>= "1.5.3"} - "base64" {>= "3.5.1"} "fpath" {>= "0.7.3"} "lwt" {>= "5.7.0"} "menhir" {>= "20230608"} - "odoc" {>= "2.4.0"} - "tyxml" {>= "4.6.0"} - "brr" {>= "0.0.6"} "ppx_blob" {>= "0.7.2"} - "alcotest" {with-test} + "tyxml" {>= "4.6.0"} "odig" {with-test} "base" {with-test & = "v0.16.3"} + "alcotest" {with-test} ] depopts: [ "dream" {>= "1.0.0~alpha5"} diff --git a/store/db_store.default.ml b/store/db_store.default.ml index 2183d094..36fb89cb 100644 --- a/store/db_store.default.ml +++ b/store/db_store.default.ml @@ -1,5 +1,6 @@ type db_format = - [ `marshal + [ `ancient + | `marshal | `js ] @@ -8,3 +9,4 @@ let available_backends = [ "marshal", `marshal; "js", `js ] let storage_module = function | `marshal -> (module Storage_marshal : Db.Storage.S) | `js -> (module Storage_js : Db.Storage.S) + | `ancient -> failwith "ancient is unsupported" diff --git a/test/cram/base_cli.t b/test/cram/base_cli.t index 8d289a65..4553a624 100644 --- a/test/cram/base_cli.t +++ b/test/cram/base_cli.t @@ -150,140 +150,140 @@ dependencies so we do not display error (one was encountered with yojson) $ export SHERLODOC_FORMAT=ancient $ sherlodoc index --index-docstring=false $(find ./docs/odoc/base/ -name "*.odocl") > /dev/null $ sherlodoc search --print-cost --limit 100 "S_poly" - 195 val Base.Set.S_poly.mem : 'a t -> 'a -> bool - 202 val Base.Hashtbl.S_poly.data : (_, 'b) t -> 'b list - 206 val Base.Hashtbl.S_poly.keys : ('a, _) t -> 'a key list - 212 val Base.Set.S_poly.map : ('a, _) set -> f:('a -> 'b) -> 'b t - 212 val Base.Hashtbl.S_poly.find_exn : ('a, 'b) t -> 'a key -> 'b - 213 val Base.Hashtbl.S_poly.choose_exn : ('a, 'b) t -> 'a key * 'b - 215 sig Base.Map.S_poly - 215 sig Base.Set.S_poly - 215 val Base.Hashtbl.S_poly.find : ('a, 'b) t -> 'a key -> 'b option - 218 val Base.Hashtbl.S_poly.choose : ('a, 'b) t -> ('a key * 'b) option - 218 val Base.Hashtbl.S_poly.to_alist : ('a, 'b) t -> ('a key * 'b) list - 219 sig Base.Hashtbl.S_poly - 221 val Base.Hashtbl.S_poly.map : ('a, 'b) t -> f:('b -> 'c) -> ('a, 'c) t - 222 val Base.Hashtbl.S_poly.map_inplace : (_, 'b) t -> f:('b -> 'b) -> unit - 222 val Base.Hashtbl.S_poly.remove_multi : ('a, _ list) t -> 'a key -> unit - 224 val Base.Hashtbl.S_poly.set : ('a, 'b) t -> key:'a key -> data:'b -> unit - 224 val Base.Hashtbl.S_poly.find_multi : ('a, 'b list) t -> 'a key -> 'b list - 226 val Base.Hashtbl.S_poly.find_and_remove : ('a, 'b) t -> 'a key -> 'b option - 235 val Base.Hashtbl.S_poly.update : ('a, 'b) t -> 'a key -> f:('b option -> 'b) -> unit - 235 val Base.Hashtbl.S_poly.add_multi : ('a, 'b list) t -> key:'a key -> data:'b -> unit - 235 val Base.Hashtbl.S_poly.filter_map : ('a, 'b) t -> f:('b -> 'c option) -> ('a, 'c) t - 236 val Base.Hashtbl.S_poly.filter_map_inplace : (_, 'b) t -> f:('b -> 'b option) -> unit - 236 val Base.Hashtbl.S_poly.filter_keys_inplace : ('a, _) t -> f:('a key -> bool) -> unit - 237 val Base.Hashtbl.S_poly.equal : ('b -> 'b -> bool) -> ('a, 'b) t -> ('a, 'b) t -> bool - 238 val Base.Hashtbl.S_poly.iteri : ('a, 'b) t -> f:(key:'a key -> data:'b -> unit) -> unit - 239 val Base.Hashtbl.S_poly.find_or_add : ('a, 'b) t -> 'a key -> default:(unit -> 'b) -> 'b - 240 val Base.Hashtbl.S_poly.add : ('a, 'b) t -> key:'a key -> data:'b -> [ `Ok | `Duplicate ] - 241 val Base.Hashtbl.S_poly.mapi : ('a, 'b) t -> f:(key:'a key -> data:'b -> 'c) -> ('a, 'c) t - 242 val Base.Hashtbl.S_poly.change : ('a, 'b) t -> 'a key -> f:('b option -> 'b option) -> unit - 242 val Base.Hashtbl.S_poly.findi_or_add : ('a, 'b) t -> 'a key -> default:('a key -> 'b) -> 'b - 244 val Base.Hashtbl.S_poly.update_and_return : ('a, 'b) t -> 'a key -> f:('b option -> 'b) -> 'b - 245 val Base.Hashtbl.S_poly.partition_tf : ('a, 'b) t -> f:('b -> bool) -> ('a, 'b) t * ('a, 'b) t - 246 val Base.Hashtbl.S_poly.incr : ?by:int -> ?remove_if_zero:bool -> ('a, int) t -> 'a key -> unit - 254 val Base.Hashtbl.S_poly.choose_randomly_exn : ?random_state:Random.State.t -> ('a, 'b) t -> 'a key * 'b - 255 val Base.Hashtbl.S_poly.filter_mapi : ('a, 'b) t -> f:(key:'a key -> data:'b -> 'c option) -> ('a, 'c) t - 258 val Base.Hashtbl.S_poly.fold : ('a, 'b) t -> init:'acc -> f:(key:'a key -> data:'b -> 'acc -> 'acc) -> 'acc - 259 val Base.Hashtbl.S_poly.partition_map : ('a, 'b) t -> f:('b -> ('c, 'd) Either.t) -> ('a, 'c) t * ('a, 'd) t - 259 val Base.Hashtbl.S_poly.choose_randomly : ?random_state:Random.State.t -> ('a, 'b) t -> ('a key * 'b) option - 265 val Base.Hashtbl.S_poly.partitioni_tf : ('a, 'b) t -> f:(key:'a key -> data:'b -> bool) -> ('a, 'b) t * ('a, 'b) t - 272 type ('a, 'b) Base.Map.S_poly.t - 272 type 'elt Base.Set.S_poly.t - 274 type ('a, 'cmp) Base.Set.S_poly.set - 275 type ('a, 'b) Base.Map.S_poly.tree - 275 type 'elt Base.Set.S_poly.tree - 276 type ('a, 'b) Base.Hashtbl.S_poly.t - 279 val Base.Hashtbl.S_poly.find_and_call : ('a, 'b) t -> + 150 sig Base.Map.S_poly + 150 sig Base.Set.S_poly + 154 sig Base.Hashtbl.S_poly + 198 type 'a Base.Hashtbl.S_poly.key = 'a + 207 type ('a, 'b) Base.Map.S_poly.t + 207 type 'elt Base.Set.S_poly.t + 209 type ('a, 'cmp) Base.Set.S_poly.set + 210 val Base.Set.S_poly.mem : 'a t -> 'a -> bool + 210 type ('a, 'b) Base.Map.S_poly.tree + 210 type 'elt Base.Set.S_poly.tree + 211 type ('a, 'b) Base.Hashtbl.S_poly.t + 211 mod Base.Set.S_poly.Named + 217 val Base.Hashtbl.S_poly.data : (_, 'b) t -> 'b list + 221 val Base.Hashtbl.S_poly.keys : ('a, _) t -> 'a key list + 224 type Base.Map.S_poly.comparator_witness + 224 type Base.Set.S_poly.comparator_witness + 227 val Base.Set.S_poly.map : ('a, _) set -> f:('a -> 'b) -> 'b t + 227 val Base.Hashtbl.S_poly.find_exn : ('a, 'b) t -> 'a key -> 'b + 228 val Base.Hashtbl.S_poly.choose_exn : ('a, 'b) t -> 'a key * 'b + 230 val Base.Hashtbl.S_poly.find : ('a, 'b) t -> 'a key -> 'b option + 233 val Base.Hashtbl.S_poly.choose : ('a, 'b) t -> ('a key * 'b) option + 233 val Base.Hashtbl.S_poly.to_alist : ('a, 'b) t -> ('a key * 'b) list + 233 mod Base.Map.S_poly.Make_applicative_traversals + 236 val Base.Hashtbl.S_poly.map : ('a, 'b) t -> f:('b -> 'c) -> ('a, 'c) t + 237 val Base.Hashtbl.S_poly.map_inplace : (_, 'b) t -> f:('b -> 'b) -> unit + 237 val Base.Hashtbl.S_poly.remove_multi : ('a, _ list) t -> 'a key -> unit + 239 val Base.Hashtbl.S_poly.set : ('a, 'b) t -> key:'a key -> data:'b -> unit + 239 val Base.Hashtbl.S_poly.find_multi : ('a, 'b list) t -> 'a key -> 'b list + 241 val Base.Hashtbl.S_poly.find_and_remove : ('a, 'b) t -> 'a key -> 'b option + 250 val Base.Hashtbl.S_poly.update : ('a, 'b) t -> 'a key -> f:('b option -> 'b) -> unit + 250 val Base.Hashtbl.S_poly.add_multi : ('a, 'b list) t -> key:'a key -> data:'b -> unit + 250 val Base.Hashtbl.S_poly.filter_map : ('a, 'b) t -> f:('b -> 'c option) -> ('a, 'c) t + 251 val Base.Hashtbl.S_poly.filter_map_inplace : (_, 'b) t -> f:('b -> 'b option) -> unit + 251 val Base.Hashtbl.S_poly.filter_keys_inplace : ('a, _) t -> f:('a key -> bool) -> unit + 252 val Base.Hashtbl.S_poly.equal : ('b -> 'b -> bool) -> ('a, 'b) t -> ('a, 'b) t -> bool + 253 val Base.Hashtbl.S_poly.iteri : ('a, 'b) t -> f:(key:'a key -> data:'b -> unit) -> unit + 254 val Base.Hashtbl.S_poly.find_or_add : ('a, 'b) t -> 'a key -> default:(unit -> 'b) -> 'b + 255 val Base.Hashtbl.S_poly.add : ('a, 'b) t -> key:'a key -> data:'b -> [ `Ok | `Duplicate ] + 256 val Base.Hashtbl.S_poly.mapi : ('a, 'b) t -> f:(key:'a key -> data:'b -> 'c) -> ('a, 'c) t + 257 val Base.Hashtbl.S_poly.change : ('a, 'b) t -> 'a key -> f:('b option -> 'b option) -> unit + 257 val Base.Hashtbl.S_poly.findi_or_add : ('a, 'b) t -> 'a key -> default:('a key -> 'b) -> 'b + 259 val Base.Hashtbl.S_poly.update_and_return : ('a, 'b) t -> 'a key -> f:('b option -> 'b) -> 'b + 260 val Base.Hashtbl.S_poly.partition_tf : ('a, 'b) t -> f:('b -> bool) -> ('a, 'b) t * ('a, 'b) t + 261 val Base.Hashtbl.S_poly.incr : ?by:int -> ?remove_if_zero:bool -> ('a, int) t -> 'a key -> unit + 269 val Base.Hashtbl.S_poly.choose_randomly_exn : ?random_state:Random.State.t -> ('a, 'b) t -> 'a key * 'b + 270 val Base.Hashtbl.S_poly.filter_mapi : ('a, 'b) t -> f:(key:'a key -> data:'b -> 'c option) -> ('a, 'c) t + 273 val Base.Hashtbl.S_poly.fold : ('a, 'b) t -> init:'acc -> f:(key:'a key -> data:'b -> 'acc -> 'acc) -> 'acc + 274 val Base.Hashtbl.S_poly.partition_map : ('a, 'b) t -> f:('b -> ('c, 'd) Either.t) -> ('a, 'c) t * ('a, 'd) t + 274 val Base.Hashtbl.S_poly.choose_randomly : ?random_state:Random.State.t -> ('a, 'b) t -> ('a key * 'b) option + 280 val Base.Hashtbl.S_poly.partitioni_tf : ('a, 'b) t -> f:(key:'a key -> data:'b -> bool) -> ('a, 'b) t * ('a, 'b) t + 294 val Base.Hashtbl.S_poly.find_and_call : ('a, 'b) t -> 'a key -> if_found:('b -> 'c) -> if_not_found:('a key -> 'c) -> 'c - 283 val Base.Set.S_poly.empty : 'a t - 283 type 'a Base.Hashtbl.S_poly.key = 'a - 283 val Base.Hashtbl.S_poly.partition_mapi : ('a, 'b) t -> + 298 val Base.Set.S_poly.empty : 'a t + 298 val Base.Hashtbl.S_poly.partition_mapi : ('a, 'b) t -> f:(key:'a key -> data:'b -> ('c, 'd) Either.t) -> ('a, 'c) t * ('a, 'd) t - 288 val Base.Map.S_poly.empty : ('k, _) t - 289 type Base.Map.S_poly.comparator_witness - 289 type Base.Set.S_poly.comparator_witness - 290 val Base.Set.S_poly.length : _ t -> int - 293 val Base.Set.S_poly.is_empty : _ t -> bool - 293 val Base.Set.S_poly.singleton : 'a -> 'a t - 294 val Base.Set.S_poly.choose_exn : 'a t -> 'a - 295 val Base.Set.S_poly.add : 'a t -> 'a -> 'a t - 295 val Base.Map.S_poly.length : (_, _) t -> int - 295 val Base.Set.S_poly.max_elt_exn : 'a t -> 'a - 295 val Base.Set.S_poly.min_elt_exn : 'a t -> 'a - 296 val Base.Set.S_poly.of_list : 'a list -> 'a t - 296 val Base.Set.S_poly.of_tree : 'a tree -> 'a t - 296 val Base.Set.S_poly.to_list : 'a t -> 'a list - 296 val Base.Set.S_poly.to_tree : 'a t -> 'a tree - 296 val Base.Set.S_poly.invariants : 'a t -> bool - 297 val Base.Set.S_poly.choose : 'a t -> 'a option - 297 val Base.Set.S_poly.elements : 'a t -> 'a list - 297 val Base.Hashtbl.S_poly.merge_into : src:('k, 'a) t -> + 303 val Base.Map.S_poly.empty : ('k, _) t + 305 val Base.Set.S_poly.length : _ t -> int + 308 val Base.Set.S_poly.is_empty : _ t -> bool + 308 val Base.Set.S_poly.singleton : 'a -> 'a t + 309 val Base.Set.S_poly.choose_exn : 'a t -> 'a + 310 val Base.Set.S_poly.add : 'a t -> 'a -> 'a t + 310 val Base.Map.S_poly.length : (_, _) t -> int + 310 val Base.Set.S_poly.max_elt_exn : 'a t -> 'a + 310 val Base.Set.S_poly.min_elt_exn : 'a t -> 'a + 311 val Base.Set.S_poly.of_list : 'a list -> 'a t + 311 val Base.Set.S_poly.of_tree : 'a tree -> 'a t + 311 val Base.Set.S_poly.to_list : 'a t -> 'a list + 311 val Base.Set.S_poly.to_tree : 'a t -> 'a tree + 311 val Base.Set.S_poly.invariants : 'a t -> bool + 312 val Base.Set.S_poly.choose : 'a t -> 'a option + 312 val Base.Set.S_poly.elements : 'a t -> 'a list + 312 val Base.Hashtbl.S_poly.merge_into : src:('k, 'a) t -> dst:('k, 'b) t -> f:(key:'k key -> 'a -> 'b option -> 'b Merge_into_action.t) -> unit - 298 val Base.Map.S_poly.data : (_, 'v) t -> 'v list - 298 val Base.Map.S_poly.keys : ('k, _) t -> 'k list - 298 val Base.Set.S_poly.diff : 'a t -> 'a t -> 'a t - 298 val Base.Set.S_poly.remove : 'a t -> 'a -> 'a t - 298 val Base.Set.S_poly.max_elt : 'a t -> 'a option - 298 val Base.Set.S_poly.min_elt : 'a t -> 'a option - 298 val Base.Map.S_poly.is_empty : (_, _) t -> bool - 298 val Base.Set.S_poly.of_array : 'a array -> 'a t - 298 val Base.Set.S_poly.to_array : 'a t -> 'a array - 299 val Base.Set.S_poly.equal : 'a t -> 'a t -> bool - 299 val Base.Set.S_poly.inter : 'a t -> 'a t -> 'a t - 299 val Base.Set.S_poly.union : 'a t -> 'a t -> 'a t - 299 val Base.Hashtbl.S_poly.clear : (_, _) t -> unit - 299 val Base.Hashtbl.S_poly.length : (_, _) t -> int - 299 val Base.Hashtbl.S_poly.hashable : 'a Hashable.t - 300 val Base.Map.S_poly.mem : ('k, _) t -> 'k -> bool - 301 val Base.Set.S_poly.nth : 'a t -> int -> 'a option - 301 val Base.Set.S_poly.union_list : 'a t list -> 'a t - 302 val Base.Map.S_poly.invariants : ('k, 'v) t -> bool - 302 val Base.Hashtbl.S_poly.is_empty : (_, _) t -> bool - 302 val Base.Hashtbl.S_poly.find_and_call1 : ('a, 'b) t -> + 313 val Base.Map.S_poly.data : (_, 'v) t -> 'v list + 313 val Base.Map.S_poly.keys : ('k, _) t -> 'k list + 313 val Base.Set.S_poly.diff : 'a t -> 'a t -> 'a t + 313 val Base.Set.S_poly.remove : 'a t -> 'a -> 'a t + 313 val Base.Set.S_poly.max_elt : 'a t -> 'a option + 313 val Base.Set.S_poly.min_elt : 'a t -> 'a option + 313 val Base.Map.S_poly.is_empty : (_, _) t -> bool + 313 val Base.Set.S_poly.of_array : 'a array -> 'a t + 313 val Base.Set.S_poly.to_array : 'a t -> 'a array + 314 val Base.Set.S_poly.equal : 'a t -> 'a t -> bool + 314 val Base.Set.S_poly.inter : 'a t -> 'a t -> 'a t + 314 val Base.Set.S_poly.union : 'a t -> 'a t -> 'a t + 314 val Base.Hashtbl.S_poly.clear : (_, _) t -> unit + 314 val Base.Hashtbl.S_poly.length : (_, _) t -> int + 314 val Base.Hashtbl.S_poly.hashable : 'a Hashable.t + 315 val Base.Map.S_poly.mem : ('k, _) t -> 'k -> bool + 316 val Base.Set.S_poly.nth : 'a t -> int -> 'a option + 316 val Base.Set.S_poly.union_list : 'a t list -> 'a t + 317 val Base.Map.S_poly.invariants : ('k, 'v) t -> bool + 317 val Base.Hashtbl.S_poly.is_empty : (_, _) t -> bool + 317 val Base.Hashtbl.S_poly.find_and_call1 : ('a, 'b) t -> 'a key -> a:'d -> if_found:('b -> 'd -> 'c) -> if_not_found:('a key -> 'd -> 'c) -> 'c - 304 val Base.Map.S_poly.find_exn : ('k, 'v) t -> 'k -> 'v - 305 val Base.Map.S_poly.singleton : 'k -> 'v -> ('k, 'v) t - 305 val Base.Set.S_poly.remove_index : 'a t -> int -> 'a t - 306 val Base.Hashtbl.S_poly.copy : ('a, 'b) t -> ('a, 'b) t - 306 val Base.Map.S_poly.max_elt_exn : ('k, 'v) t -> 'k * 'v - 306 val Base.Map.S_poly.min_elt_exn : ('k, 'v) t -> 'k * 'v - 306 val Base.Set.S_poly.of_sequence : 'a Sequence.t -> 'a t - 306 val Base.Set.S_poly.are_disjoint : 'a t -> 'a t -> bool - 307 val Base.Map.S_poly.find : ('k, 'v) t -> 'k -> 'v option - 307 val Base.Map.S_poly.rank : ('k, _) t -> 'k -> int option - 307 val Base.Set.S_poly.compare_direct : 'a t -> 'a t -> int + 319 val Base.Map.S_poly.find_exn : ('k, 'v) t -> 'k -> 'v + 320 val Base.Map.S_poly.singleton : 'k -> 'v -> ('k, 'v) t + 320 val Base.Set.S_poly.remove_index : 'a t -> int -> 'a t + 321 val Base.Hashtbl.S_poly.copy : ('a, 'b) t -> ('a, 'b) t + 321 val Base.Map.S_poly.max_elt_exn : ('k, 'v) t -> 'k * 'v + 321 val Base.Map.S_poly.min_elt_exn : ('k, 'v) t -> 'k * 'v + 321 val Base.Set.S_poly.of_sequence : 'a Sequence.t -> 'a t + 321 val Base.Set.S_poly.are_disjoint : 'a t -> 'a t -> bool + 322 val Base.Set.S_poly.compare_direct : 'a t -> 'a t -> int $ sherlodoc search --print-cost --no-rhs "group b" - 281 val Base.Set.group_by - 360 val Base.List.group - 367 val Base.Sequence.group - 375 val Base.Set.Poly.group_by - 390 val Base.List.Assoc.group - 390 val Base.List.sort_and_group - 403 val Base.Set.Using_comparator.group_by - 413 val Base.Set.Using_comparator.Tree.group_by - 420 val Base.List.Assoc.sort_and_group - 458 val Base.List.groupi - 477 val Base.Set.S_poly.group_by - 478 val Base.Hashtbl.group - 512 val Base.Set.Accessors_generic.group_by - 525 val Base.Set.Creators_and_accessors_generic.group_by - 578 val Base.Hashtbl.Poly.group - 585 val Base.Hashtbl.Creators.group - 592 val Base.Hashtbl.Creators.group - 604 val Base.Hashtbl.S_without_submodules.group - 680 val Base.Hashtbl.S_poly.group + 181 val Base.Set.group_by + 205 val Base.List.group + 212 val Base.Sequence.group + 225 val Base.List.sort_and_group + 228 val Base.List.groupi + 235 val Base.List.Assoc.group + 255 val Base.List.Assoc.sort_and_group + 275 val Base.Set.Poly.group_by + 303 val Base.Set.Using_comparator.group_by + 313 val Base.Set.Using_comparator.Tree.group_by + 323 val Base.Hashtbl.group + 377 val Base.Set.S_poly.group_by + 412 val Base.Set.Accessors_generic.group_by + 423 val Base.Hashtbl.Poly.group + 425 val Base.Set.Creators_and_accessors_generic.group_by + 430 val Base.Hashtbl.Creators.group + 437 val Base.Hashtbl.Creators.group + 449 val Base.Hashtbl.S_without_submodules.group + 525 val Base.Hashtbl.S_poly.group $ sherlodoc search --no-rhs "group by" val Base.Set.group_by val Base.Set.Poly.group_by @@ -293,151 +293,151 @@ dependencies so we do not display error (one was encountered with yojson) val Base.Set.Accessors_generic.group_by val Base.Set.Creators_and_accessors_generic.group_by $ sherlodoc search --print-cost "map2" + 127 mod Base.Applicative.Make_using_map2 + 128 mod Base.Applicative.Make2_using_map2 + 128 mod Base.Applicative.Make3_using_map2 + 138 mod Base.Applicative.Make_using_map2_local + 139 mod Base.Applicative.Make2_using_map2_local + 139 mod Base.Applicative.Make3_using_map2_local 142 val Base.Uniform_array.map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 150 val Base.Applicative.Make_using_map2.(<*>) : ('a -> 'b) X.t -> 'a X.t -> 'b X.t - 157 val Base.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 173 val Base.Applicative.Make_using_map2.Applicative_infix.(<*>) : ('a -> 'b) X.t -> 'a X.t -> 'b X.t - 176 val Base.Applicative.Make_using_map2_local.(<*>) : ('a -> 'b) X.t -> 'a X.t -> 'b X.t - 199 val Base.Applicative.Make_using_map2_local.Applicative_infix.(<*>) : ('a -> 'b) X.t -> 'a X.t -> 'b X.t - 211 val Base.Option.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 213 val Base.Or_error.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 226 val Base.Applicative.Pair.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 229 val Base.Applicative.Compose.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 229 val Base.Applicative.S2_to_S.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 229 val Base.Applicative.S3_to_S.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 229 val Base.Applicative.Make_using_map2.return : 'a -> 'a X.t - 230 val Base.List.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t Or_unequal_lengths.t - 232 val Base.Applicative.Make.map2 : 'a X.t -> 'b X.t -> f:('a -> 'b -> 'c) -> 'c X.t - 232 val Base.List.Cartesian_product.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 233 val Base.List.map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 234 val Base.Array.map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 235 val Base.Applicative.Make2_using_map2.return : 'a -> ('a, _) X.t - 236 val Base.Applicative.Of_monad.map2 : 'a M.t -> 'b M.t -> f:('a -> 'b -> 'c) -> 'c M.t - 238 val Base.Applicative.Make3_using_map2.return : 'a -> ('a, _, _) X.t - 240 val Base.Either.First.map2 : ('a, 'e) t -> ('b, 'e) t -> f:('a -> 'b -> 'c) -> ('c, 'e) t - 240 val Base.Applicative.Make_using_map2.all : 'a X.t list -> 'a list X.t - 241 val Base.Either.Second.map2 : ('a, 'e) t -> ('b, 'e) t -> f:('a -> 'b -> 'c) -> ('c, 'e) t - 243 val Base.Applicative.Make_using_map2.map2 : 'a X.t -> 'b X.t -> f:('a -> 'b -> 'c) -> 'c X.t + 147 val Base.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t + 150 mod Base.Applicative.Make_using_map2.Applicative_infix + 151 mod Base.Applicative.Make2_using_map2.Applicative_infix + 151 mod Base.Applicative.Make3_using_map2.Applicative_infix + 155 val Base.Applicative.Make_using_map2.(<*>) : ('a -> 'b) X.t -> 'a X.t -> 'b X.t + 161 mod Base.Applicative.Make_using_map2_local.Applicative_infix + 162 mod Base.Applicative.Make2_using_map2_local.Applicative_infix + 162 mod Base.Applicative.Make3_using_map2_local.Applicative_infix + 166 val Base.Applicative.Make_using_map2_local.(<*>) : ('a -> 'b) X.t -> 'a X.t -> 'b X.t + 178 sig Base.Applicative.Basic_using_map2 + 178 val Base.Applicative.Make_using_map2.Applicative_infix.(<*>) : ('a -> 'b) X.t -> 'a X.t -> 'b X.t + 179 sig Base.Applicative.Basic2_using_map2 + 179 sig Base.Applicative.Basic3_using_map2 + 189 sig Base.Applicative.Basic_using_map2_local + 189 val Base.Applicative.Make_using_map2_local.Applicative_infix.(<*>) : ('a -> 'b) X.t -> 'a X.t -> 'b X.t + 190 sig Base.Applicative.Basic2_using_map2_local + 190 sig Base.Applicative.Basic3_using_map2_local + 226 val Base.Option.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t $ sherlodoc search --print-cost --static-sort "List map2" - 97 val Base.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 193 val Base.List.map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 210 val Base.List.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t Or_unequal_lengths.t - 212 val Base.List.Cartesian_product.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 214 val Base.List.rev_map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t Or_unequal_lengths.t + 127 val Base.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t + 223 val Base.List.map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t + 240 val Base.List.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t Or_unequal_lengths.t + 242 val Base.List.Cartesian_product.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t + 244 val Base.List.rev_map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t Or_unequal_lengths.t $ sherlodoc search --print-cost "List map2" - 177 val Base.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t + 152 val Base.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t + 238 val Base.List.map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t 250 val Base.List.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t Or_unequal_lengths.t 252 val Base.List.Cartesian_product.map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 253 val Base.List.map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - 274 val Base.List.rev_map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t Or_unequal_lengths.t + 264 val Base.List.rev_map2 : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t Or_unequal_lengths.t $ sherlodoc search --no-rhs "Base.Hashtbl.S_without_submodules.group" val Base.Hashtbl.S_without_submodules.group $ sherlodoc search --print-cost "list" - 105 val Base.Bytes.to_list : t -> char list - 106 val Base.Queue.of_list : 'a list -> 'a t - 106 val Base.Stack.of_list : 'a list -> 'a t - 109 val Base.Set.to_list : ('a, _) t -> 'a list - 110 val Base.Bytes.of_char_list : char list -> t - 113 val Base.Linked_queue.of_list : 'a list -> 'a t - 121 val Base.Info.of_list : ?trunc_after:int -> t list -> t - 122 val Base.Error.of_list : ?trunc_after:int -> t list -> t - 128 val Base.List.rev : 'a t -> 'a t - 129 val Base.List.hd_exn : 'a t -> 'a - 129 val Base.List.return : 'a -> 'a t - 130 val Base.Array.of_list_rev : 'a list -> 'a t - 130 val Base.String.to_list_rev : t -> char list - 131 val Base.List.join : 'a t t -> 'a t - 131 val Base.List.tl_exn : 'a t -> 'a t - 131 val Base.Sequence.shift_right_with_list : 'a t -> 'a list -> 'a t - 133 val Base.List.concat : 'a t t -> 'a t - 133 val Base.Sequence.to_list_rev : 'a t -> 'a list - 134 val Base.List.last : 'a t -> 'a option - 135 val Base.List.ignore_m : 'a t -> unit t - 136 val Base.List.drop : 'a t -> int -> 'a t - 136 val Base.List.take : 'a t -> int -> 'a t - 136 val Base.Sequence.cycle_list_exn : 'a list -> 'a t - 137 val Base.List.nth_exn : 'a t -> int -> 'a - 139 val Base.List.append : 'a t -> 'a t -> 'a t + 81 type 'a Base.list = 'a List.t + 93 type 'a Base.Export.list = 'a List.t + 101 type 'a Base.List.t = 'a list + 104 mod Base.List + 104 mod Caml.List + 108 val Base.List.rev : 'a t -> 'a t + 109 val Base.List.hd_exn : 'a t -> 'a + 109 val Base.List.return : 'a -> 'a t + 110 val Base.Bytes.to_list : t -> char list + 111 val Base.List.join : 'a t t -> 'a t + 111 val Base.List.tl_exn : 'a t -> 'a t + 111 val Base.Queue.of_list : 'a list -> 'a t + 111 val Base.Stack.of_list : 'a list -> 'a t + 113 val Base.List.concat : 'a t t -> 'a t + 113 mod Shadow_stdlib.List + 114 val Base.List.last : 'a t -> 'a option + 114 val Base.Set.to_list : ('a, _) t -> 'a list + 115 mod Base.List.Assoc + 115 mod Base.List.Infix + 115 cons Base.Sexp.t.List : t list -> t + 115 val Base.List.ignore_m : 'a t -> unit t + 115 val Base.Bytes.of_char_list : char list -> t + 116 val Base.List.drop : 'a t -> int -> 'a t + 116 val Base.List.take : 'a t -> int -> 'a t + 117 val Base.List.nth_exn : 'a t -> int -> 'a $ sherlodoc search --print-cost ": list" - 95 val Base.Bytes.to_list : t -> char list - 97 val Base.String.split_lines : t -> t list - 100 val Base.String.to_list_rev : t -> char list - 103 val Base.Sequence.to_list_rev : 'a t -> 'a list - 105 val Caml.(@) : 'a list -> 'a list -> 'a list - 105 val Base.Pretty_printer.all : unit -> string list - 109 val Base.Set.to_list : ('a, _) t -> 'a list - 110 val Base.Hashtbl.data : (_, 'b) t -> 'b list - 110 val Base.Set.elements : ('a, _) t -> 'a list - 112 val Base.String.split : t -> on:char -> t list - 114 val Base.Hashtbl.keys : ('a, _) t -> 'a key list - 119 val Base.Map.data : (_, 'v, _) t -> 'v list - 119 val Base.Map.keys : ('k, _, _) t -> 'k list - 120 val Base.Hashtbl.Poly.data : (_, 'b) t -> 'b list - 124 val Base.Hashtbl.Poly.keys : ('a, _) t -> 'a key list - 126 val Base.String.split_on_chars : t -> on:char list -> t list - 136 val Base.Hashtbl.to_alist : ('a, 'b) t -> ('a key * 'b) list - 138 val Base.List.rev : 'a t -> 'a t - 139 val Base.List.return : 'a -> 'a t - 139 val Base.String.Search_pattern.split_on : t -> string -> string list - 141 val Base.List.join : 'a t t -> 'a t - 141 val Base.List.tl_exn : 'a t -> 'a t - 142 val Base.Hashtbl.find_multi : ('a, 'b list) t -> 'a key -> 'b list - 143 val Base.List.concat : 'a t t -> 'a t - 145 val Base.List.ignore_m : 'a t -> unit t + 118 val Base.List.rev : 'a t -> 'a t + 119 val Base.List.return : 'a -> 'a t + 120 val Base.Bytes.to_list : t -> char list + 121 val Base.List.join : 'a t t -> 'a t + 121 val Base.List.tl_exn : 'a t -> 'a t + 122 val Base.String.split_lines : t -> t list + 123 val Base.List.concat : 'a t t -> 'a t + 125 val Base.List.ignore_m : 'a t -> unit t + 125 val Base.String.to_list_rev : t -> char list + 128 val Base.Sequence.to_list_rev : 'a t -> 'a list + 130 val Base.Pretty_printer.all : unit -> string list + 132 val Base.List.all_unit : unit t list -> unit t + 132 val Base.List.filter_opt : 'a option t -> 'a t + 132 val Base.List.transpose_exn : 'a t t -> 'a t t + 132 val Base.List.concat_no_order : 'a t t -> 'a t + 145 val Caml.(@) : 'a list -> 'a list -> 'a list + 149 val Base.Set.to_list : ('a, _) t -> 'a list + 150 val Base.Hashtbl.data : (_, 'b) t -> 'b list + 150 val Base.Set.elements : ('a, _) t -> 'a list + 151 val Base.List.drop : 'a t -> int -> 'a t + 151 val Base.List.take : 'a t -> int -> 'a t + 152 val Base.String.split : t -> on:char -> t list + 154 val Base.List.append : 'a t -> 'a t -> 'a t + 154 val Base.Hashtbl.keys : ('a, _) t -> 'a key list + 158 val Base.List.rev_append : 'a t -> 'a t -> 'a t Partial name search: $ sherlodoc search --print-cost "strin" - 169 val Caml.string_of_int : int -> string - 171 val Caml.string_of_bool : bool -> string - 173 val Caml.string_of_float : float -> string - 186 val Base.Sexp.of_string : unit - 189 val Caml.prerr_string : string -> unit - 189 val Caml.print_string : string -> unit - 189 val Caml.int_of_string : string -> int - 191 val Caml.bool_of_string : string -> bool - 192 val Base.Exn.to_string : t -> string - 192 val Base.Sys.max_string_length : int - 193 val Caml.float_of_string : string -> float - 194 val Base.Float.to_string : t -> string - 197 val Base.Exn.to_string_mach : t -> string - 197 val Base.Info.to_string_hum : t -> string - 197 val Base.Sign.to_string_hum : t -> string - 198 val Base.Error.to_string_hum : t -> string - 198 val Base.Info.to_string_mach : t -> string - 199 val Base.Error.to_string_mach : t -> string - 200 val Caml.int_of_string_opt : string -> int option - 201 val Caml.string_of_format : ('a, 'b, 'c, 'd, 'e, 'f) format6 -> string - 202 val Caml.bool_of_string_opt : string -> bool option - 202 val Base.Or_error.error_string : string -> _ t - 204 val Base.Buffer.add_string : t -> string -> unit - 204 val Caml.float_of_string_opt : string -> float option - 204 val Base.Sign_or_nan.to_string_hum : t -> string + 97 type Base.string = String.t + 109 type Base.Export.string = String.t + 109 val Caml.string_of_int : int -> string + 111 val Caml.string_of_bool : bool -> string + 113 val Caml.string_of_float : float -> string + 116 val Base.Sexp.of_string : unit + 117 type Base.String.t = string + 117 type Base.String.elt = char + 119 val Base.String.rev : t -> t + 119 val Caml.prerr_string : string -> unit + 119 val Caml.print_string : string -> unit + 119 val Caml.int_of_string : string -> int + 121 mod Base.String + 121 mod Caml.String + 121 val Caml.bool_of_string : string -> bool + 122 val Base.String.hash : t -> int + 122 val Base.Exn.to_string : t -> string + 122 val Base.Sys.max_string_length : int + 123 val Base.String.escaped : t -> t + 123 val Caml.float_of_string : string -> float + 123 val Base.String.max_length : int + 124 val Base.String.(^) : t -> t -> t + 124 val Base.Float.to_string : t -> string + 125 mod Base.Stringable + 125 val Base.String.uppercase : t -> t $ sherlodoc search --print-cost "tring" - 164 val Base.String.rev : t -> t - 166 val Base.Sexp.of_string : unit - 167 val Base.String.hash : t -> int - 168 val Base.String.escaped : t -> t - 168 val Base.String.max_length : int - 169 val Base.String.(^) : t -> t -> t - 169 val Caml.prerr_string : string -> unit - 169 val Caml.print_string : string -> unit - 169 val Caml.int_of_string : string -> int - 170 val Base.String.uppercase : t -> t - 171 val Caml.bool_of_string : string -> bool - 171 val Base.String.capitalize : t -> t - 172 val Base.Exn.to_string : t -> string - 172 val Base.String.append : t -> t -> t - 173 val Caml.float_of_string : string -> float - 174 val Base.String.equal : t -> t -> bool - 174 val Base.String.prefix : t -> int -> t - 174 val Base.String.suffix : t -> int -> t - 174 val Base.Float.to_string : t -> string - 175 val Base.String.compare : t -> t -> int - 177 val Base.String.ascending : t -> t -> int - 177 val Base.String.split_lines : t -> t list - 179 val Base.String.drop_prefix : t -> int -> t - 179 val Base.String.drop_suffix : t -> int -> t - 179 val Base.String.common_suffix : t list -> t + 127 type Base.string = String.t + 132 type Base.String.t = string + 132 type Base.String.elt = char + 134 val Base.String.rev : t -> t + 136 mod Base.String + 136 mod Caml.String + 136 val Base.Sexp.of_string : unit + 137 val Base.String.hash : t -> int + 138 val Base.String.escaped : t -> t + 138 val Base.String.max_length : int + 139 val Base.String.(^) : t -> t -> t + 139 val Caml.prerr_string : string -> unit + 139 val Caml.print_string : string -> unit + 139 type Base.Export.string = String.t + 139 val Caml.int_of_string : string -> int + 140 val Base.String.uppercase : t -> t + 141 val Caml.bool_of_string : string -> bool + 141 type Base.String.Caseless.t = t + 141 val Base.String.capitalize : t -> t + 142 val Base.Exn.to_string : t -> string + 142 val Base.String.append : t -> t -> t + 143 val Caml.float_of_string : string -> float + 144 val Base.String.equal : t -> t -> bool + 144 val Base.String.prefix : t -> int -> t + 144 val Base.Float.to_string : t -> string diff --git a/test/cram/base_web.t b/test/cram/base_web.t index 5a70e1ef..932ab2d5 100644 --- a/test/cram/base_web.t +++ b/test/cram/base_web.t @@ -159,9 +159,9 @@ in queryable way, so a size increase is expected. It should just be reasonable. $ gzip -k megaodocl $ du -s *.js *.gz - 2284 db.js - 1724 db.js.gz - 1776 megaodocl.gz + 2088 db.js + 1580 db.js.gz + 1772 megaodocl.gz $ for f in $(find . -name '*.odocl'); do > odoc html-generate --search-uri=db.js --search-uri=sherlodoc.js --output-dir html $f @@ -172,7 +172,7 @@ The --no-preserve flag is here so that copying to /tmp will not fail because of a previous run. .js files built by dune are read only. $ cp --no-preserve=mode,ownership ../../jsoo/main.bc.js html/sherlodoc.js $ du -sh html/sherlodoc.js - 96K html/sherlodoc.js + 100K html/sherlodoc.js $ ls html base db.js diff --git a/test/cram/cli.t/run.t b/test/cram/cli.t/run.t index d1ea3a6b..30d114fa 100644 --- a/test/cram/cli.t/run.t +++ b/test/cram/cli.t/run.t @@ -16,31 +16,31 @@ val Main.multiple_hit_2 : foo val Main.multiple_hit_3 : foo $ sherlodoc search --print-cost "name_conflict" - 169 val Main.name_conflict : foo - 169 type Main.name_conflict = foo + 84 type Main.name_conflict = foo + 184 val Main.name_conflict : foo $ sherlodoc search "nesting_priority" val Main.nesting_priority : foo val Main.Nest.nesting_priority : foo $ sherlodoc search "list" type 'a Main.list - val Main.Map.to_list : foo type 'a Main.List.t = 'a list - val Main.List.map : ('a -> 'b) -> 'a t -> 'b t mod Main.List + val Main.Map.to_list : foo + val Main.List.map : ('a -> 'b) -> 'a t -> 'b t val Main.List.map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list val Main.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t val Main.foo : foo $ sherlodoc search "map" - val Main.List.map : ('a -> 'b) -> 'a t -> 'b t - val Main.Map.to_list : foo mod Main.Map + val Main.Map.to_list : foo + val Main.List.map : ('a -> 'b) -> 'a t -> 'b t val Main.List.map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list val Main.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t val Main.foo : foo $ sherlodoc search "list map" val Main.List.map : ('a -> 'b) -> 'a t -> 'b t - val Main.Map.to_list : foo val Main.List.map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list + val Main.Map.to_list : foo val Main.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t val Main.foo : foo $ sherlodoc search "map2" @@ -55,33 +55,33 @@ val Main.produce_2' : unit -> unit -> moo val Main.value : moo $ sherlodoc search ":moo -> _" - val Main.consume : moo -> unit cons Main.MyExtension : moo -> extensible_type + val Main.consume : moo -> unit val Main.consume_2 : moo -> moo -> unit val Main.consume_2_other : moo -> t -> unit $ sherlodoc search "modtype" sig Main.Modtype val Main.Modtype.v_modtype : foo $ sherlodoc search "S" - sig Main.S mod Main.S_to_S1 - type 'a Main.list - type Main.MyExtension + sig Main.S type Main.extensible_type = .. type 'a Main.List.t = 'a list + mod Main.List + mod Main.Nest + type 'a Main.list + type Main.MyExtension + cons Main.MyExtension : moo -> extensible_type val Main.consume : moo -> unit val Main.Map.to_list : foo val Main.nesting_priority : foo val Main.consume_2 : moo -> moo -> unit val Main.Nest.nesting_priority : foo val Main.consume_2_other : moo -> t -> unit - cons Main.MyExtension : moo -> extensible_type val Main.List.map : ('a -> 'b) -> 'a t -> 'b t - mod Main.List - mod Main.Nest + val Main.foo : foo val Main.List.map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list val Main.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - val Main.foo : foo $ sherlodoc search "qwertyuiopasdfghjklzxcvbnm" [No results] $ sherlodoc search "hidden" @@ -91,21 +91,21 @@ val Main.produce : unit -> moo val Main.produce_2' : unit -> unit -> moo $ sherlodoc search ":'a" + val Main.poly_param : 'a boo val Main.poly_1 : 'a -> 'b -> 'c - val Main.poly_2 : 'a -> 'b -> 'c -> 'a -> 'b -> 'c val Main.List.map : ('a -> 'b) -> 'a t -> 'b t + val Main.poly_2 : 'a -> 'b -> 'c -> 'a -> 'b -> 'c val Main.List.map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list val Main.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t - val Main.poly_param : 'a boo $ sherlodoc search ": 'a -> 'b -> 'c " val Main.poly_1 : 'a -> 'b -> 'c val Main.poly_2 : 'a -> 'b -> 'c -> 'a -> 'b -> 'c - val Main.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t val Main.List.map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list + val Main.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t $ sherlodoc search ": ('a -> 'b) -> 'a t -> 'b t" val Main.List.map : ('a -> 'b) -> 'a t -> 'b t - val Main.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t val Main.List.map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list + val Main.List.rev_map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t $ sherlodoc search ": 'a bo" val Main.poly_param : 'a boo $ sherlodoc search ":extensible_type" diff --git a/test/cram/cli_small.t/run.t b/test/cram/cli_small.t/run.t index 95daa075..b8545e0e 100644 --- a/test/cram/cli_small.t/run.t +++ b/test/cram/cli_small.t/run.t @@ -8,10 +8,10 @@ $ export SHERLODOC_FORMAT=ancient $ sherlodoc index $(find . -name '*.odocl') $ sherlodoc search --print-cost "list" - 154 type 'a Main.list - 221 type 'a Main.List.t = 'a list - 229 val Main.List.empty : 'a t * 'b t - 242 val Main.List.map : ('a -> 'b) -> 'a t -> 'b t - 254 mod Main.List + 89 type 'a Main.list + 101 type 'a Main.List.t = 'a list + 104 mod Main.List + 209 val Main.List.empty : 'a t * 'b t + 222 val Main.List.map : ('a -> 'b) -> 'a t -> 'b t $ sherlodoc search ": (int, 'a) result" val Main.ok_zero : (int, 'a) result diff --git a/test/cram/module_type_cost.t/run.t b/test/cram/module_type_cost.t/run.t index 9c499626..d9b72e95 100644 --- a/test/cram/module_type_cost.t/run.t +++ b/test/cram/module_type_cost.t/run.t @@ -10,10 +10,10 @@ Here we expect to have the `my_function` from the module be above the one from the module type. $ sherlodoc search --print-cost --no-rhs "my_function" - 181 val Main.M.my_function - 184 val Main.Make.my_function - 281 val Main.S.my_function + 196 val Main.M.my_function + 199 val Main.Make.my_function + 296 val Main.S.my_function Here we expect both the module type and the module to be ranked the same $ sherlodoc search --print-cost "module" - 281 mod Main.Module_nype - 281 sig Main.Module_type + 116 mod Main.Module_nype + 166 sig Main.Module_type diff --git a/test/cram/simple.t/run.t b/test/cram/simple.t/run.t index 0f1733bb..ce7480a7 100644 --- a/test/cram/simple.t/run.t +++ b/test/cram/simple.t/run.t @@ -11,7 +11,7 @@ Here cat is used to remove weird permissions on executable built by dune $ cat ../../../jsoo/main.bc.js > sherlodoc.js $ du -sh sherlodoc.js - 96K sherlodoc.js + 100K sherlodoc.js $ mkdir html $ cp sherlodoc.js html $ cp db.js html diff --git a/www/dune b/www/dune index 4d6d7253..b8ea93c0 100644 --- a/www/dune +++ b/www/dune @@ -1,6 +1,7 @@ (library (name www) - (libraries cmdliner dream tyxml db db_store query) + (optional) + (libraries lwt cmdliner dream tyxml db db_store query) (preprocess (pps ppx_blob)) (preprocessor_deps diff --git a/www/static/style.css b/www/static/style.css index 56c11b1d..98e5bca5 100644 --- a/www/static/style.css +++ b/www/static/style.css @@ -8,6 +8,7 @@ body { margin-bottom: 1em; min-height: 100%; background: url("/bg.jpg") no-repeat bottom right; + font-family: system-ui, sans-serif; } form { @@ -54,14 +55,22 @@ a { text-decoration: none; } -pre { - margin: 0.5em; +.comment p { + line-height: 1.3em; +} + +.comment pre { + margin: 0 2em; font-size: 1.1rem; - white-space: normal; + white-space: pre; } -pre { + +.found > li > pre { + margin: 0.5em; padding-left: 6em; text-indent: -6em; + font-size: 1.1rem; + white-space: normal; } pre em { @@ -74,7 +83,7 @@ ul { padding: 0; } -.found li { +.found > li { list-style: none; margin: 0; padding: 0; @@ -83,39 +92,45 @@ ul { margin-left: 0.95em; } -.found li em { +.found > li > pre em { margin: 0 -3px; padding: 3px; color: black; } -.found li:hover em { +.found > li:hover > pre em { background: #FADFB1; } -.found li a:hover em { +.found > li > pre a:hover em { background: #EABB60; border-bottom: 2px solid #553515; } -h1, ul.doc, p { +h1, ul.doc, .comment { margin: 0; padding: 0; margin-left: 3.4rem; } +.comment a, .comment a:visited { color: black } +.comment .at-tag { font-style: italic } +.comment li { list-style: square } + h1 { margin-bottom: 1em; font-size: 3em; + font-family: serif; } p.doc { margin-bottom: 1em; + margin-left: 2.3em; font-size: 1.5em; } -ul.doc li { +ul.doc > li { margin-bottom: 0.5em; } @@ -170,11 +185,15 @@ code { .ad { padding: 3rem 0; - font-family: monospace; + margin-left: 2.3em; font-style: italic; font-size: 1rem; } +pre, code, .ad, .packages a, input#q { + font-family: ui-monospace, 'Fira Code', 'Cascadia Code', 'Source Code Pro', Menlo, Consolas, 'DejaVu Sans Mono', monospace; +} + .ad svg { vertical-align: middle; margin-right: 0.5rem } .categories { @@ -206,7 +225,6 @@ code { display: inline-block; white-space: nowrap; margin-right: 1.5em; - font-family: monospace; } .packages a:hover { background: #eee; diff --git a/www/ui.ml b/www/ui.ml index 1556c1f0..c237e379 100644 --- a/www/ui.ml +++ b/www/ui.ml @@ -10,16 +10,17 @@ let string_of_kind = let open Db.Entry.Kind in function | Doc -> "doc" - | Type_decl _ -> "type" - | Module -> "mod" - | Exception _ -> "exn" + | Type_decl None -> "type" + | Type_decl (Some str) -> "type " ^ str + | Module -> "module" + | Exception _ -> "exception" | Class_type -> "class" - | Method -> "meth" + | Method -> "method" | Class -> "class" | Type_extension -> "type" - | Extension_constructor _ -> "cons" - | Module_type -> "sig" - | Constructor _ -> "cons" + | Extension_constructor _ -> "constructor" + | Module_type -> "module type" + | Constructor _ -> "constructor" | Field _ -> "field" | Val _ -> "val" @@ -33,7 +34,12 @@ let render_elt elt = | None -> [] in let kind = string_of_kind elt.kind ^ " " in - [ txt kind; a ~a:link [ em [ txt elt.name ] ] ] @ rhs + let doc = + if elt.doc_html = "" + then [] + else [ div ~a:[ a_class [ "comment" ] ] [ Unsafe.data elt.doc_html ] ] + in + pre (txt kind :: a ~a:link [ em [ txt elt.name ] ] :: rhs) :: doc let render_pkg elt = let open Db.Entry in @@ -47,9 +53,7 @@ let render_pkg elt = ] ] -let render_result elt = - let open Db.Entry in - render_pkg elt @ [ pre (render_elt elt); Unsafe.data elt.doc_html ] +let render_result elt = render_pkg elt @ render_elt elt let render ~pretty results = match results with diff --git a/www/www.ml b/www/www.ml index c1f74feb..91032d6b 100644 --- a/www/www.ml +++ b/www/www.ml @@ -1,18 +1,25 @@ module Storage = Db.Storage module H = Tyxml.Html +open Lwt.Syntax + +module Query_lwt = Query.Make (struct + type 'a t = 'a Lwt.t + + let return = Lwt.return + let map x f = Lwt.map f x + let bind x f = Lwt.bind x f + end) let api ~shards params = - let results = Query.search ~shards params in + let+ results = Query_lwt.search ~shards params in let pretty = Query.pretty params in - Lwt.return (Ui.render ~pretty results) + Ui.render ~pretty results let api ~shards params = if String.trim params.Query.query = "" then Lwt.return (Ui.explain ()) else api ~shards params -open Lwt.Syntax - let get_query params = Option.value ~default:"" (Dream.query params "q") let get_packages params =