From aa54ab79f2e73910c4bf7991eb09f2eb36d2cd4a Mon Sep 17 00:00:00 2001 From: Joseph Livesey Date: Sat, 3 Dec 2022 12:15:06 -0500 Subject: [PATCH] update docs and startup filepath handling --- .gitignore | 7 ------- LICENSE | 4 ++-- README.md | 14 +++++++------- src/configuration.rs | 17 ++++++++--------- src/kmer.rs | 2 +- src/main.rs | 2 +- src/startup.rs | 12 ++++++++---- tests/find_invalid.rs | 1 - 8 files changed, 27 insertions(+), 32 deletions(-) diff --git a/.gitignore b/.gitignore index 9a5b223..ea8c4bf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1 @@ /target -cerevisiae.k21.hist.png -freq.tsv.gz -output.tsv -output/ -reddit_feedback.txt -revcomp_issue.txt -.idea/ \ No newline at end of file diff --git a/LICENSE b/LICENSE index 9f85984..6c07f2b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ -MIT License +# MIT License -Copyright (c) 2021 suchapalaver +Copyright (c) 2022 suchapalaver Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 7f20623..ed36c37 100644 --- a/README.md +++ b/README.md @@ -5,23 +5,23 @@ Run `krust` on the test data* in the [`krust` Github repo](https://github.com/suchapalaver/krust), searching for kmers of length 5, like this: ```bash - cargo run --release 5 cerevisae.pan.fa > output.tsv +cargo run --release 5 your/local/path/to/cerevisae.pan.fa > output.tsv ``` or, searching for kmers of length 21: ```bash - cargo run --release 21 cerevisae.pan.fa > output.tsv +cargo run --release 21 your/local/path/to/cerevisae.pan.fa > output.tsv ``` `krust` prints to `stdout`, writing, on alternate lines: ```bash - >{frequency} - {canonical k-mer} - >{frequency} - {canonical k-mer} - ... +>{frequency} +{canonical k-mer} +>{frequency} +{canonical k-mer} +... ``` `krust` uses the [`rust-bio`](https://docs.rs/bio/0.38.0/bio/), [`rayon`](https://docs.rs/rayon/1.5.1/rayon/), and [`dashmap`](https://docs.rs/crate/dashmap/4.0.2) Rust libraries. diff --git a/src/configuration.rs b/src/configuration.rs index 188b139..2f7f69f 100644 --- a/src/configuration.rs +++ b/src/configuration.rs @@ -1,28 +1,27 @@ -use std::env; -use std::error::Error; +use std::{env, error::Error, path::PathBuf}; /// Parsing command line k-size and filepath arguments. pub struct Config { - pub kmer_len: usize, - pub filepath: String, + pub k: usize, + pub path: PathBuf, } impl Config { pub fn new(mut args: env::Args) -> Result> { - let kmer_len: usize = match args.nth(1) { + let k: usize = match args.nth(1) { Some(arg) => match arg.parse() { - Ok(kmer_len) if kmer_len > 0 && kmer_len < 33 => kmer_len, + Ok(k) if k > 0 && k < 33 => k, Ok(_) => return Err("k-mer length needs to be larger than zero and, for `krust` in its current working form, no more than 32".into()), Err(_) => return Err(format!("issue with k-mer length argument: {}", arg).into()), }, None => return Err("k-mer length input required".into()), }; - let filepath = match args.next() { - Some(arg) => arg, + let path = match args.next() { + Some(arg) => PathBuf::from(arg), None => return Err("filepath argument needed".into()), }; - Ok(Config { kmer_len, filepath }) + Ok(Config { k, path }) } } diff --git a/src/kmer.rs b/src/kmer.rs index 9aac49d..42c5ec9 100644 --- a/src/kmer.rs +++ b/src/kmer.rs @@ -3,7 +3,7 @@ custom_error::custom_error! { pub ValidityError } /// Creating a valid k-mer bytestring. -#[derive(Debug, PartialEq)] +#[derive(Debug, Eq, PartialEq)] pub struct Kmer(pub Vec); /// Find the canonical kmer diff --git a/src/main.rs b/src/main.rs index ba5c383..987d56b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,7 +6,7 @@ fn main() { process::exit(1); }); - if let Err(e) = krust::startup::run(config.filepath, config.kmer_len) { + if let Err(e) = krust::startup::run(config.path, config.k) { eprintln!("Application error: {}", e); drop(e); process::exit(1); diff --git a/src/startup.rs b/src/startup.rs index 8a437d7..ef1756a 100644 --- a/src/startup.rs +++ b/src/startup.rs @@ -12,12 +12,13 @@ use std::{ error::Error, hash::BuildHasherDefault, io::{BufWriter, Stdout, Write}, + path::Path, }; -pub fn run(filepath: String, k: usize) -> Result<(), Box> { +pub fn run + std::fmt::Debug>(path: P, k: usize) -> Result<(), Box> { let mut buf = BufWriter::new(std::io::stdout()); - let _print_results = build_kmer_map(filepath, k)? + build_kmer_map(path, k)? .into_iter() .par_bridge() .map(|(bitpacked_kmer, freq)| (UnpackedKmer::from_kmer_data(bitpacked_kmer, k).0, freq)) @@ -40,9 +41,12 @@ pub fn run(filepath: String, k: usize) -> Result<(), Box> { /// using a customized [`dashmap`](https://docs.rs/dashmap/4.0.2/dashmap/struct.DashMap.html) /// with [`FxHasher`](https://docs.rs/fxhash/0.2.1/fxhash/struct.FxHasher.html) to update in parallel a /// hashmap of canonical k-mers (keys) and their frequency in the data (values). -fn build_kmer_map(filepath: String, k: usize) -> Result> { +fn build_kmer_map + std::fmt::Debug>( + path: P, + k: usize, +) -> Result> { let kmer_map: DashFx = DashMap::with_hasher(BuildHasherDefault::::default()); - let _ = fasta::Reader::from_file(&filepath)? + fasta::Reader::from_file(path)? .records() .into_iter() .par_bridge() diff --git a/tests/find_invalid.rs b/tests/find_invalid.rs index 1278bce..2dd08ae 100644 --- a/tests/find_invalid.rs +++ b/tests/find_invalid.rs @@ -39,4 +39,3 @@ fn find_invalid_works5() { assert_eq!(0, ans); assert_eq!(&b'N', dna.iter().collect::>()[ans]); } -