Skip to content

Commit

Permalink
update docs and startup filepath handling
Browse files Browse the repository at this point in the history
  • Loading branch information
suchapalaver committed Dec 3, 2022
1 parent bd6a643 commit aa54ab7
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 32 deletions.
7 changes: 0 additions & 7 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1 @@
/target
cerevisiae.k21.hist.png
freq.tsv.gz
output.tsv
output/
reddit_feedback.txt
revcomp_issue.txt
.idea/
4 changes: 2 additions & 2 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License
# MIT License

Copyright (c) 2021 suchapalaver
Copyright (c) 2022 suchapalaver

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,23 @@
Run `krust` on the test data* in the [`krust` GitHub repo](https://github.com/suchapalaver/krust), searching for kmers of length 5, like this:

```bash
cargo run --release 5 cerevisae.pan.fa > output.tsv
cargo run --release 5 your/local/path/to/cerevisae.pan.fa > output.tsv
```

or, searching for kmers of length 21:

```bash
cargo run --release 21 cerevisae.pan.fa > output.tsv
cargo run --release 21 your/local/path/to/cerevisae.pan.fa > output.tsv
```

`krust` prints to `stdout`, writing, on alternate lines:

```bash
>{frequency}
{canonical k-mer}
>{frequency}
{canonical k-mer}
...
>{frequency}
{canonical k-mer}
>{frequency}
{canonical k-mer}
...
```

`krust` uses the [`rust-bio`](https://docs.rs/bio/0.38.0/bio/), [`rayon`](https://docs.rs/rayon/1.5.1/rayon/), and [`dashmap`](https://docs.rs/crate/dashmap/4.0.2) Rust libraries.
Expand Down
17 changes: 8 additions & 9 deletions src/configuration.rs
Original file line number Diff line number Diff line change
@@ -1,28 +1,27 @@
use std::env;
use std::error::Error;
use std::{env, error::Error, path::PathBuf};

/// Command-line configuration: the k-mer length and the input file location.
///
/// Values are produced by `Config::new` from the process arguments.
// NOTE(review): this text is a rendered commit diff without +/- markers, so the
// pre-commit fields (`kmer_len`, `filepath`) and their post-commit replacements
// (`k`, `path`) are both listed below — confirm against the checked-out file.
pub struct Config {
/// K-mer length (pre-commit field name).
pub kmer_len: usize,
/// Input file location held as a `String` (pre-commit field).
pub filepath: String,
/// K-mer length (post-commit field name); `Config::new` only accepts `0 < k < 33`.
pub k: usize,
/// Input file location held as a `PathBuf` (post-commit field).
pub path: PathBuf,
}

impl Config {
/// Builds a `Config` from the process arguments.
///
/// `args.nth(1)` skips the first item (conventionally the program name) and
/// parses the next one as the k-mer length; the item after that is taken as the
/// input path.
///
/// # Errors
///
/// Returns an error when the k-mer length argument is missing, cannot be parsed
/// as `usize`, or is not in the range `0 < k < 33`, and when the path argument
/// is missing.
// NOTE(review): rendered commit diff without +/- markers — where two near-identical
// statements follow each other, the first is the removed line and the second
// its replacement (`kmer_len` -> `k`, `filepath` -> `path`).
pub fn new(mut args: env::Args) -> Result<Config, Box<dyn Error>> {
let kmer_len: usize = match args.nth(1) {
let k: usize = match args.nth(1) {
Some(arg) => match arg.parse() {
// Accept only lengths from 1 to 32 inclusive.
Ok(kmer_len) if kmer_len > 0 && kmer_len < 33 => kmer_len,
Ok(k) if k > 0 && k < 33 => k,
// Parsed as a number, but outside the accepted range.
Ok(_) => return Err("k-mer length needs to be larger than zero and, for `krust` in its current working form, no more than 32".into()),
// Not parseable as `usize`; the offending argument is echoed in the message.
Err(_) => return Err(format!("issue with k-mer length argument: {}", arg).into()),
},
None => return Err("k-mer length input required".into()),
};

// The argument immediately after the k-mer length is the input path.
let filepath = match args.next() {
Some(arg) => arg,
let path = match args.next() {
Some(arg) => PathBuf::from(arg),
None => return Err("filepath argument needed".into()),
};

Ok(Config { kmer_len, filepath })
Ok(Config { k, path })
}
}
2 changes: 1 addition & 1 deletion src/kmer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ custom_error::custom_error! { pub ValidityError
}

/// A k-mer byte string: a newtype over `Vec<u8>` whose inner vector is public.
///
/// Supports `Debug` formatting and equality comparison through its derives.
// NOTE(review): presumably validity is enforced by constructors defined elsewhere
// in this file (a `ValidityError` is declared above) — confirm; nothing in this
// definition restricts the bytes.
// NOTE(review): the two `derive` lines are the before/after pair from the commit
// diff; the second one adds `Eq`.
#[derive(Debug, PartialEq)]
#[derive(Debug, Eq, PartialEq)]
pub struct Kmer(pub Vec<u8>);

/// Find the canonical kmer
Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ fn main() {
process::exit(1);
});

if let Err(e) = krust::startup::run(config.filepath, config.kmer_len) {
if let Err(e) = krust::startup::run(config.path, config.k) {
eprintln!("Application error: {}", e);
drop(e);
process::exit(1);
Expand Down
12 changes: 8 additions & 4 deletions src/startup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@ use std::{
error::Error,
hash::BuildHasherDefault,
io::{BufWriter, Stdout, Write},
path::Path,
};

pub fn run(filepath: String, k: usize) -> Result<(), Box<dyn Error>> {
pub fn run<P: AsRef<Path> + std::fmt::Debug>(path: P, k: usize) -> Result<(), Box<dyn Error>> {
let mut buf = BufWriter::new(std::io::stdout());

let _print_results = build_kmer_map(filepath, k)?
build_kmer_map(path, k)?
.into_iter()
.par_bridge()
.map(|(bitpacked_kmer, freq)| (UnpackedKmer::from_kmer_data(bitpacked_kmer, k).0, freq))
Expand All @@ -40,9 +41,12 @@ pub fn run(filepath: String, k: usize) -> Result<(), Box<dyn Error>> {
/// using a customized [`dashmap`](https://docs.rs/dashmap/4.0.2/dashmap/struct.DashMap.html)
/// with [`FxHasher`](https://docs.rs/fxhash/0.2.1/fxhash/struct.FxHasher.html) to update in parallel a
/// hashmap of canonical k-mers (keys) and their frequency in the data (values).
fn build_kmer_map(filepath: String, k: usize) -> Result<DashFx, Box<dyn Error>> {
fn build_kmer_map<P: AsRef<Path> + std::fmt::Debug>(
path: P,
k: usize,
) -> Result<DashFx, Box<dyn Error>> {
let kmer_map: DashFx = DashMap::with_hasher(BuildHasherDefault::<FxHasher>::default());
let _ = fasta::Reader::from_file(&filepath)?
fasta::Reader::from_file(path)?
.records()
.into_iter()
.par_bridge()
Expand Down
1 change: 0 additions & 1 deletion tests/find_invalid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,3 @@ fn find_invalid_works5() {
assert_eq!(0, ans);
assert_eq!(&b'N', dna.iter().collect::<Vec<_>>()[ans]);
}

0 comments on commit aa54ab7

Please sign in to comment.