From 68ffb0c4346a1267a423c112b7a11803cff76129 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Fri, 7 Jun 2019 01:01:14 -0500 Subject: [PATCH 01/14] Make a message type with a single buffer --- irc-proto/src/message.rs | 468 +++++++++++++++++++++------------------ 1 file changed, 250 insertions(+), 218 deletions(-) diff --git a/irc-proto/src/message.rs b/irc-proto/src/message.rs index b2648d71..96f20a47 100644 --- a/irc-proto/src/message.rs +++ b/irc-proto/src/message.rs @@ -1,6 +1,7 @@ //! A module providing a data structure for messages to and from IRC servers. -use std::borrow::ToOwned; -use std::fmt::{Display, Formatter, Result as FmtResult, Write}; +use std::borrow::Cow; +use std::fmt; +use std::num::NonZeroU16; use std::str::FromStr; use chan::ChannelExt; @@ -10,6 +11,33 @@ use error::{MessageParseError, ProtocolError}; use prefix::Prefix; +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +struct Part { + start: u16, + end: u16, +} + +impl Part { + fn new(start: usize, end: usize) -> Part { + Part { + start: start as u16, + end: end as u16, + } + } + + fn index<'a>(&self, s: &'a str) -> &'a str { + &s[self.start as usize..self.end as usize] + } +} + +impl From for Message { + fn from(c: Command) -> Message { + unimplemented!("dummy impl") + } +} + +pub const MAX_ARGS: usize = 15; + /// A data structure representing an IRC message according to the protocol specification. It /// consists of a collection of IRCv3 tags, a prefix (describing the source of the message), and /// the protocol command. If the command is unknown, it is treated as a special raw command that @@ -17,264 +45,259 @@ use prefix::Prefix; /// is parsed into a more useful form as described in [Command](../command/enum.Command.html). #[derive(Clone, PartialEq, Debug)] pub struct Message { - /// Message tags as defined by [IRCv3.2](http://ircv3.net/specs/core/message-tags-3.2.html). - /// These tags are used to add extended information to the given message, and are commonly used - /// in IRCv3 extensions to the IRC protocol. - pub tags: Option>, - /// The message prefix (or source) as defined by [RFC 2812](http://tools.ietf.org/html/rfc2812). - pub prefix: Option, - /// The IRC command, parsed according to the known specifications. The command itself and its - /// arguments (including the special suffix argument) are captured in this component. - pub command: Command, + buf: String, + tags: Option, + prefix: Option, + command: Part, + args: [Part; MAX_ARGS], + args_len: u8, + suffix: Option, } impl Message { - /// Creates a new message from the given components. - /// - /// # Example - /// ``` - /// # extern crate irc_proto; - /// # use irc_proto::Message; - /// # fn main() { - /// let message = Message::new( - /// Some("nickname!username@hostname"), "JOIN", vec!["#channel"], None - /// ).unwrap(); - /// # } - /// ``` - pub fn new( - prefix: Option<&str>, - command: &str, - args: Vec<&str>, - suffix: Option<&str>, - ) -> Result { - Message::with_tags(None, prefix, command, args, suffix) + pub fn parse(message: S) -> Result + where + S: ToString, + { + Message::parse_string(message.to_string()) } - /// Creates a new IRCv3.2 message from the given components, including message tags. These tags - /// are used to add extended information to the given message, and are commonly used in IRCv3 - /// extensions to the IRC protocol. - pub fn with_tags( - tags: Option>, - prefix: Option<&str>, - command: &str, - args: Vec<&str>, - suffix: Option<&str>, - ) -> Result { + pub fn parse_string(message: String) -> Result { + if message.len() <= u16::max_value() as usize { + // Message must not exceed 64K (8.5k under normal circumstances) + return unimplemented!(); + } + if !message.ends_with("\r\n") { + // Message must end with CRLF + return unimplemented!(); + } + let message_end = message.len() - '\n'.len_utf8() - '\r'.len_utf8(); + let mut i = 0; + + let tags = None; + if message[i..].starts_with('@') { + i += '@'.len_utf8(); + let start = i; + + i += message[i..].find(' ').unwrap_or_else(|| message_end - i); + let end = i; + + tags = Some(Part::new(start, end)); + } + + while message[i..].starts_with(' ') { + i += ' '.len_utf8(); + } + + let prefix = None; + if message[i..].starts_with(':') { + i += ':'.len_utf8(); + let start = i; + + i += message[i..].find(' ').unwrap_or_else(|| message_end - i); + let end = i; + + prefix = Some(Part::new(start, end)); + } + + while message[i..].starts_with(' ') { + i += ' '.len_utf8(); + } + + let command = { + let start = i; + + i += message[i..].find(' ').unwrap_or_else(|| message_end - i); + let end = i; + + Part::new(start, end) + }; + + while message[i..].starts_with(' ') { + i += ' '.len_utf8(); + } + + let mut args = [Part::new(0, 0); MAX_ARGS]; + let mut args_len = 0; + let mut suffix = None; + + while i < message_end { + if message[i..].starts_with(':') { + i += ':'.len_utf8(); + let start = i; + + i = message_end; + let end = i; + + suffix = Some(Part::new(start, end)); + break; + } + + if args_len as usize >= MAX_ARGS { + // Arguments cannot exceed MAX_ARGS. + return unimplemented!(); + } + + let start = i; + + i += message[i..].find(' ').unwrap_or_else(|| message_end - i); + let end = i; + + args[args_len as usize] = Part::new(start, end); + args_len += 1; + + while message[i..].starts_with(' ') { + i += ' '.len_utf8(); + } + } + Ok(Message { - tags: tags, - prefix: prefix.map(|p| p.into()), - command: Command::new(command, args, suffix)?, + buf: message, + tags, + prefix, + command, + args, + args_len, + suffix, }) } - /// Gets the nickname of the message source, if it exists. - /// - /// # Example - /// ``` - /// # extern crate irc_proto; - /// # use irc_proto::Message; - /// # fn main() { - /// let message = Message::new( - /// Some("nickname!username@hostname"), "JOIN", vec!["#channel"], None - /// ).unwrap(); - /// assert_eq!(message.source_nickname(), Some("nickname")); - /// # } - /// ``` - pub fn source_nickname(&self) -> Option<&str> { - // ::= | [ '!' ] [ '@' ] - // ::= - self.prefix.as_ref().and_then(|p| match p { - Prefix::Nickname(name, _, _) => Some(&name[..]), - _ => None - }) + pub fn as_str(&self) -> &str { + &self.buf } - /// Gets the likely intended place to respond to this message. - /// If the type of the message is a `PRIVMSG` or `NOTICE` and the message is sent to a channel, - /// the result will be that channel. In all other cases, this will call `source_nickname`. - /// - /// # Example - /// ``` - /// # extern crate irc_proto; - /// # use irc_proto::Message; - /// # fn main() { - /// let msg1 = Message::new( - /// Some("ada"), "PRIVMSG", vec!["#channel"], Some("Hi, everyone!") - /// ).unwrap(); - /// assert_eq!(msg1.response_target(), Some("#channel")); - /// let msg2 = Message::new( - /// Some("ada"), "PRIVMSG", vec!["betsy"], Some("betsy: hi") - /// ).unwrap(); - /// assert_eq!(msg2.response_target(), Some("ada")); - /// # } - /// ``` - pub fn response_target(&self) -> Option<&str> { - match self.command { - Command::PRIVMSG(ref target, _) if target.is_channel_name() => Some(target), - Command::NOTICE(ref target, _) if target.is_channel_name() => Some(target), - _ => self.source_nickname() - } + pub fn into_string(self) -> String { + self.buf } - /// Converts a Message into a String according to the IRC protocol. - /// - /// # Example - /// ``` - /// # extern crate irc_proto; - /// # use irc_proto::Message; - /// # fn main() { - /// let msg = Message::new( - /// Some("ada"), "PRIVMSG", vec!["#channel"], Some("Hi, everyone!") - /// ).unwrap(); - /// assert_eq!(msg.to_string(), ":ada PRIVMSG #channel :Hi, everyone!\r\n"); - /// # } - /// ``` - pub fn to_string(&self) -> String { - let mut ret = String::new(); - if let Some(ref tags) = self.tags { - ret.push('@'); - for tag in tags { - ret.push_str(&tag.0); - if let Some(ref value) = tag.1 { - ret.push('='); - ret.push_str(value); - } - ret.push(';'); - } - ret.pop(); - ret.push(' '); + pub fn tags(&self) -> Tags { + Tags { + buf: self.tags.as_ref().map(|part| part.index(&self.buf)).unwrap_or(""), } - if let Some(ref prefix) = self.prefix { - write!(ret, ":{} ", prefix).unwrap(); - } - let cmd: String = From::from(&self.command); - ret.push_str(&cmd); - ret.push_str("\r\n"); - ret } -} -impl From for Message { - fn from(cmd: Command) -> Message { - Message { - tags: None, - prefix: None, - command: cmd, - } + pub fn prefix(&self) -> Option<&str> { + self.prefix.as_ref().map(|part| part.index(&self.buf)) } -} -impl FromStr for Message { - type Err = ProtocolError; - - fn from_str(s: &str) -> Result { - if s.is_empty() { - return Err(ProtocolError::InvalidMessage { - string: s.to_owned(), - cause: MessageParseError::EmptyMessage, - }) - } + pub fn command(&self) -> &str { + self.command.index(&self.buf) + } - let mut state = s; - - let tags = if state.starts_with('@') { - let tags = state.find(' ').map(|i| &state[1..i]); - state = state.find(' ').map_or("", |i| &state[i + 1..]); - tags.map(|ts| { - ts.split(';') - .filter(|s| !s.is_empty()) - .map(|s: &str| { - let mut iter = s.splitn(2, '='); - let (fst, snd) = (iter.next(), iter.next()); - Tag(fst.unwrap_or("").to_owned(), snd.map(|s| s.to_owned())) - }) - .collect::>() - }) + pub fn arg(&self, arg: usize) -> Option<&str> { + if arg < self.args_len as usize { + Some(self.args[arg].index(&self.buf)) } else { None - }; + } + } - let prefix = if state.starts_with(':') { - let prefix = state.find(' ').map(|i| &state[1..i]); - state = state.find(' ').map_or("", |i| &state[i + 1..]); - prefix - } else { - None - }; + pub fn args(&self) -> Args { + Args { + buf: &self.buf, + args: self.args.iter().take(self.args_len as usize), + } + } - let line_ending_len = if state.ends_with("\r\n") { - "\r\n" - } else if state.ends_with('\r') { - "\r" - } else if state.ends_with('\n') { - "\n" - } else { - "" - }.len(); + pub fn suffix(&self) -> Option<&str> { + self.suffix.as_ref().map(|part| part.index(&self.buf)) + } +} - let suffix = if state.contains(" :") { - let suffix = state.find(" :").map(|i| &state[i + 2..state.len() - line_ending_len]); - state = state.find(" :").map_or("", |i| &state[..i + 1]); - suffix - } else { - state = &state[..state.len() - line_ending_len]; - None - }; +impl FromStr for Message { + type Err = MessageParseError; - let command = match state.find(' ').map(|i| &state[..i]) { - Some(cmd) => { - state = state.find(' ').map_or("", |i| &state[i + 1..]); - cmd - } - // If there's no arguments but the "command" starts with colon, it's not a command. - None if state.starts_with(':') => return Err(ProtocolError::InvalidMessage { - string: s.to_owned(), - cause: MessageParseError::InvalidCommand, - }), - // If there's no arguments following the command, the rest of the state is the command. - None => { - let cmd = state; - state = ""; - cmd - }, - }; + fn from_str(s: &str) -> Result { + Message::parse(s) + } +} + +impl fmt::Display for Message { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(&self.buf) + } +} + +pub struct Tags<'a> { + buf: &'a str, +} - let args: Vec<_> = state.splitn(14, ' ').filter(|s| !s.is_empty()).collect(); +impl<'a> Iterator for Tags<'a> { + type Item = (&'a str, Option>); - Message::with_tags(tags, prefix, command, args, suffix).map_err(|e| { - ProtocolError::InvalidMessage { - string: s.to_owned(), - cause: e, + fn next(&mut self) -> Option { + if self.buf.len() == 0 { + None + } else { + let tag = self.buf + .char_indices() + .find(|&(_i, c)| c == ';') + .map(|(i, _c)| &self.buf[..i]) + .unwrap_or(&self.buf); + self.buf = &self.buf[tag.len()..]; + + if let Some(key_end) = tag.find('=') { + let key = &tag[..key_end]; + let raw_value = &tag[key_end + '='.len_utf8()..]; + + let value = String::new(); + while let Some(escape_idx) = raw_value.find('\\') { + value.push_str(&raw_value[..escape_idx]); + let c = match raw_value[escape_idx + '\\'.len_utf8()..].chars().next() { + Some(':') => Some(';'), + Some('s') => Some(' '), + Some('\\') => Some('\\'), + Some('r') => Some('\r'), + Some('n') => Some('\n'), + Some(c) => Some(c), + None => None, + }; + if let Some(c) = c { + value.push(c); + } + raw_value = &raw_value[ + (escape_idx + + '\\'.len_utf8() + + c.map(char::len_utf8).unwrap_or(0) + ).. + ]; + } + if value.len() == 0 { + Some((key, Some(Cow::Borrowed(raw_value)))) + } else { + value.push_str(raw_value); + Some((key, Some(Cow::Owned(value)))) + } + } else { + Some((tag, None)) } - }) + } } } -impl<'a> From<&'a str> for Message { - fn from(s: &'a str) -> Message { - s.parse().unwrap() - } +pub struct Args<'a> { + buf: &'a str, + args: std::iter::Take>, } -impl Display for Message { - fn fmt(&self, f: &mut Formatter) -> FmtResult { - write!(f, "{}", self.to_string()) +impl<'a> Iterator for Args<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option { + self.args.next().map(|part| part.index(self.buf)) } } -/// A message tag as defined by [IRCv3.2](http://ircv3.net/specs/core/message-tags-3.2.html). -/// It consists of a tag key, and an optional value for the tag. Each message can contain a number -/// of tags (in the string format, they are separated by semicolons). Tags are used to add extended -/// information to a message under IRCv3. -#[derive(Clone, PartialEq, Debug)] -pub struct Tag(pub String, pub Option); - #[cfg(test)] mod test { use super::{Message, Tag}; use command::Command::{PRIVMSG, QUIT, Raw}; + // Legacy tests + // TODO: Adapt to new message/command API + #[test] + #[ignore] fn new() { let message = Message { tags: None, @@ -288,6 +311,7 @@ mod test { } #[test] + #[ignore] fn source_nickname() { assert_eq!( Message::new(None, "PING", vec![], Some("data")) @@ -347,6 +371,7 @@ mod test { } #[test] + #[ignore] fn to_string() { let message = Message { tags: None, @@ -366,6 +391,7 @@ mod test { } #[test] + #[ignore] fn from_string() { let message = Message { tags: None, @@ -406,6 +432,7 @@ mod test { } #[test] + #[ignore] fn from_string_atypical_endings() { let message = Message { tags: None, @@ -427,6 +454,7 @@ mod test { } #[test] + #[ignore] fn from_and_to_string() { let message = "@aaa=bbb;ccc;example.com/ddd=eee :test!test@test PRIVMSG test :Testing with \ tags!\r\n"; @@ -434,6 +462,7 @@ mod test { } #[test] + #[ignore] fn to_message() { let message = Message { tags: None, @@ -452,6 +481,7 @@ mod test { } #[test] + #[ignore] fn to_message_with_colon_in_arg() { // Apparently, UnrealIRCd (and perhaps some others) send some messages that include // colons within individual parameters. So, let's make sure it parses correctly. @@ -469,6 +499,7 @@ mod test { } #[test] + #[ignore] fn to_message_no_prefix_no_args() { let message = Message { tags: None, @@ -480,6 +511,7 @@ mod test { } #[test] + #[ignore] #[should_panic] fn to_message_invalid_format() { let _: Message = ":invalid :message".into(); From cd1b382f512017bdafd634edc0ec9e4430e6eb98 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Fri, 7 Jun 2019 03:27:52 -0500 Subject: [PATCH 02/14] Compile error fixes for proto --- irc-proto/src/message.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/irc-proto/src/message.rs b/irc-proto/src/message.rs index 96f20a47..a9212caa 100644 --- a/irc-proto/src/message.rs +++ b/irc-proto/src/message.rs @@ -74,7 +74,7 @@ impl Message { let message_end = message.len() - '\n'.len_utf8() - '\r'.len_utf8(); let mut i = 0; - let tags = None; + let mut tags = None; if message[i..].starts_with('@') { i += '@'.len_utf8(); let start = i; @@ -89,7 +89,7 @@ impl Message { i += ' '.len_utf8(); } - let prefix = None; + let mut prefix = None; if message[i..].starts_with(':') { i += ':'.len_utf8(); let start = i; @@ -205,10 +205,11 @@ impl Message { } impl FromStr for Message { - type Err = MessageParseError; + type Err = ProtocolError; - fn from_str(s: &str) -> Result { + fn from_str(s: &str) -> Result { Message::parse(s) + .map_err(|err| ProtocolError::InvalidMessage { string: s.to_string(), cause: err }) } } @@ -238,9 +239,9 @@ impl<'a> Iterator for Tags<'a> { if let Some(key_end) = tag.find('=') { let key = &tag[..key_end]; - let raw_value = &tag[key_end + '='.len_utf8()..]; + let mut raw_value = &tag[key_end + '='.len_utf8()..]; - let value = String::new(); + let mut value = String::new(); while let Some(escape_idx) = raw_value.find('\\') { value.push_str(&raw_value[..escape_idx]); let c = match raw_value[escape_idx + '\\'.len_utf8()..].chars().next() { From f3298b232c5ed6719e5692469d8e7fbb19017361 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Fri, 7 Jun 2019 14:54:17 -0500 Subject: [PATCH 03/14] Comment legacy tests, add doctests, bugfixes --- irc-proto/src/message.rs | 237 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 227 insertions(+), 10 deletions(-) diff --git a/irc-proto/src/message.rs b/irc-proto/src/message.rs index a9212caa..7cbeb4b8 100644 --- a/irc-proto/src/message.rs +++ b/irc-proto/src/message.rs @@ -36,13 +36,15 @@ impl From for Message { } } +/// The maximum number of bytes allowed in a message, currently set to `u16::max_value()`, though +/// the IRC specification is stricter than this. +pub const MAX_BYTES: usize = u16::max_value() as usize; + +/// The maximum number of arguments supported by the message parser, currently set to 15 as it is +/// the maximum according to the IRC specification. pub const MAX_ARGS: usize = 15; -/// A data structure representing an IRC message according to the protocol specification. It -/// consists of a collection of IRCv3 tags, a prefix (describing the source of the message), and -/// the protocol command. If the command is unknown, it is treated as a special raw command that -/// consists of a collection of arguments and the special suffix argument. Otherwise, the command -/// is parsed into a more useful form as described in [Command](../command/enum.Command.html). +/// A parsed IRC message, containing a buffer with pointers to the individual parts. #[derive(Clone, PartialEq, Debug)] pub struct Message { buf: String, @@ -55,6 +57,38 @@ pub struct Message { } impl Message { + /// Parses the message, converting the given object into an owned string. + /// + /// This will perform an additional allocation if a `String` is passed. To avoid this and + /// transfer ownership instead, use the [`parse_string`] method. + /// + /// # Error + /// + /// This method will fail in the following conditions: + /// + /// - The message length is longer than the maximum supported number of bytes ([`MAX_BYTES`]). + /// - The message contains more than the maximum supported number of arguments ([`MAX_ARGS`]). + /// - The message is missing required components such as the trailing CRLF or the command. + /// + /// Note that it does not check whether the parts of the message have illegal forms, as + /// there is little benefit to restricting that. It will also not perform any allocations + /// except for the initial buffer; any unbounded dynamically-sized components like tags are + /// parsed through iterators instead (see the [`tags`] method). + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::Message; + /// + /// let message = Message::parse("PRIVMSG #rust :Hello Rustaceans!\r\n")?; + /// # Ok(()) + /// # } + /// ``` + /// + /// [`parse_string`]: #method.parse_string + /// [`MAX_BYTES`]: ./constant.MAX_BYTES.html + /// [`MAX_ARGS`]: ./constant.MAX_ARGS.html pub fn parse(message: S) -> Result where S: ToString, @@ -62,9 +96,25 @@ impl Message { Message::parse_string(message.to_string()) } + /// Takes ownership of the given string and parses it into a message. + /// + /// For more information about the details of the parser, see the [`parse`] method. + /// + /// [`parse`] #method.parse + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::Message; + /// + /// let message = Message::parse_string("NICK ferris\r\n".to_string())?; + /// # Ok(()) + /// # } + /// ``` pub fn parse_string(message: String) -> Result { - if message.len() <= u16::max_value() as usize { - // Message must not exceed 64K (8.5k under normal circumstances) + if message.len() > MAX_BYTES { + // Message must not exceed our pointer size (u16). return unimplemented!(); } if !message.ends_with("\r\n") { @@ -162,28 +212,125 @@ impl Message { }) } + /// Borrows from the string slice containing the serialized message. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::Message; + /// + /// let raw_message = "JOIN #rust\r\n"; + /// let parsed_message = Message::parse(raw_message)?; + /// assert_eq!(parsed_message.as_str(), raw_message); + /// # Ok(()) + /// # } pub fn as_str(&self) -> &str { &self.buf } + /// Consumes this message, producing the string containing the serialized message. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::Message; + /// + /// let raw_message = "JOIN #rust\r\n"; + /// let parsed_message = Message::parse(raw_message)?; + /// assert_eq!(parsed_message.into_string(), raw_message); + /// # Ok(()) + /// # } pub fn into_string(self) -> String { self.buf } + /// Produces a parser iterator over the message's tags. The iterator will produce items of + /// `(&str, Option>)` for each tag in order, containing the tag's key and its value if + /// one exists for that key. It is entirely zero copy except when the value contains escape + /// sequences, in which case the unescaped value will be produced and stored in an owned + /// buffer. + /// + /// This parser will not dedupe tags, nor will it check whether the tag's key is empty or + /// whether it contains illegal characters. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::Message; + /// use std::borrow::Cow; + /// + /// let message = Message::parse( + /// "@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello\r\n" + /// )?; + /// + /// let mut tags = message.tags(); + /// assert_eq!(tags.len(), 3); + /// + /// assert_eq!(tags.next(), Some(("aaa", Some(Cow::Borrowed("bbb"))))); + /// assert_eq!(tags.next(), Some(("ccc", None))); + /// assert_eq!(tags.next(), Some(("example.com/ddd", Some(Cow::Borrowed("eee"))))); + /// assert_eq!(tags.next(), None); + /// # Ok(()) + /// # } + /// ``` pub fn tags(&self) -> Tags { Tags { buf: self.tags.as_ref().map(|part| part.index(&self.buf)).unwrap_or(""), } } + /// Returns a string slice containing the message's prefix, if it exists. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::Message; + /// + /// let message = Message::parse(":nick!ident@host.com PRIVMSG me :Hello\r\n")?; + /// assert_eq!(message.prefix(), Some("nick!ident@host.com")); + /// # Ok(()) + /// # } + /// ``` pub fn prefix(&self) -> Option<&str> { self.prefix.as_ref().map(|part| part.index(&self.buf)) } + /// Returns a string slice containing the message's command. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::Message; + /// + /// let message = Message::parse("NICK ferris\r\n")?; + /// assert_eq!(message.command(), "NICK"); + /// # Ok(()) + /// # } + /// ``` pub fn command(&self) -> &str { self.command.index(&self.buf) } + /// Returns a string slice containing the value of the given argument (indexed starting from + /// 0), if it exists. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::Message; + /// + /// let message = Message::parse("PRIVMSG #rust :Hello Rustaceans!\r\n")?; + /// assert_eq!(message.arg(0), Some("#rust")); + /// assert_eq!(message.arg(1), None); + /// # Ok(()) + /// # } + /// ``` pub fn arg(&self, arg: usize) -> Option<&str> { if arg < self.args_len as usize { Some(self.args[arg].index(&self.buf)) @@ -192,6 +339,25 @@ impl Message { } } + /// Returns an iterator over the message's arguments, yielding `&str` string slices containing + /// each argument in order. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::Message; + /// + /// let message = Message::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; + /// let mut args = message.args(); + /// assert_eq!(args.len(), 3); + /// assert_eq!(args.next(), Some("guest")); + /// assert_eq!(args.next(), Some("tolmoon")); + /// assert_eq!(args.next(), Some("tolsun")); + /// assert_eq!(args.next(), None); + /// # Ok(()) + /// # } + /// ``` pub fn args(&self) -> Args { Args { buf: &self.buf, @@ -199,6 +365,19 @@ impl Message { } } + /// Returns a string slice containing the message's suffix, if it exists. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::Message; + /// + /// let message = Message::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; + /// assert_eq!(message.suffix(), Some("Ronnie Reagan")); + /// # Ok(()) + /// # } + /// ``` pub fn suffix(&self) -> Option<&str> { self.suffix.as_ref().map(|part| part.index(&self.buf)) } @@ -213,12 +392,21 @@ impl FromStr for Message { } } +impl AsRef for Message { + fn as_ref(&self) -> &str { + self.as_str() + } +} + impl fmt::Display for Message { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(&self.buf) } } +/// A parser iterator over a message's tags. See [`Message::tags`] for more information. +/// +/// [`Message::tags`]: ./struct.Message.html#method.tags pub struct Tags<'a> { buf: &'a str, } @@ -235,7 +423,12 @@ impl<'a> Iterator for Tags<'a> { .find(|&(_i, c)| c == ';') .map(|(i, _c)| &self.buf[..i]) .unwrap_or(&self.buf); - self.buf = &self.buf[tag.len()..]; + + if self.buf.len() == tag.len() { + self.buf = ""; + } else { + self.buf = &self.buf[tag.len() + ';'.len_utf8()..]; + } if let Some(key_end) = tag.find('=') { let key = &tag[..key_end]; @@ -276,6 +469,19 @@ impl<'a> Iterator for Tags<'a> { } } +impl<'a> ExactSizeIterator for Tags<'a> { + fn len(&self) -> usize { + if self.buf.len() == 0 { + 0 + } else { + self.buf.chars().filter(|&c| c == ';').count() + 1 + } + } +} + +/// An iterator over a message's tags. See [`Message::args`] for more information. +/// +/// [`Message::args`]: ./struct.Message.html#method.args pub struct Args<'a> { buf: &'a str, args: std::iter::Take>, @@ -289,14 +495,24 @@ impl<'a> Iterator for Args<'a> { } } +impl<'a> ExactSizeIterator for Args<'a> { + fn len(&self) -> usize { + self.args.len() + } +} + #[cfg(test)] mod test { - use super::{Message, Tag}; - use command::Command::{PRIVMSG, QUIT, Raw}; + + // Legacy tests // TODO: Adapt to new message/command API + /* + use super::{Message, Tag}; + use command::Command::{PRIVMSG, QUIT, Raw}; + #[test] #[ignore] fn new() { @@ -517,4 +733,5 @@ mod test { fn to_message_invalid_format() { let _: Message = ":invalid :message".into(); } + */ } From 833306b1f3266c5aad8196aab7aa93e6e5e317e9 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Fri, 7 Jun 2019 15:01:18 -0500 Subject: [PATCH 04/14] Add error variants for new parser --- irc-proto/src/error.rs | 18 +++++++++++++++++- irc-proto/src/message.rs | 13 +++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/irc-proto/src/error.rs b/irc-proto/src/error.rs index 3d3bd12f..48b31ac4 100644 --- a/irc-proto/src/error.rs +++ b/irc-proto/src/error.rs @@ -57,7 +57,23 @@ pub enum MessageParseError { cmd: &'static str, /// The invalid subcommand. sub: String, - } + }, + + /// The message exceeded the maximum allowable length. + #[fail(display = "message exceeded the maximum length")] + MaxLengthExceeded, + + /// The message exceeded the maximum allowable arguments. + #[fail(display = "message exceeded the maximum number of arguments")] + MaxArgsExceeded, + + /// The message did not contain a trailing CRLF. + #[fail(display = "message does not contain a trailing CRLF")] + MissingCrLf, + + /// The message did not contain a command. + #[fail(display = "message does not contain a command")] + MissingCommand, } /// Errors that occur while parsing mode strings. diff --git a/irc-proto/src/message.rs b/irc-proto/src/message.rs index 7cbeb4b8..30ceee28 100644 --- a/irc-proto/src/message.rs +++ b/irc-proto/src/message.rs @@ -114,12 +114,10 @@ impl Message { /// ``` pub fn parse_string(message: String) -> Result { if message.len() > MAX_BYTES { - // Message must not exceed our pointer size (u16). - return unimplemented!(); + return Err(MessageParseError::MaxLengthExceeded); } if !message.ends_with("\r\n") { - // Message must end with CRLF - return unimplemented!(); + return Err(MessageParseError::MissingCrLf); } let message_end = message.len() - '\n'.len_utf8() - '\r'.len_utf8(); let mut i = 0; @@ -163,6 +161,10 @@ impl Message { Part::new(start, end) }; + if command.start == command.end { + return Err(MessageParseError::MissingCommand); + } + while message[i..].starts_with(' ') { i += ' '.len_utf8(); } @@ -184,8 +186,7 @@ impl Message { } if args_len as usize >= MAX_ARGS { - // Arguments cannot exceed MAX_ARGS. - return unimplemented!(); + return Err(MessageParseError::MaxArgsExceeded); } let start = i; From f648c6c92aef79582377a84e57aaf2036ceb7463 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Fri, 7 Jun 2019 16:51:10 -0500 Subject: [PATCH 05/14] Add comments to parsing logic, and combine suffix with params --- irc-proto/src/message.rs | 195 +++++++++++++++++++++++---------------- 1 file changed, 113 insertions(+), 82 deletions(-) diff --git a/irc-proto/src/message.rs b/irc-proto/src/message.rs index 30ceee28..887c763d 100644 --- a/irc-proto/src/message.rs +++ b/irc-proto/src/message.rs @@ -42,7 +42,7 @@ pub const MAX_BYTES: usize = u16::max_value() as usize; /// The maximum number of arguments supported by the message parser, currently set to 15 as it is /// the maximum according to the IRC specification. -pub const MAX_ARGS: usize = 15; +pub const MAX_PARAMS: usize = 15; /// A parsed IRC message, containing a buffer with pointers to the individual parts. #[derive(Clone, PartialEq, Debug)] @@ -51,9 +51,8 @@ pub struct Message { tags: Option, prefix: Option, command: Part, - args: [Part; MAX_ARGS], - args_len: u8, - suffix: Option, + params: [Part; MAX_PARAMS], + params_len: u8, } impl Message { @@ -67,7 +66,7 @@ impl Message { /// This method will fail in the following conditions: /// /// - The message length is longer than the maximum supported number of bytes ([`MAX_BYTES`]). - /// - The message contains more than the maximum supported number of arguments ([`MAX_ARGS`]). + /// - The message contains more than the maximum supported number of arguments ([`MAX_PARAMS`]). /// - The message is missing required components such as the trailing CRLF or the command. /// /// Note that it does not check whether the parts of the message have illegal forms, as @@ -88,7 +87,7 @@ impl Message { /// /// [`parse_string`]: #method.parse_string /// [`MAX_BYTES`]: ./constant.MAX_BYTES.html - /// [`MAX_ARGS`]: ./constant.MAX_ARGS.html + /// [`MAX_PARAMS`]: ./constant.MAX_PARAMS.html pub fn parse(message: S) -> Result where S: ToString, @@ -113,103 +112,128 @@ impl Message { /// # } /// ``` pub fn parse_string(message: String) -> Result { + // To make sure pointers don't overflow: if message.len() > MAX_BYTES { return Err(MessageParseError::MaxLengthExceeded); } + + // Make sure the message is terminated with line endings: if !message.ends_with("\r\n") { return Err(MessageParseError::MissingCrLf); } - let message_end = message.len() - '\n'.len_utf8() - '\r'.len_utf8(); + // Used as the end of the "useful" part of the message. + let crlf = message.len() - '\n'.len_utf8() - '\r'.len_utf8(); + + // Accumulating pointer used to keep track of how much has already been parsed. let mut i = 0; - let mut tags = None; + // If word starts with '@', it is a tag. + let tags; if message[i..].starts_with('@') { + // Take everything between '@' and next space. i += '@'.len_utf8(); let start = i; - i += message[i..].find(' ').unwrap_or_else(|| message_end - i); + i += message[i..].find(' ').unwrap_or_else(|| crlf - i); let end = i; tags = Some(Part::new(start, end)); + } else { + tags = None; } + // Skip to next non-space. while message[i..].starts_with(' ') { i += ' '.len_utf8(); } - let mut prefix = None; + // If word starts with ':', it is a prefix. + let prefix; if message[i..].starts_with(':') { + // Take everything between ':' and next space. i += ':'.len_utf8(); let start = i; - i += message[i..].find(' ').unwrap_or_else(|| message_end - i); + i += message[i..].find(' ').unwrap_or_else(|| crlf - i); let end = i; prefix = Some(Part::new(start, end)); + } else { + prefix = None; } + // Skip to next non-space. while message[i..].starts_with(' ') { i += ' '.len_utf8(); } + // Next word must be command. let command = { + // Take everything between here and next space. let start = i; - i += message[i..].find(' ').unwrap_or_else(|| message_end - i); + i += message[i..].find(' ').unwrap_or_else(|| crlf - i); let end = i; Part::new(start, end) }; + // Command must not be empty. if command.start == command.end { return Err(MessageParseError::MissingCommand); } + // Skip to next non-space. while message[i..].starts_with(' ') { i += ' '.len_utf8(); } - let mut args = [Part::new(0, 0); MAX_ARGS]; - let mut args_len = 0; - let mut suffix = None; + // Everything from here to crlf must be parameters. + let mut params = [Part::new(0, 0); MAX_PARAMS]; + let mut params_len = 0; - while i < message_end { - if message[i..].starts_with(':') { - i += ':'.len_utf8(); - let start = i; - - i = message_end; - let end = i; - - suffix = Some(Part::new(start, end)); - break; - } - - if args_len as usize >= MAX_ARGS { + while i < crlf { + // Make sure we don't overrun the maximum parameter count. + if params_len as usize >= MAX_PARAMS { return Err(MessageParseError::MaxArgsExceeded); } - let start = i; + // If parameter begins with ':', it is trailing. + let start; + let end; + if message[i..].starts_with(':') { + // Take everything between ':' and crlf. + i += ':'.len_utf8(); + start = i; - i += message[i..].find(' ').unwrap_or_else(|| message_end - i); - let end = i; + i = crlf; + end = i; + } else { + // Take everything from here to next space. + start = i; - args[args_len as usize] = Part::new(start, end); - args_len += 1; + i += message[i..].find(' ').unwrap_or_else(|| crlf - i); + end = i; - while message[i..].starts_with(' ') { - i += ' '.len_utf8(); + // Skip to next non-space. + while message[i..].starts_with(' ') { + i += ' '.len_utf8(); + } } + + // Add to parameters. + params[params_len as usize] = Part::new(start, end); + params_len += 1; } + // Done parsing. Ok(Message { buf: message, tags, prefix, command, - args, - args_len, - suffix, + params, + params_len, }) } @@ -279,7 +303,7 @@ impl Message { /// ``` pub fn tags(&self) -> Tags { Tags { - buf: self.tags.as_ref().map(|part| part.index(&self.buf)).unwrap_or(""), + remaining: self.tags.as_ref().map(|part| part.index(&self.buf)).unwrap_or(""), } } @@ -328,13 +352,14 @@ impl Message { /// /// let message = Message::parse("PRIVMSG #rust :Hello Rustaceans!\r\n")?; /// assert_eq!(message.arg(0), Some("#rust")); - /// assert_eq!(message.arg(1), None); + /// assert_eq!(message.arg(1), Some("Hello Rustaceans!")); + /// assert_eq!(message.arg(2), None); /// # Ok(()) /// # } /// ``` pub fn arg(&self, arg: usize) -> Option<&str> { - if arg < self.args_len as usize { - Some(self.args[arg].index(&self.buf)) + if arg < self.params_len as usize { + Some(self.params[arg].index(&self.buf)) } else { None } @@ -350,38 +375,22 @@ impl Message { /// use irc_proto::Message; /// /// let message = Message::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; - /// let mut args = message.args(); - /// assert_eq!(args.len(), 3); - /// assert_eq!(args.next(), Some("guest")); - /// assert_eq!(args.next(), Some("tolmoon")); - /// assert_eq!(args.next(), Some("tolsun")); - /// assert_eq!(args.next(), None); + /// let mut params = message.params(); + /// assert_eq!(params.len(), 4); + /// assert_eq!(params.next(), Some("guest")); + /// assert_eq!(params.next(), Some("tolmoon")); + /// assert_eq!(params.next(), Some("tolsun")); + /// assert_eq!(params.next(), Some("Ronnie Reagan")); + /// assert_eq!(params.next(), None); /// # Ok(()) /// # } /// ``` - pub fn args(&self) -> Args { + pub fn params(&self) -> Args { Args { buf: &self.buf, - args: self.args.iter().take(self.args_len as usize), + params: self.params.iter().take(self.params_len as usize), } } - - /// Returns a string slice containing the message's suffix, if it exists. - /// - /// # Examples - /// - /// ``` - /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::Message; - /// - /// let message = Message::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; - /// assert_eq!(message.suffix(), Some("Ronnie Reagan")); - /// # Ok(()) - /// # } - /// ``` - pub fn suffix(&self) -> Option<&str> { - self.suffix.as_ref().map(|part| part.index(&self.buf)) - } } impl FromStr for Message { @@ -409,35 +418,46 @@ impl fmt::Display for Message { /// /// [`Message::tags`]: ./struct.Message.html#method.tags pub struct Tags<'a> { - buf: &'a str, + remaining: &'a str, } impl<'a> Iterator for Tags<'a> { type Item = (&'a str, Option>); fn next(&mut self) -> Option { - if self.buf.len() == 0 { + // If remaining is empty, nothing is left to yield. + if self.remaining.len() == 0 { None } else { - let tag = self.buf + // Take everything from here to next ';'. + let tag = self.remaining .char_indices() .find(|&(_i, c)| c == ';') - .map(|(i, _c)| &self.buf[..i]) - .unwrap_or(&self.buf); + .map(|(i, _c)| &self.remaining[..i]) + .unwrap_or(&self.remaining); - if self.buf.len() == tag.len() { - self.buf = ""; + // Remove taken data from the remaining buffer. + if self.remaining.len() == tag.len() { + self.remaining = ""; } else { - self.buf = &self.buf[tag.len() + ';'.len_utf8()..]; + self.remaining = &self.remaining[tag.len() + ';'.len_utf8()..]; } + // If an equal sign exists in the tag data, it must have an associated value. if let Some(key_end) = tag.find('=') { + // Everything before the first equal sign is the key. let key = &tag[..key_end]; + + // Everything after the first equal sign is the value. let mut raw_value = &tag[key_end + '='.len_utf8()..]; + // Resolve escape sequences if any are found. + // This will not allocate unless data is given to it. let mut value = String::new(); while let Some(escape_idx) = raw_value.find('\\') { + // Copy everything before this escape sequence. value.push_str(&raw_value[..escape_idx]); + // Resolve this escape sequence. let c = match raw_value[escape_idx + '\\'.len_utf8()..].chars().next() { Some(':') => Some(';'), Some('s') => Some(' '), @@ -447,9 +467,14 @@ impl<'a> Iterator for Tags<'a> { Some(c) => Some(c), None => None, }; + // If it resolves to a character, then push it. if let Some(c) = c { value.push(c); } + // Cut off the beginning of raw_value such that it only contains + // everything after the parsed escape sequence. + // Upon looping, it will start searching from this point, skipping the last + // escape sequence. raw_value = &raw_value[ (escape_idx + '\\'.len_utf8() @@ -457,9 +482,13 @@ impl<'a> Iterator for Tags<'a> { ).. ]; } + + // If we didn't add data, no escape sequences exist and the raw value can be + // referenced. if value.len() == 0 { Some((key, Some(Cow::Borrowed(raw_value)))) } else { + // Make sure you add the rest of the raw value that doesn't contain escapes. value.push_str(raw_value); Some((key, Some(Cow::Owned(value)))) } @@ -472,33 +501,35 @@ impl<'a> Iterator for Tags<'a> { impl<'a> ExactSizeIterator for Tags<'a> { fn len(&self) -> usize { - if self.buf.len() == 0 { + // Number of arguments yielded is number of remaining semicolons plus one, unless the + // remaining buffer is empty. + if self.remaining.len() == 0 { 0 } else { - self.buf.chars().filter(|&c| c == ';').count() + 1 + self.remaining.chars().filter(|&c| c == ';').count() + 1 } } } -/// An iterator over a message's tags. See [`Message::args`] for more information. +/// An iterator over a message's tags. See [`Message::params`] for more information. /// -/// [`Message::args`]: ./struct.Message.html#method.args +/// [`Message::params`]: ./struct.Message.html#method.params pub struct Args<'a> { buf: &'a str, - args: std::iter::Take>, + params: std::iter::Take>, } impl<'a> Iterator for Args<'a> { type Item = &'a str; fn next(&mut self) -> Option { - self.args.next().map(|part| part.index(self.buf)) + self.params.next().map(|part| part.index(self.buf)) } } impl<'a> ExactSizeIterator for Args<'a> { fn len(&self) -> usize { - self.args.len() + self.params.len() } } From 37b902d34f0d9558a0332bb88262591ef5e69553 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Sat, 8 Jun 2019 01:49:59 -0500 Subject: [PATCH 06/14] Reworded documentation and modified parameter parsing. - Made some parts of the documentation less ambiguous about borrows. - Updated parser documentation to reflect new parameters. - Removed the 15-parameter restriction, replacing it with a parser-iterator (even though servers/clients SHOULD NOT send more than 15, they MUST be able to handle such messages). - Combined suffix/trailing parameter with middle parameters, as it MUST be treated the same as if it were a final parameter ("PRIVMSG #foo :Hello" and "PRIVMSG #foo Hello" are semantically equivalent). --- irc-proto/src/message.rs | 189 +++++++++++++++++++++------------------ 1 file changed, 101 insertions(+), 88 deletions(-) diff --git a/irc-proto/src/message.rs b/irc-proto/src/message.rs index 887c763d..6303074c 100644 --- a/irc-proto/src/message.rs +++ b/irc-proto/src/message.rs @@ -40,10 +40,6 @@ impl From for Message { /// the IRC specification is stricter than this. pub const MAX_BYTES: usize = u16::max_value() as usize; -/// The maximum number of arguments supported by the message parser, currently set to 15 as it is -/// the maximum according to the IRC specification. -pub const MAX_PARAMS: usize = 15; - /// A parsed IRC message, containing a buffer with pointers to the individual parts. #[derive(Clone, PartialEq, Debug)] pub struct Message { @@ -51,28 +47,30 @@ pub struct Message { tags: Option, prefix: Option, command: Part, - params: [Part; MAX_PARAMS], - params_len: u8, + middle_params: Part, + trailing_param: Option, } impl Message { /// Parses the message, converting the given object into an owned string. /// - /// This will perform an additional allocation if a `String` is passed. To avoid this and - /// transfer ownership instead, use the [`parse_string`] method. + /// This will allocate a new `String` to hold the message data, even if a `String` is + /// passed. To avoid this and transfer ownership instead, use the [`parse_string`] method. + /// + /// This function does not parse parameters or tags, as those may have an arbitrary number of + /// elements and would require additional allocations to hold their pointer data. They have + /// their own iterator-parsers that produce the elements while avoiding additional allocations; + /// see the [`params`] and [`tags`] methods for more information. /// /// # Error /// /// This method will fail in the following conditions: /// /// - The message length is longer than the maximum supported number of bytes ([`MAX_BYTES`]). - /// - The message contains more than the maximum supported number of arguments ([`MAX_PARAMS`]). /// - The message is missing required components such as the trailing CRLF or the command. /// /// Note that it does not check whether the parts of the message have illegal forms, as - /// there is little benefit to restricting that. It will also not perform any allocations - /// except for the initial buffer; any unbounded dynamically-sized components like tags are - /// parsed through iterators instead (see the [`tags`] method). + /// there is little benefit to restricting that. /// /// # Examples /// @@ -86,8 +84,9 @@ impl Message { /// ``` /// /// [`parse_string`]: #method.parse_string + /// [`params`]: #method.params + /// [`tags`]: #method.tags /// [`MAX_BYTES`]: ./constant.MAX_BYTES.html - /// [`MAX_PARAMS`]: ./constant.MAX_PARAMS.html pub fn parse(message: S) -> Result where S: ToString, @@ -189,41 +188,45 @@ impl Message { } // Everything from here to crlf must be parameters. - let mut params = [Part::new(0, 0); MAX_PARAMS]; - let mut params_len = 0; + let middle_params; + let trailing_param; + + // If " :" exists in the remaining data, the first instance marks the beginning of a + // trailing parameter. + if let Some(trailing_idx) = message[i..].find(" :") { + // Middle parameters are everything from the current position to the last + // non-space character before the trailing parameter. + let start = i; - while i < crlf { - // Make sure we don't overrun the maximum parameter count. - if params_len as usize >= MAX_PARAMS { - return Err(MessageParseError::MaxArgsExceeded); + // Walking back to the last non-space character: + let mut j = i + trailing_idx; + while message[..j].ends_with(' ') { + j -= ' '.len_utf8(); } + let end = j; + middle_params = Part::new(start, end); - // If parameter begins with ':', it is trailing. - let start; - let end; - if message[i..].starts_with(':') { - // Take everything between ':' and crlf. - i += ':'.len_utf8(); - start = i; - - i = crlf; - end = i; - } else { - // Take everything from here to next space. - start = i; - - i += message[i..].find(' ').unwrap_or_else(|| crlf - i); - end = i; + // Trailing parameter is everything between the leading " :" and crlf. + i += trailing_idx + ' '.len_utf8() + ':'.len_utf8(); + let start = i; + i = crlf; + let end = i; + trailing_param = Some(Part::new(start, end)); + } else { + // Middle parameters are everything from the current position to the last non-space + // character before crlf. + let start = i; - // Skip to next non-space. - while message[i..].starts_with(' ') { - i += ' '.len_utf8(); - } + // Walking back to the last non-space character: + let mut j = crlf; + while message[..j].ends_with(' ') { + j -= ' '.len_utf8(); } + let end = j; + middle_params = Part::new(start, end); - // Add to parameters. - params[params_len as usize] = Part::new(start, end); - params_len += 1; + // Trailing parameter does not exist: + trailing_param = None; } // Done parsing. @@ -232,12 +235,12 @@ impl Message { tags, prefix, command, - params, - params_len, + middle_params, + trailing_param, }) } - /// Borrows from the string slice containing the serialized message. + /// Returns a borrowed string slice containing the serialized message. /// /// # Examples /// @@ -254,7 +257,7 @@ impl Message { &self.buf } - /// Consumes this message, producing the string containing the serialized message. + /// Consumes this message, producing the inner string that contains the serialized message. /// /// # Examples /// @@ -273,9 +276,9 @@ impl Message { /// Produces a parser iterator over the message's tags. The iterator will produce items of /// `(&str, Option>)` for each tag in order, containing the tag's key and its value if - /// one exists for that key. It is entirely zero copy except when the value contains escape - /// sequences, in which case the unescaped value will be produced and stored in an owned - /// buffer. + /// one exists for that key. It is mostly zero-copy, borrowing in all cases except when the + /// value contains escape sequences, in which case the unescaped value will be produced and + /// stored in an owned buffer. /// /// This parser will not dedupe tags, nor will it check whether the tag's key is empty or /// whether it contains illegal characters. @@ -341,32 +344,9 @@ impl Message { self.command.index(&self.buf) } - /// Returns a string slice containing the value of the given argument (indexed starting from - /// 0), if it exists. - /// - /// # Examples - /// - /// ``` - /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::Message; - /// - /// let message = Message::parse("PRIVMSG #rust :Hello Rustaceans!\r\n")?; - /// assert_eq!(message.arg(0), Some("#rust")); - /// assert_eq!(message.arg(1), Some("Hello Rustaceans!")); - /// assert_eq!(message.arg(2), None); - /// # Ok(()) - /// # } - /// ``` - pub fn arg(&self, arg: usize) -> Option<&str> { - if arg < self.params_len as usize { - Some(self.params[arg].index(&self.buf)) - } else { - None - } - } - - /// Returns an iterator over the message's arguments, yielding `&str` string slices containing - /// each argument in order. + /// Returns a parser iterator over the message's parameters. The iterator will produce items of + /// `&str` for each parameter in order, containing the raw data in the parameter. It is entirely + /// zero-copy, borrowing each parameter slice directly from the message buffer. /// /// # Examples /// @@ -385,10 +365,10 @@ impl Message { /// # Ok(()) /// # } /// ``` - pub fn params(&self) -> Args { - Args { - buf: &self.buf, - params: self.params.iter().take(self.params_len as usize), + pub fn params(&self) -> Params { + Params { + remaining: self.middle_params.index(&self.buf), + trailing: self.trailing_param.map(|part| part.index(&self.buf)), } } } @@ -501,7 +481,7 @@ impl<'a> Iterator for Tags<'a> { impl<'a> ExactSizeIterator for Tags<'a> { fn len(&self) -> usize { - // Number of arguments yielded is number of remaining semicolons plus one, unless the + // Number of tags yielded is number of remaining semicolons plus one, unless the // remaining buffer is empty. if self.remaining.len() == 0 { 0 @@ -511,25 +491,58 @@ impl<'a> ExactSizeIterator for Tags<'a> { } } -/// An iterator over a message's tags. See [`Message::params`] for more information. +/// An iterator over a message's parameters. See [`Message::params`] for more information. /// /// [`Message::params`]: ./struct.Message.html#method.params -pub struct Args<'a> { - buf: &'a str, - params: std::iter::Take>, +pub struct Params<'a> { + remaining: &'a str, + trailing: Option<&'a str>, } -impl<'a> Iterator for Args<'a> { +impl<'a> Iterator for Params<'a> { type Item = &'a str; fn next(&mut self) -> Option { - self.params.next().map(|part| part.index(self.buf)) + // If remaining slice is non-empty, we still have middle params to take: + if self.remaining.len() > 0 { + // Next param is everything from here to next whitespace character (or end of string). + let param_end = self.remaining.find(' ').unwrap_or(self.remaining.len()); + let param = &self.remaining[..param_end]; + + // Trim this param and its trailing spaces out of remaining. + self.remaining = self.remaining[param_end..].trim_start_matches(' '); + + Some(param) + } else { + // No more middle params to parse, return trailing if it hasn't been already. + // take will replace with None on the first call, so all future calls will return None. + self.trailing.take() + } } } -impl<'a> ExactSizeIterator for Args<'a> { +impl<'a> ExactSizeIterator for Params<'a> { fn len(&self) -> usize { - self.params.len() + // Number of middle parameter remaining is equal to the number of points where a non-space + // character is preceded by a space character or the beginning of the string. + let mut middle_len = 0; + let mut last = true; + for c in self.remaining.chars() { + let current = c == ' '; + if (last, current) == (true, false) { + middle_len += 1; + } + last = current; + } + + // Add one if the trailing parameter hasn't been taken. + let trailing_len; + if self.trailing.is_some() { + trailing_len = 1; + } else { + trailing_len = 0; + } + middle_len + trailing_len } } From 58c2aeed160db1fbe394a2d11f4cf102d7930ced Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Mon, 10 Jun 2019 17:39:15 -0500 Subject: [PATCH 07/14] Separate suffix from args and make naming consistent --- irc-proto/src/message.rs | 141 ++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 67 deletions(-) diff --git a/irc-proto/src/message.rs b/irc-proto/src/message.rs index 6303074c..25e57fe7 100644 --- a/irc-proto/src/message.rs +++ b/irc-proto/src/message.rs @@ -31,8 +31,11 @@ impl Part { } impl From for Message { - fn from(c: Command) -> Message { - unimplemented!("dummy impl") + fn from(cmd: Command) -> Message { + let mut buf = String::from(&cmd); + buf.push_str("\r\n"); + Message::parse_string(buf) + .unwrap() } } @@ -47,8 +50,8 @@ pub struct Message { tags: Option, prefix: Option, command: Part, - middle_params: Part, - trailing_param: Option, + args: Part, + suffix: Option, } impl Message { @@ -57,10 +60,10 @@ impl Message { /// This will allocate a new `String` to hold the message data, even if a `String` is /// passed. To avoid this and transfer ownership instead, use the [`parse_string`] method. /// - /// This function does not parse parameters or tags, as those may have an arbitrary number of + /// This function does not parse arguments or tags, as those may have an arbitrary number of /// elements and would require additional allocations to hold their pointer data. They have /// their own iterator-parsers that produce the elements while avoiding additional allocations; - /// see the [`params`] and [`tags`] methods for more information. + /// see the [`args`] and [`tags`] methods for more information. /// /// # Error /// @@ -84,7 +87,7 @@ impl Message { /// ``` /// /// [`parse_string`]: #method.parse_string - /// [`params`]: #method.params + /// [`args`]: #method.args /// [`tags`]: #method.tags /// [`MAX_BYTES`]: ./constant.MAX_BYTES.html pub fn parse(message: S) -> Result @@ -187,33 +190,33 @@ impl Message { i += ' '.len_utf8(); } - // Everything from here to crlf must be parameters. - let middle_params; - let trailing_param; + // Everything from here to crlf must be args. + let args; + let suffix; // If " :" exists in the remaining data, the first instance marks the beginning of a - // trailing parameter. - if let Some(trailing_idx) = message[i..].find(" :") { - // Middle parameters are everything from the current position to the last - // non-space character before the trailing parameter. + // suffix. + if let Some(suffix_idx) = message[i..].find(" :") { + // Middle args are everything from the current position to the last + // non-space character before the suffix. let start = i; // Walking back to the last non-space character: - let mut j = i + trailing_idx; + let mut j = i + suffix_idx; while message[..j].ends_with(' ') { j -= ' '.len_utf8(); } let end = j; - middle_params = Part::new(start, end); + args = Part::new(start, end); - // Trailing parameter is everything between the leading " :" and crlf. - i += trailing_idx + ' '.len_utf8() + ':'.len_utf8(); + // Suffix is everything between the leading " :" and crlf. + i += suffix_idx + ' '.len_utf8() + ':'.len_utf8(); let start = i; i = crlf; let end = i; - trailing_param = Some(Part::new(start, end)); + suffix = Some(Part::new(start, end)); } else { - // Middle parameters are everything from the current position to the last non-space + // Middle arg are everything from the current position to the last non-space // character before crlf. let start = i; @@ -223,10 +226,10 @@ impl Message { j -= ' '.len_utf8(); } let end = j; - middle_params = Part::new(start, end); + args = Part::new(start, end); - // Trailing parameter does not exist: - trailing_param = None; + // Suffix does not exist: + suffix = None; } // Done parsing. @@ -235,8 +238,8 @@ impl Message { tags, prefix, command, - middle_params, - trailing_param, + args, + suffix, }) } @@ -344,9 +347,9 @@ impl Message { self.command.index(&self.buf) } - /// Returns a parser iterator over the message's parameters. The iterator will produce items of - /// `&str` for each parameter in order, containing the raw data in the parameter. It is entirely - /// zero-copy, borrowing each parameter slice directly from the message buffer. + /// Returns a parser iterator over the message's arguments. The iterator will produce items of + /// `&str` for each argument in order, containing the raw data in the argument. It is entirely + /// zero-copy, borrowing each argument slice directly from the message buffer. /// /// # Examples /// @@ -355,22 +358,36 @@ impl Message { /// use irc_proto::Message; /// /// let message = Message::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; - /// let mut params = message.params(); - /// assert_eq!(params.len(), 4); - /// assert_eq!(params.next(), Some("guest")); - /// assert_eq!(params.next(), Some("tolmoon")); - /// assert_eq!(params.next(), Some("tolsun")); - /// assert_eq!(params.next(), Some("Ronnie Reagan")); - /// assert_eq!(params.next(), None); + /// let mut args = message.args(); + /// assert_eq!(args.len(), 3); + /// assert_eq!(args.next(), Some("guest")); + /// assert_eq!(args.next(), Some("tolmoon")); + /// assert_eq!(args.next(), Some("tolsun")); + /// assert_eq!(args.next(), None); /// # Ok(()) /// # } /// ``` - pub fn params(&self) -> Params { - Params { - remaining: self.middle_params.index(&self.buf), - trailing: self.trailing_param.map(|part| part.index(&self.buf)), + pub fn args(&self) -> Args { + Args { + remaining: self.args.index(&self.buf), } } + + /// Returns the suffix of this message, if one exists. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::Message; + /// + /// let message = Message::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; + /// assert_eq!(message.suffix(), Some("Ronnie Reagan")); + /// # Ok(()) + /// # } + pub fn suffix(&self) -> Option<&str> { + self.suffix.map(|part| part.index(&self.buf)) + } } impl FromStr for Message { @@ -491,58 +508,48 @@ impl<'a> ExactSizeIterator for Tags<'a> { } } -/// An iterator over a message's parameters. See [`Message::params`] for more information. +/// An iterator over a message's arguments. See [`Message::args`] for more information. /// -/// [`Message::params`]: ./struct.Message.html#method.params -pub struct Params<'a> { +/// [`Message::args`]: ./struct.Message.html#method.args +pub struct Args<'a> { remaining: &'a str, - trailing: Option<&'a str>, } -impl<'a> Iterator for Params<'a> { +impl<'a> Iterator for Args<'a> { type Item = &'a str; fn next(&mut self) -> Option { - // If remaining slice is non-empty, we still have middle params to take: + // If remaining slice is non-empty, we still have args to take: if self.remaining.len() > 0 { - // Next param is everything from here to next whitespace character (or end of string). - let param_end = self.remaining.find(' ').unwrap_or(self.remaining.len()); - let param = &self.remaining[..param_end]; + // Next arg is everything from here to next whitespace character (or end of string). + let arg_end = self.remaining.find(' ').unwrap_or(self.remaining.len()); + let arg = &self.remaining[..arg_end]; - // Trim this param and its trailing spaces out of remaining. - self.remaining = self.remaining[param_end..].trim_start_matches(' '); + // Trim this arg and its trailing spaces out of remaining. + self.remaining = self.remaining[arg_end..].trim_start_matches(' '); - Some(param) + Some(arg) } else { - // No more middle params to parse, return trailing if it hasn't been already. - // take will replace with None on the first call, so all future calls will return None. - self.trailing.take() + // No more args to parse. + None } } } -impl<'a> ExactSizeIterator for Params<'a> { +impl<'a> ExactSizeIterator for Args<'a> { fn len(&self) -> usize { - // Number of middle parameter remaining is equal to the number of points where a non-space + // Number of args remaining is equal to the number of points where a non-space // character is preceded by a space character or the beginning of the string. - let mut middle_len = 0; + let mut len = 0; let mut last = true; for c in self.remaining.chars() { let current = c == ' '; if (last, current) == (true, false) { - middle_len += 1; + len += 1; } last = current; } - - // Add one if the trailing parameter hasn't been taken. - let trailing_len; - if self.trailing.is_some() { - trailing_len = 1; - } else { - trailing_len = 0; - } - middle_len + trailing_len + len } } From 4c9c53e7f1582b22bbe093c597e3a46d1b2e8461 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Mon, 10 Jun 2019 17:47:49 -0500 Subject: [PATCH 08/14] Add command-message conversions --- irc-proto/src/command.rs | 4 ++-- irc-proto/src/message.rs | 41 +++++++++++++++++++++++++++++----------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/irc-proto/src/command.rs b/irc-proto/src/command.rs index 387811ad..dd0f1b34 100644 --- a/irc-proto/src/command.rs +++ b/irc-proto/src/command.rs @@ -1802,13 +1802,13 @@ mod test { fn user_round_trip() { let cmd = Command::USER("a".to_string(), "b".to_string(), "c".to_string()); let line = Message::from(cmd.clone()).to_string(); - let returned_cmd = line.parse::().unwrap().command; + let returned_cmd = line.parse::().unwrap().command().unwrap(); assert_eq!(cmd, returned_cmd); } #[test] fn parse_user_message() { - let cmd = "USER a 0 * b".parse::().unwrap().command; + let cmd = "USER a 0 * b\r\n".parse::().unwrap().command().unwrap(); assert_eq!(Command::USER("a".to_string(), "0".to_string(), "b".to_string()), cmd); } } diff --git a/irc-proto/src/message.rs b/irc-proto/src/message.rs index 25e57fe7..b143d16b 100644 --- a/irc-proto/src/message.rs +++ b/irc-proto/src/message.rs @@ -49,7 +49,7 @@ pub struct Message { buf: String, tags: Option, prefix: Option, - command: Part, + command_name: Part, args: Part, suffix: Option, } @@ -70,7 +70,7 @@ impl Message { /// This method will fail in the following conditions: /// /// - The message length is longer than the maximum supported number of bytes ([`MAX_BYTES`]). - /// - The message is missing required components such as the trailing CRLF or the command. + /// - The message is missing required components such as the trailing CRLF or the command name. /// /// Note that it does not check whether the parts of the message have illegal forms, as /// there is little benefit to restricting that. @@ -169,8 +169,8 @@ impl Message { i += ' '.len_utf8(); } - // Next word must be command. - let command = { + // Next word must be command name. + let command_name = { // Take everything between here and next space. let start = i; @@ -180,8 +180,8 @@ impl Message { Part::new(start, end) }; - // Command must not be empty. - if command.start == command.end { + // Command name must not be empty. + if command_name.start == command_name.end { return Err(MessageParseError::MissingCommand); } @@ -237,7 +237,7 @@ impl Message { buf: message, tags, prefix, - command, + command_name, args, suffix, }) @@ -330,7 +330,7 @@ impl Message { self.prefix.as_ref().map(|part| part.index(&self.buf)) } - /// Returns a string slice containing the message's command. + /// Returns a string slice containing the message's command name. /// /// # Examples /// @@ -339,12 +339,31 @@ impl Message { /// use irc_proto::Message; /// /// let message = Message::parse("NICK ferris\r\n")?; - /// assert_eq!(message.command(), "NICK"); + /// assert_eq!(message.command_name(), "NICK"); /// # Ok(()) /// # } /// ``` - pub fn command(&self) -> &str { - self.command.index(&self.buf) + pub fn command_name(&self) -> &str { + self.command_name.index(&self.buf) + } + + /// Converts this message into a [`Command`]. + /// + /// # Examples + /// + /// ``` + /// fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::{Message, Command}; + /// + /// let message = Message::parse("NICK ferris\r\n")?; + /// assert_eq!(message.command()?, Command::NICK("ferris".to_string())); + /// # Ok(()) + /// # } + /// ``` + /// + /// [`Command`]: ../command/enum.Command.html + pub fn command(&self) -> Result { + Command::new(self.command_name(), self.args().collect(), self.suffix()) } /// Returns a parser iterator over the message's arguments. The iterator will produce items of From 33547ed9c012c96c7a79530064cacd17ca11f302 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Mon, 10 Jun 2019 17:50:09 -0500 Subject: [PATCH 09/14] Remove unused imports and unused error type --- irc-proto/src/error.rs | 4 ---- irc-proto/src/message.rs | 4 ---- 2 files changed, 8 deletions(-) diff --git a/irc-proto/src/error.rs b/irc-proto/src/error.rs index 48b31ac4..53fe2eb8 100644 --- a/irc-proto/src/error.rs +++ b/irc-proto/src/error.rs @@ -63,10 +63,6 @@ pub enum MessageParseError { #[fail(display = "message exceeded the maximum length")] MaxLengthExceeded, - /// The message exceeded the maximum allowable arguments. - #[fail(display = "message exceeded the maximum number of arguments")] - MaxArgsExceeded, - /// The message did not contain a trailing CRLF. #[fail(display = "message does not contain a trailing CRLF")] MissingCrLf, diff --git a/irc-proto/src/message.rs b/irc-proto/src/message.rs index b143d16b..7a911867 100644 --- a/irc-proto/src/message.rs +++ b/irc-proto/src/message.rs @@ -1,14 +1,10 @@ //! A module providing a data structure for messages to and from IRC servers. use std::borrow::Cow; use std::fmt; -use std::num::NonZeroU16; use std::str::FromStr; -use chan::ChannelExt; use command::Command; -use error; use error::{MessageParseError, ProtocolError}; -use prefix::Prefix; #[derive(Debug, PartialEq, Eq, Clone, Copy)] From b69aaf595f54c39325379c8979600df2dbdf4838 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Tue, 11 Jun 2019 00:00:53 -0500 Subject: [PATCH 10/14] Preserve original Message API --- irc-proto/src/buf.rs | 546 +++++++++++++++++++++++++++++ irc-proto/src/lib.rs | 1 + irc-proto/src/message.rs | 727 +++++++++++---------------------------- 3 files changed, 752 insertions(+), 522 deletions(-) create mode 100644 irc-proto/src/buf.rs diff --git a/irc-proto/src/buf.rs b/irc-proto/src/buf.rs new file mode 100644 index 00000000..fd325f55 --- /dev/null +++ b/irc-proto/src/buf.rs @@ -0,0 +1,546 @@ +//! A zero-copy implementation of IRC message parsing. + +use std::borrow::Cow; +use std::fmt; +use std::str::FromStr; + +use error::{MessageParseError, ProtocolError}; + + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +struct Part { + start: u16, + end: u16, +} + +impl Part { + fn new(start: usize, end: usize) -> Part { + Part { + start: start as u16, + end: end as u16, + } + } + + fn index<'a>(&self, s: &'a str) -> &'a str { + &s[self.start as usize..self.end as usize] + } +} + +/// The maximum number of bytes allowed in a message buffer, currently set to `u16::max_value()` as +/// the maximum value of the buffer's pointer types. +pub const MAX_BYTES: usize = u16::max_value() as usize; + +/// A parsed IRC message string, containing a single buffer with pointers to the individual parts. +#[derive(Clone, PartialEq, Debug)] +pub struct MessageBuf { + buf: String, + tags: Option, + prefix: Option, + command: Part, + args: Part, + suffix: Option, +} + +impl MessageBuf { + /// Parses the message, converting the given object into an owned string. + /// + /// This will allocate a new `String` to hold the message data, even if a `String` is + /// passed. To avoid this and transfer ownership instead, use the [`parse_string`] method. + /// + /// This function does not parse arguments or tags, as those may have an arbitrary number of + /// elements and would require additional allocations to hold their pointer data. They have + /// their own iterator-parsers that produce the elements while avoiding additional allocations; + /// see the [`args`] and [`tags`] methods for more information. + /// + /// # Error + /// + /// This method will fail in the following conditions: + /// + /// - The message length is longer than the maximum supported number of bytes ([`MAX_BYTES`]). + /// - The message is missing required components such as the trailing CRLF or the command. + /// + /// Note that it does not check whether the parts of the message have illegal forms, as + /// there is little benefit to restricting that. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::MessageBuf; + /// + /// let message = MessageBuf::parse("PRIVMSG #rust :Hello Rustaceans!\r\n")?; + /// # Ok(()) + /// # } + /// ``` + /// + /// [`parse_string`]: #method.parse_string + /// [`args`]: #method.args + /// [`tags`]: #method.tags + /// [`MAX_BYTES`]: ./constant.MAX_BYTES.html + pub fn parse(message: S) -> Result + where + S: ToString, + { + MessageBuf::parse_string(message.to_string()) + } + + /// Takes ownership of the given string and parses it into a message. + /// + /// For more information about the details of the parser, see the [`parse`] method. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::MessageBuf; + /// + /// let message = MessageBuf::parse_string("NICK ferris\r\n".to_string())?; + /// # Ok(()) + /// # } + /// ``` + /// + /// [`parse`]: #method.parse + pub fn parse_string(message: String) -> Result { + // To make sure pointers don't overflow: + if message.len() > MAX_BYTES { + return Err(MessageParseError::MaxLengthExceeded); + } + + // Make sure the message is terminated with line endings: + if !message.ends_with("\r\n") { + return Err(MessageParseError::MissingCrLf); + } + // Used as the end of the "useful" part of the message. + let crlf = message.len() - '\n'.len_utf8() - '\r'.len_utf8(); + + // Accumulating pointer used to keep track of how much has already been parsed. + let mut i = 0; + + // If word starts with '@', it is a tag. + let tags; + if message[i..].starts_with('@') { + // Take everything between '@' and next space. + i += '@'.len_utf8(); + let start = i; + + i += message[i..].find(' ').unwrap_or_else(|| crlf - i); + let end = i; + + tags = Some(Part::new(start, end)); + } else { + tags = None; + } + + // Skip to next non-space. + while message[i..].starts_with(' ') { + i += ' '.len_utf8(); + } + + // If word starts with ':', it is a prefix. + let prefix; + if message[i..].starts_with(':') { + // Take everything between ':' and next space. + i += ':'.len_utf8(); + let start = i; + + i += message[i..].find(' ').unwrap_or_else(|| crlf - i); + let end = i; + + prefix = Some(Part::new(start, end)); + } else { + prefix = None; + } + + // Skip to next non-space. + while message[i..].starts_with(' ') { + i += ' '.len_utf8(); + } + + // Next word must be command. + let command = { + // Take everything between here and next space. + let start = i; + + i += message[i..].find(' ').unwrap_or_else(|| crlf - i); + let end = i; + + Part::new(start, end) + }; + + // Command name must not be empty. + if command.start == command.end { + return Err(MessageParseError::MissingCommand); + } + + // Skip to next non-space. + while message[i..].starts_with(' ') { + i += ' '.len_utf8(); + } + + // Everything from here to crlf must be args. + let args; + let suffix; + + // If " :" exists in the remaining data, the first instance marks the beginning of a + // suffix. + if let Some(suffix_idx) = message[i..].find(" :") { + // Middle args are everything from the current position to the last + // non-space character before the suffix. + let start = i; + + // Walking back to the last non-space character: + let mut j = i + suffix_idx; + while message[..j].ends_with(' ') { + j -= ' '.len_utf8(); + } + let end = j; + args = Part::new(start, end); + + // Suffix is everything between the leading " :" and crlf. + i += suffix_idx + ' '.len_utf8() + ':'.len_utf8(); + let start = i; + i = crlf; + let end = i; + suffix = Some(Part::new(start, end)); + } else { + // Middle arg are everything from the current position to the last non-space + // character before crlf. + let start = i; + + // Walking back to the last non-space character: + let mut j = crlf; + while message[..j].ends_with(' ') { + j -= ' '.len_utf8(); + } + let end = j; + args = Part::new(start, end); + + // Suffix does not exist: + suffix = None; + } + + // Done parsing. + Ok(MessageBuf { + buf: message, + tags, + prefix, + command, + args, + suffix, + }) + } + + /// Returns a borrowed string slice containing the serialized message. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::MessageBuf; + /// + /// let raw_message = "JOIN #rust\r\n"; + /// let parsed_message = MessageBuf::parse(raw_message)?; + /// assert_eq!(parsed_message.as_str(), raw_message); + /// # Ok(()) + /// # } + pub fn as_str(&self) -> &str { + &self.buf + } + + /// Consumes this message, producing the inner string that contains the serialized message. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::MessageBuf; + /// + /// let raw_message = "JOIN #rust\r\n"; + /// let parsed_message = MessageBuf::parse(raw_message)?; + /// assert_eq!(parsed_message.into_string(), raw_message); + /// # Ok(()) + /// # } + pub fn into_string(self) -> String { + self.buf + } + + /// Produces a parser iterator over the message's tags. The iterator will produce items of + /// `(&str, Option>)` for each tag in order, containing the tag's key and its value if + /// one exists for that key. It is mostly zero-copy, borrowing in all cases except when the + /// value contains escape sequences, in which case the unescaped value will be produced and + /// stored in an owned buffer. + /// + /// This parser will not dedupe tags, nor will it check whether the tag's key is empty or + /// whether it contains illegal characters. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::MessageBuf; + /// use std::borrow::Cow; + /// + /// let message = MessageBuf::parse( + /// "@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello\r\n" + /// )?; + /// + /// let mut tags = message.tags(); + /// assert_eq!(tags.len(), 3); + /// + /// assert_eq!(tags.next(), Some(("aaa", Some(Cow::Borrowed("bbb"))))); + /// assert_eq!(tags.next(), Some(("ccc", None))); + /// assert_eq!(tags.next(), Some(("example.com/ddd", Some(Cow::Borrowed("eee"))))); + /// assert_eq!(tags.next(), None); + /// # Ok(()) + /// # } + /// ``` + pub fn tags(&self) -> Tags { + Tags { + remaining: self.tags.as_ref().map(|part| part.index(&self.buf)).unwrap_or(""), + } + } + + /// Returns a string slice containing the message's prefix, if it exists. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::MessageBuf; + /// + /// let message = MessageBuf::parse(":nick!ident@host.com PRIVMSG me :Hello\r\n")?; + /// assert_eq!(message.prefix(), Some("nick!ident@host.com")); + /// # Ok(()) + /// # } + /// ``` + pub fn prefix(&self) -> Option<&str> { + self.prefix.as_ref().map(|part| part.index(&self.buf)) + } + + /// Returns a string slice containing the message's command. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::MessageBuf; + /// + /// let message = MessageBuf::parse("NICK ferris\r\n")?; + /// assert_eq!(message.command(), "NICK"); + /// # Ok(()) + /// # } + /// ``` + pub fn command(&self) -> &str { + self.command.index(&self.buf) + } + + /// Returns a parser iterator over the message's arguments. The iterator will produce items of + /// `&str` for each argument in order, containing the raw data in the argument. It is entirely + /// zero-copy, borrowing each argument slice directly from the message buffer. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::MessageBuf; + /// + /// let message = MessageBuf::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; + /// let mut args = message.args(); + /// assert_eq!(args.len(), 3); + /// assert_eq!(args.next(), Some("guest")); + /// assert_eq!(args.next(), Some("tolmoon")); + /// assert_eq!(args.next(), Some("tolsun")); + /// assert_eq!(args.next(), None); + /// # Ok(()) + /// # } + /// ``` + pub fn args(&self) -> Args { + Args { + remaining: self.args.index(&self.buf), + } + } + + /// Returns the suffix of this message, if one exists. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::MessageBuf; + /// + /// let message = MessageBuf::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; + /// assert_eq!(message.suffix(), Some("Ronnie Reagan")); + /// # Ok(()) + /// # } + pub fn suffix(&self) -> Option<&str> { + self.suffix.map(|part| part.index(&self.buf)) + } +} + +impl FromStr for MessageBuf { + type Err = ProtocolError; + + fn from_str(s: &str) -> Result { + MessageBuf::parse(s) + .map_err(|err| ProtocolError::InvalidMessage { string: s.to_string(), cause: err }) + } +} + +impl AsRef for MessageBuf { + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl fmt::Display for MessageBuf { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(&self.buf) + } +} + +/// A parser iterator over a message's tags. See [`MessageBuf::tags`] for more information. +/// +/// [`MessageBuf::tags`]: ./struct.MessageBuf.html#method.tags +pub struct Tags<'a> { + remaining: &'a str, +} + +impl<'a> Iterator for Tags<'a> { + type Item = (&'a str, Option>); + + fn next(&mut self) -> Option { + // If remaining is empty, nothing is left to yield. + if self.remaining.len() == 0 { + None + } else { + // Take everything from here to next ';'. + let tag = self.remaining + .char_indices() + .find(|&(_i, c)| c == ';') + .map(|(i, _c)| &self.remaining[..i]) + .unwrap_or(&self.remaining); + + // Remove taken data from the remaining buffer. + if self.remaining.len() == tag.len() { + self.remaining = ""; + } else { + self.remaining = &self.remaining[tag.len() + ';'.len_utf8()..]; + } + + // If an equal sign exists in the tag data, it must have an associated value. + if let Some(key_end) = tag.find('=') { + // Everything before the first equal sign is the key. + let key = &tag[..key_end]; + + // Everything after the first equal sign is the value. + let mut raw_value = &tag[key_end + '='.len_utf8()..]; + + // Resolve escape sequences if any are found. + // This will not allocate unless data is given to it. + let mut value = String::new(); + while let Some(escape_idx) = raw_value.find('\\') { + // Copy everything before this escape sequence. + value.push_str(&raw_value[..escape_idx]); + // Resolve this escape sequence. + let c = match raw_value[escape_idx + '\\'.len_utf8()..].chars().next() { + Some(':') => Some(';'), + Some('s') => Some(' '), + Some('\\') => Some('\\'), + Some('r') => Some('\r'), + Some('n') => Some('\n'), + Some(c) => Some(c), + None => None, + }; + // If it resolves to a character, then push it. + if let Some(c) = c { + value.push(c); + } + // Cut off the beginning of raw_value such that it only contains + // everything after the parsed escape sequence. + // Upon looping, it will start searching from this point, skipping the last + // escape sequence. + raw_value = &raw_value[ + (escape_idx + + '\\'.len_utf8() + + c.map(char::len_utf8).unwrap_or(0) + ).. + ]; + } + + // If we didn't add data, no escape sequences exist and the raw value can be + // referenced. + if value.len() == 0 { + Some((key, Some(Cow::Borrowed(raw_value)))) + } else { + // Make sure you add the rest of the raw value that doesn't contain escapes. + value.push_str(raw_value); + Some((key, Some(Cow::Owned(value)))) + } + } else { + Some((tag, None)) + } + } + } +} + +impl<'a> ExactSizeIterator for Tags<'a> { + fn len(&self) -> usize { + // Number of tags yielded is number of remaining semicolons plus one, unless the + // remaining buffer is empty. + if self.remaining.len() == 0 { + 0 + } else { + self.remaining.chars().filter(|&c| c == ';').count() + 1 + } + } +} + +/// An iterator over a message's arguments. See [`MessageBuf::args`] for more information. +/// +/// [`MessageBuf::args`]: ./struct.MessageBuf.html#method.args +pub struct Args<'a> { + remaining: &'a str, +} + +impl<'a> Iterator for Args<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option { + // If remaining slice is non-empty, we still have args to take: + if self.remaining.len() > 0 { + // Next arg is everything from here to next whitespace character (or end of string). + let arg_end = self.remaining.find(' ').unwrap_or(self.remaining.len()); + let arg = &self.remaining[..arg_end]; + + // Trim this arg and its trailing spaces out of remaining. + self.remaining = self.remaining[arg_end..].trim_start_matches(' '); + + Some(arg) + } else { + // No more args to parse. + None + } + } +} + +impl<'a> ExactSizeIterator for Args<'a> { + fn len(&self) -> usize { + // Number of args remaining is equal to the number of points where a non-space + // character is preceded by a space character or the beginning of the string. + let mut len = 0; + let mut last = true; + for c in self.remaining.chars() { + let current = c == ' '; + if (last, current) == (true, false) { + len += 1; + } + last = current; + } + len + } +} + +#[cfg(test)] +mod test { + // TODO +} diff --git a/irc-proto/src/lib.rs b/irc-proto/src/lib.rs index 09e5bba6..2ff12b5c 100644 --- a/irc-proto/src/lib.rs +++ b/irc-proto/src/lib.rs @@ -12,6 +12,7 @@ extern crate tokio_codec; #[cfg(feature = "tokio")] extern crate tokio_io; +pub mod buf; pub mod caps; pub mod chan; pub mod colors; diff --git a/irc-proto/src/message.rs b/irc-proto/src/message.rs index 7a911867..b2648d71 100644 --- a/irc-proto/src/message.rs +++ b/irc-proto/src/message.rs @@ -1,587 +1,280 @@ //! A module providing a data structure for messages to and from IRC servers. -use std::borrow::Cow; -use std::fmt; +use std::borrow::ToOwned; +use std::fmt::{Display, Formatter, Result as FmtResult, Write}; use std::str::FromStr; +use chan::ChannelExt; use command::Command; +use error; use error::{MessageParseError, ProtocolError}; +use prefix::Prefix; -#[derive(Debug, PartialEq, Eq, Clone, Copy)] -struct Part { - start: u16, - end: u16, -} - -impl Part { - fn new(start: usize, end: usize) -> Part { - Part { - start: start as u16, - end: end as u16, - } - } - - fn index<'a>(&self, s: &'a str) -> &'a str { - &s[self.start as usize..self.end as usize] - } -} - -impl From for Message { - fn from(cmd: Command) -> Message { - let mut buf = String::from(&cmd); - buf.push_str("\r\n"); - Message::parse_string(buf) - .unwrap() - } -} - -/// The maximum number of bytes allowed in a message, currently set to `u16::max_value()`, though -/// the IRC specification is stricter than this. -pub const MAX_BYTES: usize = u16::max_value() as usize; - -/// A parsed IRC message, containing a buffer with pointers to the individual parts. +/// A data structure representing an IRC message according to the protocol specification. It +/// consists of a collection of IRCv3 tags, a prefix (describing the source of the message), and +/// the protocol command. If the command is unknown, it is treated as a special raw command that +/// consists of a collection of arguments and the special suffix argument. Otherwise, the command +/// is parsed into a more useful form as described in [Command](../command/enum.Command.html). #[derive(Clone, PartialEq, Debug)] pub struct Message { - buf: String, - tags: Option, - prefix: Option, - command_name: Part, - args: Part, - suffix: Option, + /// Message tags as defined by [IRCv3.2](http://ircv3.net/specs/core/message-tags-3.2.html). + /// These tags are used to add extended information to the given message, and are commonly used + /// in IRCv3 extensions to the IRC protocol. + pub tags: Option>, + /// The message prefix (or source) as defined by [RFC 2812](http://tools.ietf.org/html/rfc2812). + pub prefix: Option, + /// The IRC command, parsed according to the known specifications. The command itself and its + /// arguments (including the special suffix argument) are captured in this component. + pub command: Command, } impl Message { - /// Parses the message, converting the given object into an owned string. - /// - /// This will allocate a new `String` to hold the message data, even if a `String` is - /// passed. To avoid this and transfer ownership instead, use the [`parse_string`] method. - /// - /// This function does not parse arguments or tags, as those may have an arbitrary number of - /// elements and would require additional allocations to hold their pointer data. They have - /// their own iterator-parsers that produce the elements while avoiding additional allocations; - /// see the [`args`] and [`tags`] methods for more information. - /// - /// # Error - /// - /// This method will fail in the following conditions: - /// - /// - The message length is longer than the maximum supported number of bytes ([`MAX_BYTES`]). - /// - The message is missing required components such as the trailing CRLF or the command name. - /// - /// Note that it does not check whether the parts of the message have illegal forms, as - /// there is little benefit to restricting that. - /// - /// # Examples + /// Creates a new message from the given components. /// + /// # Example /// ``` - /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::Message; - /// - /// let message = Message::parse("PRIVMSG #rust :Hello Rustaceans!\r\n")?; - /// # Ok(()) + /// # extern crate irc_proto; + /// # use irc_proto::Message; + /// # fn main() { + /// let message = Message::new( + /// Some("nickname!username@hostname"), "JOIN", vec!["#channel"], None + /// ).unwrap(); /// # } /// ``` - /// - /// [`parse_string`]: #method.parse_string - /// [`args`]: #method.args - /// [`tags`]: #method.tags - /// [`MAX_BYTES`]: ./constant.MAX_BYTES.html - pub fn parse(message: S) -> Result - where - S: ToString, - { - Message::parse_string(message.to_string()) + pub fn new( + prefix: Option<&str>, + command: &str, + args: Vec<&str>, + suffix: Option<&str>, + ) -> Result { + Message::with_tags(None, prefix, command, args, suffix) } - /// Takes ownership of the given string and parses it into a message. - /// - /// For more information about the details of the parser, see the [`parse`] method. - /// - /// [`parse`] #method.parse - /// - /// # Examples - /// - /// ``` - /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::Message; - /// - /// let message = Message::parse_string("NICK ferris\r\n".to_string())?; - /// # Ok(()) - /// # } - /// ``` - pub fn parse_string(message: String) -> Result { - // To make sure pointers don't overflow: - if message.len() > MAX_BYTES { - return Err(MessageParseError::MaxLengthExceeded); - } - - // Make sure the message is terminated with line endings: - if !message.ends_with("\r\n") { - return Err(MessageParseError::MissingCrLf); - } - // Used as the end of the "useful" part of the message. - let crlf = message.len() - '\n'.len_utf8() - '\r'.len_utf8(); - - // Accumulating pointer used to keep track of how much has already been parsed. - let mut i = 0; - - // If word starts with '@', it is a tag. - let tags; - if message[i..].starts_with('@') { - // Take everything between '@' and next space. - i += '@'.len_utf8(); - let start = i; - - i += message[i..].find(' ').unwrap_or_else(|| crlf - i); - let end = i; - - tags = Some(Part::new(start, end)); - } else { - tags = None; - } - - // Skip to next non-space. - while message[i..].starts_with(' ') { - i += ' '.len_utf8(); - } - - // If word starts with ':', it is a prefix. - let prefix; - if message[i..].starts_with(':') { - // Take everything between ':' and next space. - i += ':'.len_utf8(); - let start = i; - - i += message[i..].find(' ').unwrap_or_else(|| crlf - i); - let end = i; - - prefix = Some(Part::new(start, end)); - } else { - prefix = None; - } - - // Skip to next non-space. - while message[i..].starts_with(' ') { - i += ' '.len_utf8(); - } - - // Next word must be command name. - let command_name = { - // Take everything between here and next space. - let start = i; - - i += message[i..].find(' ').unwrap_or_else(|| crlf - i); - let end = i; - - Part::new(start, end) - }; - - // Command name must not be empty. - if command_name.start == command_name.end { - return Err(MessageParseError::MissingCommand); - } - - // Skip to next non-space. - while message[i..].starts_with(' ') { - i += ' '.len_utf8(); - } - - // Everything from here to crlf must be args. - let args; - let suffix; - - // If " :" exists in the remaining data, the first instance marks the beginning of a - // suffix. - if let Some(suffix_idx) = message[i..].find(" :") { - // Middle args are everything from the current position to the last - // non-space character before the suffix. - let start = i; - - // Walking back to the last non-space character: - let mut j = i + suffix_idx; - while message[..j].ends_with(' ') { - j -= ' '.len_utf8(); - } - let end = j; - args = Part::new(start, end); - - // Suffix is everything between the leading " :" and crlf. - i += suffix_idx + ' '.len_utf8() + ':'.len_utf8(); - let start = i; - i = crlf; - let end = i; - suffix = Some(Part::new(start, end)); - } else { - // Middle arg are everything from the current position to the last non-space - // character before crlf. - let start = i; - - // Walking back to the last non-space character: - let mut j = crlf; - while message[..j].ends_with(' ') { - j -= ' '.len_utf8(); - } - let end = j; - args = Part::new(start, end); - - // Suffix does not exist: - suffix = None; - } - - // Done parsing. + /// Creates a new IRCv3.2 message from the given components, including message tags. These tags + /// are used to add extended information to the given message, and are commonly used in IRCv3 + /// extensions to the IRC protocol. + pub fn with_tags( + tags: Option>, + prefix: Option<&str>, + command: &str, + args: Vec<&str>, + suffix: Option<&str>, + ) -> Result { Ok(Message { - buf: message, - tags, - prefix, - command_name, - args, - suffix, + tags: tags, + prefix: prefix.map(|p| p.into()), + command: Command::new(command, args, suffix)?, }) } - /// Returns a borrowed string slice containing the serialized message. - /// - /// # Examples + /// Gets the nickname of the message source, if it exists. /// + /// # Example /// ``` - /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::Message; - /// - /// let raw_message = "JOIN #rust\r\n"; - /// let parsed_message = Message::parse(raw_message)?; - /// assert_eq!(parsed_message.as_str(), raw_message); - /// # Ok(()) + /// # extern crate irc_proto; + /// # use irc_proto::Message; + /// # fn main() { + /// let message = Message::new( + /// Some("nickname!username@hostname"), "JOIN", vec!["#channel"], None + /// ).unwrap(); + /// assert_eq!(message.source_nickname(), Some("nickname")); /// # } - pub fn as_str(&self) -> &str { - &self.buf - } - - /// Consumes this message, producing the inner string that contains the serialized message. - /// - /// # Examples - /// /// ``` - /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::Message; - /// - /// let raw_message = "JOIN #rust\r\n"; - /// let parsed_message = Message::parse(raw_message)?; - /// assert_eq!(parsed_message.into_string(), raw_message); - /// # Ok(()) - /// # } - pub fn into_string(self) -> String { - self.buf + pub fn source_nickname(&self) -> Option<&str> { + // ::= | [ '!' ] [ '@' ] + // ::= + self.prefix.as_ref().and_then(|p| match p { + Prefix::Nickname(name, _, _) => Some(&name[..]), + _ => None + }) } - /// Produces a parser iterator over the message's tags. The iterator will produce items of - /// `(&str, Option>)` for each tag in order, containing the tag's key and its value if - /// one exists for that key. It is mostly zero-copy, borrowing in all cases except when the - /// value contains escape sequences, in which case the unescaped value will be produced and - /// stored in an owned buffer. - /// - /// This parser will not dedupe tags, nor will it check whether the tag's key is empty or - /// whether it contains illegal characters. - /// - /// # Examples + /// Gets the likely intended place to respond to this message. + /// If the type of the message is a `PRIVMSG` or `NOTICE` and the message is sent to a channel, + /// the result will be that channel. In all other cases, this will call `source_nickname`. /// + /// # Example /// ``` - /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::Message; - /// use std::borrow::Cow; - /// - /// let message = Message::parse( - /// "@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello\r\n" - /// )?; - /// - /// let mut tags = message.tags(); - /// assert_eq!(tags.len(), 3); - /// - /// assert_eq!(tags.next(), Some(("aaa", Some(Cow::Borrowed("bbb"))))); - /// assert_eq!(tags.next(), Some(("ccc", None))); - /// assert_eq!(tags.next(), Some(("example.com/ddd", Some(Cow::Borrowed("eee"))))); - /// assert_eq!(tags.next(), None); - /// # Ok(()) + /// # extern crate irc_proto; + /// # use irc_proto::Message; + /// # fn main() { + /// let msg1 = Message::new( + /// Some("ada"), "PRIVMSG", vec!["#channel"], Some("Hi, everyone!") + /// ).unwrap(); + /// assert_eq!(msg1.response_target(), Some("#channel")); + /// let msg2 = Message::new( + /// Some("ada"), "PRIVMSG", vec!["betsy"], Some("betsy: hi") + /// ).unwrap(); + /// assert_eq!(msg2.response_target(), Some("ada")); /// # } /// ``` - pub fn tags(&self) -> Tags { - Tags { - remaining: self.tags.as_ref().map(|part| part.index(&self.buf)).unwrap_or(""), + pub fn response_target(&self) -> Option<&str> { + match self.command { + Command::PRIVMSG(ref target, _) if target.is_channel_name() => Some(target), + Command::NOTICE(ref target, _) if target.is_channel_name() => Some(target), + _ => self.source_nickname() } } - /// Returns a string slice containing the message's prefix, if it exists. - /// - /// # Examples + /// Converts a Message into a String according to the IRC protocol. /// + /// # Example /// ``` - /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::Message; - /// - /// let message = Message::parse(":nick!ident@host.com PRIVMSG me :Hello\r\n")?; - /// assert_eq!(message.prefix(), Some("nick!ident@host.com")); - /// # Ok(()) + /// # extern crate irc_proto; + /// # use irc_proto::Message; + /// # fn main() { + /// let msg = Message::new( + /// Some("ada"), "PRIVMSG", vec!["#channel"], Some("Hi, everyone!") + /// ).unwrap(); + /// assert_eq!(msg.to_string(), ":ada PRIVMSG #channel :Hi, everyone!\r\n"); /// # } /// ``` - pub fn prefix(&self) -> Option<&str> { - self.prefix.as_ref().map(|part| part.index(&self.buf)) - } - - /// Returns a string slice containing the message's command name. - /// - /// # Examples - /// - /// ``` - /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::Message; - /// - /// let message = Message::parse("NICK ferris\r\n")?; - /// assert_eq!(message.command_name(), "NICK"); - /// # Ok(()) - /// # } - /// ``` - pub fn command_name(&self) -> &str { - self.command_name.index(&self.buf) - } - - /// Converts this message into a [`Command`]. - /// - /// # Examples - /// - /// ``` - /// fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::{Message, Command}; - /// - /// let message = Message::parse("NICK ferris\r\n")?; - /// assert_eq!(message.command()?, Command::NICK("ferris".to_string())); - /// # Ok(()) - /// # } - /// ``` - /// - /// [`Command`]: ../command/enum.Command.html - pub fn command(&self) -> Result { - Command::new(self.command_name(), self.args().collect(), self.suffix()) - } - - /// Returns a parser iterator over the message's arguments. The iterator will produce items of - /// `&str` for each argument in order, containing the raw data in the argument. It is entirely - /// zero-copy, borrowing each argument slice directly from the message buffer. - /// - /// # Examples - /// - /// ``` - /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::Message; - /// - /// let message = Message::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; - /// let mut args = message.args(); - /// assert_eq!(args.len(), 3); - /// assert_eq!(args.next(), Some("guest")); - /// assert_eq!(args.next(), Some("tolmoon")); - /// assert_eq!(args.next(), Some("tolsun")); - /// assert_eq!(args.next(), None); - /// # Ok(()) - /// # } - /// ``` - pub fn args(&self) -> Args { - Args { - remaining: self.args.index(&self.buf), + pub fn to_string(&self) -> String { + let mut ret = String::new(); + if let Some(ref tags) = self.tags { + ret.push('@'); + for tag in tags { + ret.push_str(&tag.0); + if let Some(ref value) = tag.1 { + ret.push('='); + ret.push_str(value); + } + ret.push(';'); + } + ret.pop(); + ret.push(' '); + } + if let Some(ref prefix) = self.prefix { + write!(ret, ":{} ", prefix).unwrap(); } + let cmd: String = From::from(&self.command); + ret.push_str(&cmd); + ret.push_str("\r\n"); + ret } +} - /// Returns the suffix of this message, if one exists. - /// - /// # Examples - /// - /// ``` - /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::Message; - /// - /// let message = Message::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; - /// assert_eq!(message.suffix(), Some("Ronnie Reagan")); - /// # Ok(()) - /// # } - pub fn suffix(&self) -> Option<&str> { - self.suffix.map(|part| part.index(&self.buf)) +impl From for Message { + fn from(cmd: Command) -> Message { + Message { + tags: None, + prefix: None, + command: cmd, + } } } impl FromStr for Message { type Err = ProtocolError; - fn from_str(s: &str) -> Result { - Message::parse(s) - .map_err(|err| ProtocolError::InvalidMessage { string: s.to_string(), cause: err }) - } -} - -impl AsRef for Message { - fn as_ref(&self) -> &str { - self.as_str() - } -} + fn from_str(s: &str) -> Result { + if s.is_empty() { + return Err(ProtocolError::InvalidMessage { + string: s.to_owned(), + cause: MessageParseError::EmptyMessage, + }) + } -impl fmt::Display for Message { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(&self.buf) - } -} + let mut state = s; + + let tags = if state.starts_with('@') { + let tags = state.find(' ').map(|i| &state[1..i]); + state = state.find(' ').map_or("", |i| &state[i + 1..]); + tags.map(|ts| { + ts.split(';') + .filter(|s| !s.is_empty()) + .map(|s: &str| { + let mut iter = s.splitn(2, '='); + let (fst, snd) = (iter.next(), iter.next()); + Tag(fst.unwrap_or("").to_owned(), snd.map(|s| s.to_owned())) + }) + .collect::>() + }) + } else { + None + }; -/// A parser iterator over a message's tags. See [`Message::tags`] for more information. -/// -/// [`Message::tags`]: ./struct.Message.html#method.tags -pub struct Tags<'a> { - remaining: &'a str, -} + let prefix = if state.starts_with(':') { + let prefix = state.find(' ').map(|i| &state[1..i]); + state = state.find(' ').map_or("", |i| &state[i + 1..]); + prefix + } else { + None + }; -impl<'a> Iterator for Tags<'a> { - type Item = (&'a str, Option>); + let line_ending_len = if state.ends_with("\r\n") { + "\r\n" + } else if state.ends_with('\r') { + "\r" + } else if state.ends_with('\n') { + "\n" + } else { + "" + }.len(); - fn next(&mut self) -> Option { - // If remaining is empty, nothing is left to yield. - if self.remaining.len() == 0 { - None + let suffix = if state.contains(" :") { + let suffix = state.find(" :").map(|i| &state[i + 2..state.len() - line_ending_len]); + state = state.find(" :").map_or("", |i| &state[..i + 1]); + suffix } else { - // Take everything from here to next ';'. - let tag = self.remaining - .char_indices() - .find(|&(_i, c)| c == ';') - .map(|(i, _c)| &self.remaining[..i]) - .unwrap_or(&self.remaining); - - // Remove taken data from the remaining buffer. - if self.remaining.len() == tag.len() { - self.remaining = ""; - } else { - self.remaining = &self.remaining[tag.len() + ';'.len_utf8()..]; + state = &state[..state.len() - line_ending_len]; + None + }; + + let command = match state.find(' ').map(|i| &state[..i]) { + Some(cmd) => { + state = state.find(' ').map_or("", |i| &state[i + 1..]); + cmd } - - // If an equal sign exists in the tag data, it must have an associated value. - if let Some(key_end) = tag.find('=') { - // Everything before the first equal sign is the key. - let key = &tag[..key_end]; - - // Everything after the first equal sign is the value. - let mut raw_value = &tag[key_end + '='.len_utf8()..]; - - // Resolve escape sequences if any are found. - // This will not allocate unless data is given to it. - let mut value = String::new(); - while let Some(escape_idx) = raw_value.find('\\') { - // Copy everything before this escape sequence. - value.push_str(&raw_value[..escape_idx]); - // Resolve this escape sequence. - let c = match raw_value[escape_idx + '\\'.len_utf8()..].chars().next() { - Some(':') => Some(';'), - Some('s') => Some(' '), - Some('\\') => Some('\\'), - Some('r') => Some('\r'), - Some('n') => Some('\n'), - Some(c) => Some(c), - None => None, - }; - // If it resolves to a character, then push it. - if let Some(c) = c { - value.push(c); - } - // Cut off the beginning of raw_value such that it only contains - // everything after the parsed escape sequence. - // Upon looping, it will start searching from this point, skipping the last - // escape sequence. - raw_value = &raw_value[ - (escape_idx - + '\\'.len_utf8() - + c.map(char::len_utf8).unwrap_or(0) - ).. - ]; - } + // If there's no arguments but the "command" starts with colon, it's not a command. + None if state.starts_with(':') => return Err(ProtocolError::InvalidMessage { + string: s.to_owned(), + cause: MessageParseError::InvalidCommand, + }), + // If there's no arguments following the command, the rest of the state is the command. + None => { + let cmd = state; + state = ""; + cmd + }, + }; - // If we didn't add data, no escape sequences exist and the raw value can be - // referenced. - if value.len() == 0 { - Some((key, Some(Cow::Borrowed(raw_value)))) - } else { - // Make sure you add the rest of the raw value that doesn't contain escapes. - value.push_str(raw_value); - Some((key, Some(Cow::Owned(value)))) - } - } else { - Some((tag, None)) + let args: Vec<_> = state.splitn(14, ' ').filter(|s| !s.is_empty()).collect(); + + Message::with_tags(tags, prefix, command, args, suffix).map_err(|e| { + ProtocolError::InvalidMessage { + string: s.to_owned(), + cause: e, } - } + }) } } -impl<'a> ExactSizeIterator for Tags<'a> { - fn len(&self) -> usize { - // Number of tags yielded is number of remaining semicolons plus one, unless the - // remaining buffer is empty. - if self.remaining.len() == 0 { - 0 - } else { - self.remaining.chars().filter(|&c| c == ';').count() + 1 - } +impl<'a> From<&'a str> for Message { + fn from(s: &'a str) -> Message { + s.parse().unwrap() } } -/// An iterator over a message's arguments. See [`Message::args`] for more information. -/// -/// [`Message::args`]: ./struct.Message.html#method.args -pub struct Args<'a> { - remaining: &'a str, -} - -impl<'a> Iterator for Args<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option { - // If remaining slice is non-empty, we still have args to take: - if self.remaining.len() > 0 { - // Next arg is everything from here to next whitespace character (or end of string). - let arg_end = self.remaining.find(' ').unwrap_or(self.remaining.len()); - let arg = &self.remaining[..arg_end]; - - // Trim this arg and its trailing spaces out of remaining. - self.remaining = self.remaining[arg_end..].trim_start_matches(' '); - - Some(arg) - } else { - // No more args to parse. - None - } +impl Display for Message { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + write!(f, "{}", self.to_string()) } } -impl<'a> ExactSizeIterator for Args<'a> { - fn len(&self) -> usize { - // Number of args remaining is equal to the number of points where a non-space - // character is preceded by a space character or the beginning of the string. - let mut len = 0; - let mut last = true; - for c in self.remaining.chars() { - let current = c == ' '; - if (last, current) == (true, false) { - len += 1; - } - last = current; - } - len - } -} +/// A message tag as defined by [IRCv3.2](http://ircv3.net/specs/core/message-tags-3.2.html). +/// It consists of a tag key, and an optional value for the tag. Each message can contain a number +/// of tags (in the string format, they are separated by semicolons). Tags are used to add extended +/// information to a message under IRCv3. +#[derive(Clone, PartialEq, Debug)] +pub struct Tag(pub String, pub Option); #[cfg(test)] mod test { - - - - // Legacy tests - // TODO: Adapt to new message/command API - - /* use super::{Message, Tag}; use command::Command::{PRIVMSG, QUIT, Raw}; #[test] - #[ignore] fn new() { let message = Message { tags: None, @@ -595,7 +288,6 @@ mod test { } #[test] - #[ignore] fn source_nickname() { assert_eq!( Message::new(None, "PING", vec![], Some("data")) @@ -655,7 +347,6 @@ mod test { } #[test] - #[ignore] fn to_string() { let message = Message { tags: None, @@ -675,7 +366,6 @@ mod test { } #[test] - #[ignore] fn from_string() { let message = Message { tags: None, @@ -716,7 +406,6 @@ mod test { } #[test] - #[ignore] fn from_string_atypical_endings() { let message = Message { tags: None, @@ -738,7 +427,6 @@ mod test { } #[test] - #[ignore] fn from_and_to_string() { let message = "@aaa=bbb;ccc;example.com/ddd=eee :test!test@test PRIVMSG test :Testing with \ tags!\r\n"; @@ -746,7 +434,6 @@ mod test { } #[test] - #[ignore] fn to_message() { let message = Message { tags: None, @@ -765,7 +452,6 @@ mod test { } #[test] - #[ignore] fn to_message_with_colon_in_arg() { // Apparently, UnrealIRCd (and perhaps some others) send some messages that include // colons within individual parameters. So, let's make sure it parses correctly. @@ -783,7 +469,6 @@ mod test { } #[test] - #[ignore] fn to_message_no_prefix_no_args() { let message = Message { tags: None, @@ -795,10 +480,8 @@ mod test { } #[test] - #[ignore] #[should_panic] fn to_message_invalid_format() { let _: Message = ":invalid :message".into(); } - */ } From a165e9a93106592a5832d64e5c3459b6a5fcbe3b Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Tue, 11 Jun 2019 00:10:24 -0500 Subject: [PATCH 11/14] Test fixes --- irc-proto/src/buf.rs | 18 +++++++++--------- irc-proto/src/command.rs | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/irc-proto/src/buf.rs b/irc-proto/src/buf.rs index fd325f55..85b27fa4 100644 --- a/irc-proto/src/buf.rs +++ b/irc-proto/src/buf.rs @@ -66,7 +66,7 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::MessageBuf; + /// use irc_proto::buf::MessageBuf; /// /// let message = MessageBuf::parse("PRIVMSG #rust :Hello Rustaceans!\r\n")?; /// # Ok(()) @@ -92,7 +92,7 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::MessageBuf; + /// use irc_proto::buf::MessageBuf; /// /// let message = MessageBuf::parse_string("NICK ferris\r\n".to_string())?; /// # Ok(()) @@ -236,7 +236,7 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::MessageBuf; + /// use irc_proto::buf::MessageBuf; /// /// let raw_message = "JOIN #rust\r\n"; /// let parsed_message = MessageBuf::parse(raw_message)?; @@ -253,7 +253,7 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::MessageBuf; + /// use irc_proto::buf::MessageBuf; /// /// let raw_message = "JOIN #rust\r\n"; /// let parsed_message = MessageBuf::parse(raw_message)?; @@ -277,7 +277,7 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::MessageBuf; + /// use irc_proto::buf::MessageBuf; /// use std::borrow::Cow; /// /// let message = MessageBuf::parse( @@ -306,7 +306,7 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::MessageBuf; + /// use irc_proto::buf::MessageBuf; /// /// let message = MessageBuf::parse(":nick!ident@host.com PRIVMSG me :Hello\r\n")?; /// assert_eq!(message.prefix(), Some("nick!ident@host.com")); @@ -323,7 +323,7 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::MessageBuf; + /// use irc_proto::buf::MessageBuf; /// /// let message = MessageBuf::parse("NICK ferris\r\n")?; /// assert_eq!(message.command(), "NICK"); @@ -342,7 +342,7 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::MessageBuf; + /// use irc_proto::buf::MessageBuf; /// /// let message = MessageBuf::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; /// let mut args = message.args(); @@ -366,7 +366,7 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::MessageBuf; + /// use irc_proto::buf::MessageBuf; /// /// let message = MessageBuf::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; /// assert_eq!(message.suffix(), Some("Ronnie Reagan")); diff --git a/irc-proto/src/command.rs b/irc-proto/src/command.rs index dd0f1b34..387811ad 100644 --- a/irc-proto/src/command.rs +++ b/irc-proto/src/command.rs @@ -1802,13 +1802,13 @@ mod test { fn user_round_trip() { let cmd = Command::USER("a".to_string(), "b".to_string(), "c".to_string()); let line = Message::from(cmd.clone()).to_string(); - let returned_cmd = line.parse::().unwrap().command().unwrap(); + let returned_cmd = line.parse::().unwrap().command; assert_eq!(cmd, returned_cmd); } #[test] fn parse_user_message() { - let cmd = "USER a 0 * b\r\n".parse::().unwrap().command().unwrap(); + let cmd = "USER a 0 * b".parse::().unwrap().command; assert_eq!(Command::USER("a".to_string(), "0".to_string(), "b".to_string()), cmd); } } From 58fbbe480ad58db9d07826f422d36cf69a43d365 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Wed, 12 Jun 2019 01:40:32 -0500 Subject: [PATCH 12/14] Move to buffered module --- irc-proto/src/{buf.rs => buffered/message.rs} | 60 +++++++++---------- irc-proto/src/buffered/mod.rs | 3 + irc-proto/src/lib.rs | 2 +- 3 files changed, 34 insertions(+), 31 deletions(-) rename irc-proto/src/{buf.rs => buffered/message.rs} (91%) create mode 100644 irc-proto/src/buffered/mod.rs diff --git a/irc-proto/src/buf.rs b/irc-proto/src/buffered/message.rs similarity index 91% rename from irc-proto/src/buf.rs rename to irc-proto/src/buffered/message.rs index 85b27fa4..7ceddf66 100644 --- a/irc-proto/src/buf.rs +++ b/irc-proto/src/buffered/message.rs @@ -32,7 +32,7 @@ pub const MAX_BYTES: usize = u16::max_value() as usize; /// A parsed IRC message string, containing a single buffer with pointers to the individual parts. #[derive(Clone, PartialEq, Debug)] -pub struct MessageBuf { +pub struct Message { buf: String, tags: Option, prefix: Option, @@ -41,7 +41,7 @@ pub struct MessageBuf { suffix: Option, } -impl MessageBuf { +impl Message { /// Parses the message, converting the given object into an owned string. /// /// This will allocate a new `String` to hold the message data, even if a `String` is @@ -66,9 +66,9 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::buf::MessageBuf; + /// use irc_proto::buffered::message::Message; /// - /// let message = MessageBuf::parse("PRIVMSG #rust :Hello Rustaceans!\r\n")?; + /// let message = Message::parse("PRIVMSG #rust :Hello Rustaceans!\r\n")?; /// # Ok(()) /// # } /// ``` @@ -81,7 +81,7 @@ impl MessageBuf { where S: ToString, { - MessageBuf::parse_string(message.to_string()) + Message::parse_string(message.to_string()) } /// Takes ownership of the given string and parses it into a message. @@ -92,9 +92,9 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::buf::MessageBuf; + /// use irc_proto::buffered::message::Message; /// - /// let message = MessageBuf::parse_string("NICK ferris\r\n".to_string())?; + /// let message = Message::parse_string("NICK ferris\r\n".to_string())?; /// # Ok(()) /// # } /// ``` @@ -220,7 +220,7 @@ impl MessageBuf { } // Done parsing. - Ok(MessageBuf { + Ok(Message { buf: message, tags, prefix, @@ -236,10 +236,10 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::buf::MessageBuf; + /// use irc_proto::buffered::message::Message; /// /// let raw_message = "JOIN #rust\r\n"; - /// let parsed_message = MessageBuf::parse(raw_message)?; + /// let parsed_message = Message::parse(raw_message)?; /// assert_eq!(parsed_message.as_str(), raw_message); /// # Ok(()) /// # } @@ -253,10 +253,10 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::buf::MessageBuf; + /// use irc_proto::buffered::message::Message; /// /// let raw_message = "JOIN #rust\r\n"; - /// let parsed_message = MessageBuf::parse(raw_message)?; + /// let parsed_message = Message::parse(raw_message)?; /// assert_eq!(parsed_message.into_string(), raw_message); /// # Ok(()) /// # } @@ -277,10 +277,10 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::buf::MessageBuf; + /// use irc_proto::buffered::message::Message; /// use std::borrow::Cow; /// - /// let message = MessageBuf::parse( + /// let message = Message::parse( /// "@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello\r\n" /// )?; /// @@ -306,9 +306,9 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::buf::MessageBuf; + /// use irc_proto::buffered::message::Message; /// - /// let message = MessageBuf::parse(":nick!ident@host.com PRIVMSG me :Hello\r\n")?; + /// let message = Message::parse(":nick!ident@host.com PRIVMSG me :Hello\r\n")?; /// assert_eq!(message.prefix(), Some("nick!ident@host.com")); /// # Ok(()) /// # } @@ -323,9 +323,9 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::buf::MessageBuf; + /// use irc_proto::buffered::message::Message; /// - /// let message = MessageBuf::parse("NICK ferris\r\n")?; + /// let message = Message::parse("NICK ferris\r\n")?; /// assert_eq!(message.command(), "NICK"); /// # Ok(()) /// # } @@ -342,9 +342,9 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::buf::MessageBuf; + /// use irc_proto::buffered::message::Message; /// - /// let message = MessageBuf::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; + /// let message = Message::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; /// let mut args = message.args(); /// assert_eq!(args.len(), 3); /// assert_eq!(args.next(), Some("guest")); @@ -366,9 +366,9 @@ impl MessageBuf { /// /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { - /// use irc_proto::buf::MessageBuf; + /// use irc_proto::buffered::message::Message; /// - /// let message = MessageBuf::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; + /// let message = Message::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; /// assert_eq!(message.suffix(), Some("Ronnie Reagan")); /// # Ok(()) /// # } @@ -377,30 +377,30 @@ impl MessageBuf { } } -impl FromStr for MessageBuf { +impl FromStr for Message { type Err = ProtocolError; fn from_str(s: &str) -> Result { - MessageBuf::parse(s) + Message::parse(s) .map_err(|err| ProtocolError::InvalidMessage { string: s.to_string(), cause: err }) } } -impl AsRef for MessageBuf { +impl AsRef for Message { fn as_ref(&self) -> &str { self.as_str() } } -impl fmt::Display for MessageBuf { +impl fmt::Display for Message { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(&self.buf) } } -/// A parser iterator over a message's tags. See [`MessageBuf::tags`] for more information. +/// A parser iterator over a message's tags. See [`Message::tags`] for more information. /// -/// [`MessageBuf::tags`]: ./struct.MessageBuf.html#method.tags +/// [`Message::tags`]: ./struct.Message.html#method.tags pub struct Tags<'a> { remaining: &'a str, } @@ -495,9 +495,9 @@ impl<'a> ExactSizeIterator for Tags<'a> { } } -/// An iterator over a message's arguments. See [`MessageBuf::args`] for more information. +/// An iterator over a message's arguments. See [`Message::args`] for more information. /// -/// [`MessageBuf::args`]: ./struct.MessageBuf.html#method.args +/// [`Message::args`]: ./struct.Message.html#method.args pub struct Args<'a> { remaining: &'a str, } diff --git a/irc-proto/src/buffered/mod.rs b/irc-proto/src/buffered/mod.rs new file mode 100644 index 00000000..889c0b8d --- /dev/null +++ b/irc-proto/src/buffered/mod.rs @@ -0,0 +1,3 @@ +//! New zero-copy/buffered API. + +pub mod message; diff --git a/irc-proto/src/lib.rs b/irc-proto/src/lib.rs index 2ff12b5c..e44976cf 100644 --- a/irc-proto/src/lib.rs +++ b/irc-proto/src/lib.rs @@ -12,7 +12,7 @@ extern crate tokio_codec; #[cfg(feature = "tokio")] extern crate tokio_io; -pub mod buf; +pub mod buffered; pub mod caps; pub mod chan; pub mod colors; From cf97aa7d2ee0c77a2351bf03d0ce9506b8aeace7 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Wed, 12 Jun 2019 02:13:15 -0500 Subject: [PATCH 13/14] Add prefix parsing --- irc-proto/src/buffered/message.rs | 109 +++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 3 deletions(-) diff --git a/irc-proto/src/buffered/message.rs b/irc-proto/src/buffered/message.rs index 7ceddf66..33d4a98b 100644 --- a/irc-proto/src/buffered/message.rs +++ b/irc-proto/src/buffered/message.rs @@ -36,6 +36,9 @@ pub struct Message { buf: String, tags: Option, prefix: Option, + sender_name: Option, + sender_user: Option, + sender_host: Option, command: Part, args: Part, suffix: Option, @@ -138,17 +141,63 @@ impl Message { // If word starts with ':', it is a prefix. let prefix; + let sender_name; + let sender_user; + let sender_host; if message[i..].starts_with(':') { // Take everything between ':' and next space. i += ':'.len_utf8(); - let start = i; + let prefix_start = i; i += message[i..].find(' ').unwrap_or_else(|| crlf - i); - let end = i; + let prefix_end = i; + + prefix = Some(Part::new(prefix_start, prefix_end)); + + let prefix_str = &message[prefix_start..prefix_end]; + + if let Some(at_idx) = prefix_str.find('@') { + // ...@host + let host_start = prefix_start + at_idx + '@'.len_utf8(); + let host_end = prefix_end; + + sender_host = Some(Part::new(host_start, host_end)); + + if let Some(exclam_idx) = prefix_str[..at_idx].find('!') { + // name!user@host + + let name_start = prefix_start; + let name_end = prefix_start + exclam_idx; + + sender_name = Some(Part::new(name_start, name_end)); + + let user_start = prefix_start + exclam_idx + '!'.len_utf8(); + let user_end = prefix_start + at_idx; + + sender_user = Some(Part::new(user_start, user_end)); + } else { + // name@host + let name_start = prefix_start; + let name_end = prefix_start + at_idx; + + sender_name = Some(Part::new(name_start, name_end)); + sender_user = None; + } + } else { + // name only + + let name_start = prefix_start; + let name_end = prefix_end; - prefix = Some(Part::new(start, end)); + sender_name = Some(Part::new(name_start, name_end)); + sender_user = None; + sender_host = None; + } } else { prefix = None; + sender_name = None; + sender_user = None; + sender_host = None; } // Skip to next non-space. @@ -224,6 +273,9 @@ impl Message { buf: message, tags, prefix, + sender_name, + sender_user, + sender_host, command, args, suffix, @@ -317,6 +369,57 @@ impl Message { self.prefix.as_ref().map(|part| part.index(&self.buf)) } + /// Returns a string slice containing the message's sender if it was provided. It may be either + /// a server name or a nickname. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::buffered::message::Message; + /// + /// let message = Message::parse(":nick!ident@host.com PRIVMSG me :Hello\r\n")?; + /// assert_eq!(message.sender_name(), Some("nick")); + /// # Ok(()) + /// # } + /// ``` + pub fn sender_name(&self) -> Option<&str> { + self.sender_name.as_ref().map(|part| part.index(&self.buf)) + } + + /// Returns a string slice containing the username of the message's sender if it was provided. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::buffered::message::Message; + /// + /// let message = Message::parse(":nick!ident@host.com PRIVMSG me :Hello\r\n")?; + /// assert_eq!(message.sender_user(), Some("ident")); + /// # Ok(()) + /// # } + /// ``` + pub fn sender_user(&self) -> Option<&str> { + self.sender_user.as_ref().map(|part| part.index(&self.buf)) + } + + /// Returns a string slice containing the hostname of the message's sender if it was provided. + /// + /// # Examples + /// + /// ``` + /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { + /// use irc_proto::buffered::message::Message; + /// + /// let message = Message::parse(":nick!ident@host.com PRIVMSG me :Hello\r\n")?; + /// assert_eq!(message.sender_host(), Some("host.com")); + /// # Ok(()) + /// # } + pub fn sender_host(&self) -> Option<&str> { + self.sender_host.as_ref().map(|part| part.index(&self.buf)) + } + /// Returns a string slice containing the message's command. /// /// # Examples From aa4f08a85253f5d21fd1a92921f058b4e3dc0d40 Mon Sep 17 00:00:00 2001 From: Adam Gausmann Date: Wed, 12 Jun 2019 02:15:24 -0500 Subject: [PATCH 14/14] Reformat --- irc-proto/src/buffered/message.rs | 36 ++++++++++++++++--------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/irc-proto/src/buffered/message.rs b/irc-proto/src/buffered/message.rs index 33d4a98b..2a9d17a7 100644 --- a/irc-proto/src/buffered/message.rs +++ b/irc-proto/src/buffered/message.rs @@ -6,7 +6,6 @@ use std::str::FromStr; use error::{MessageParseError, ProtocolError}; - #[derive(Debug, PartialEq, Eq, Clone, Copy)] struct Part { start: u16, @@ -160,9 +159,9 @@ impl Message { // ...@host let host_start = prefix_start + at_idx + '@'.len_utf8(); let host_end = prefix_end; - + sender_host = Some(Part::new(host_start, host_end)); - + if let Some(exclam_idx) = prefix_str[..at_idx].find('!') { // name!user@host @@ -323,7 +322,7 @@ impl Message { /// stored in an owned buffer. /// /// This parser will not dedupe tags, nor will it check whether the tag's key is empty or - /// whether it contains illegal characters. + /// whether it contains illegal characters. /// /// # Examples /// @@ -348,7 +347,11 @@ impl Message { /// ``` pub fn tags(&self) -> Tags { Tags { - remaining: self.tags.as_ref().map(|part| part.index(&self.buf)).unwrap_or(""), + remaining: self + .tags + .as_ref() + .map(|part| part.index(&self.buf)) + .unwrap_or(""), } } @@ -437,7 +440,7 @@ impl Message { self.command.index(&self.buf) } - /// Returns a parser iterator over the message's arguments. The iterator will produce items of + /// Returns a parser iterator over the message's arguments. The iterator will produce items of /// `&str` for each argument in order, containing the raw data in the argument. It is entirely /// zero-copy, borrowing each argument slice directly from the message buffer. /// @@ -470,7 +473,7 @@ impl Message { /// ``` /// # fn main() -> Result<(), irc_proto::error::MessageParseError> { /// use irc_proto::buffered::message::Message; - /// + /// /// let message = Message::parse("USER guest tolmoon tolsun :Ronnie Reagan\r\n")?; /// assert_eq!(message.suffix(), Some("Ronnie Reagan")); /// # Ok(()) @@ -484,8 +487,10 @@ impl FromStr for Message { type Err = ProtocolError; fn from_str(s: &str) -> Result { - Message::parse(s) - .map_err(|err| ProtocolError::InvalidMessage { string: s.to_string(), cause: err }) + Message::parse(s).map_err(|err| ProtocolError::InvalidMessage { + string: s.to_string(), + cause: err, + }) } } @@ -517,7 +522,8 @@ impl<'a> Iterator for Tags<'a> { None } else { // Take everything from here to next ';'. - let tag = self.remaining + let tag = self + .remaining .char_indices() .find(|&(_i, c)| c == ';') .map(|(i, _c)| &self.remaining[..i]) @@ -529,7 +535,7 @@ impl<'a> Iterator for Tags<'a> { } else { self.remaining = &self.remaining[tag.len() + ';'.len_utf8()..]; } - + // If an equal sign exists in the tag data, it must have an associated value. if let Some(key_end) = tag.find('=') { // Everything before the first equal sign is the key. @@ -562,12 +568,8 @@ impl<'a> Iterator for Tags<'a> { // everything after the parsed escape sequence. // Upon looping, it will start searching from this point, skipping the last // escape sequence. - raw_value = &raw_value[ - (escape_idx - + '\\'.len_utf8() - + c.map(char::len_utf8).unwrap_or(0) - ).. - ]; + raw_value = &raw_value + [(escape_idx + '\\'.len_utf8() + c.map(char::len_utf8).unwrap_or(0))..]; } // If we didn't add data, no escape sequences exist and the raw value can be