Struct burn::nn::attention::MultiHeadAttentionConfig
pub struct MultiHeadAttentionConfig {
pub d_model: usize,
pub n_heads: usize,
pub dropout: f64,
pub min_float: f64,
pub quiet_softmax: bool,
pub initializer: Initializer,
}
Configuration to create a Multi Head Attention layer using the init function.
Fields
d_model: usize
The size of each linear layer.
n_heads: usize
The number of heads.
dropout: f64
The dropout rate. Default: 0.1
min_float: f64
The minimum value a float can take. Default: -1.0e4. This is used to mask attention scores before calculating attention weights; a value that is too low might result in NaN.
quiet_softmax: bool
Use “quiet softmax” instead of regular softmax (a small sketch contrasting the two appears after the field list).
- Usage may improve performance by allowing attention heads to deposit no information when the sequence contains nothing relevant to that head.
- Usage may reduce the entropy of the model's weights, enhancing quantization and compression.
Reference: https://www.evanmiller.org/attention-is-off-by-one.html
initializer: Initializer
The type of function used to initialize neural network parameters.
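Below is a minimal sketch of the distinction referenced by the quiet_softmax field, based on the linked article. This is plain illustrative Rust, not part of the burn API, and the function names are made up for the example: quiet softmax adds 1 to the softmax denominator, so a head's weights can sum to less than 1 and effectively attend to nothing.

// Illustrative only: not the burn implementation (which also handles masking
// and numerical stability).
fn softmax(scores: &[f64]) -> Vec<f64> {
    // Regular softmax: weights always sum to 1.
    let denom: f64 = scores.iter().map(|s| s.exp()).sum();
    scores.iter().map(|s| s.exp() / denom).collect()
}

fn quiet_softmax(scores: &[f64]) -> Vec<f64> {
    // "Quiet" softmax: the extra 1 in the denominator lets all weights
    // shrink toward zero when no score stands out.
    let denom: f64 = 1.0 + scores.iter().map(|s| s.exp()).sum::<f64>();
    scores.iter().map(|s| s.exp() / denom).collect()
}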
Implementations
impl MultiHeadAttentionConfig
pub fn new(d_model: usize, n_heads: usize) -> MultiHeadAttentionConfig
Create a new instance of the config.
impl MultiHeadAttentionConfig
pub fn with_dropout(self, dropout: f64) -> MultiHeadAttentionConfig
The dropout rate. Default: 0.1
pub fn with_min_float(self, min_float: f64) -> MultiHeadAttentionConfig
The minimum value a float can take. Default: -1.0e4
pub fn with_quiet_softmax(self, quiet_softmax: bool) -> MultiHeadAttentionConfig
Use “quiet softmax” instead of regular softmax.
pub fn with_initializer(self, initializer: Initializer) -> MultiHeadAttentionConfig
The type of function used to initialize neural network parameters.
impl MultiHeadAttentionConfig
pub fn init<B>(&self, device: &<B as Backend>::Device) -> MultiHeadAttention<B> where B: Backend
Initialize a new multi-head attention module.
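A short usage sketch, assuming a generic backend B; the hyperparameter values (512, 8, and the setter arguments) are arbitrary, and the import paths follow the module path shown at the top of this page:

use burn::nn::attention::{MultiHeadAttention, MultiHeadAttentionConfig};
use burn::tensor::backend::Backend;

// Build a multi-head attention module from the config for any backend `B`.
fn build_attention<B: Backend>(device: &B::Device) -> MultiHeadAttention<B> {
    MultiHeadAttentionConfig::new(512, 8) // d_model = 512, n_heads = 8
        .with_dropout(0.1)                // default is 0.1; shown for completeness
        .with_quiet_softmax(true)         // opt in to the "quiet softmax" variant
        .init(device)
}

Omitting the with_* calls keeps the defaults listed above; new only requires d_model and n_heads.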
Trait Implementations
impl Clone for MultiHeadAttentionConfig
fn clone(&self) -> MultiHeadAttentionConfig
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.
impl Config for MultiHeadAttentionConfig
impl<'de> Deserialize<'de> for MultiHeadAttentionConfig
fn deserialize<D>(deserializer: D) -> Result<MultiHeadAttentionConfig, <D as Deserializer<'de>>::Error> where D: Deserializer<'de>
impl Display for MultiHeadAttentionConfig
impl Serialize for MultiHeadAttentionConfig
fn serialize<S>(&self, serializer: S) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error> where S: Serializer
Auto Trait Implementations
impl Freeze for MultiHeadAttentionConfig
impl RefUnwindSafe for MultiHeadAttentionConfig
impl Send for MultiHeadAttentionConfig
impl Sync for MultiHeadAttentionConfig
impl Unpin for MultiHeadAttentionConfig
impl UnwindSafe for MultiHeadAttentionConfig
Blanket Implementations
impl<T> BorrowMut<T> for T where T: ?Sized
fn borrow_mut(&mut self) -> &mut T
impl<T> CloneToUninit for T where T: Clone
unsafe fn clone_to_uninit(&self, dst: *mut T)
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true; otherwise, converts self into a Right variant.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true; otherwise, converts self into a Right variant.
impl<T> Pointable for T
impl<T> ToCompactString for T where T: Display
fn try_to_compact_string(&self) -> Result<CompactString, ToCompactStringError>
Fallible version of ToCompactString::to_compact_string().
fn to_compact_string(&self) -> CompactString
Converts the value to a CompactString.