Struct burn::nn::attention::MultiHeadAttentionConfig
pub struct MultiHeadAttentionConfig {
pub d_model: usize,
pub n_heads: usize,
pub dropout: f64,
pub min_float: f64,
pub quiet_softmax: bool,
pub initializer: Initializer,
}
Configuration to create a Multi Head Attention layer using the init function.
Fields
d_model: usize
The size of each linear layer.
n_heads: usize
The number of heads.
dropout: f64
The dropout rate. Default: 0.1
min_float: f64
The minimum value a float can take. Default: -1.0e4. This is used to mask attention scores before calculating attention weights; a value that is too low might result in NaN.
quiet_softmax: bool
Use “quiet softmax” instead of regular softmax (a small sketch contrasting the two appears after the field list).
- Usage may improve performance by allowing attention heads to deposit no information when the sequence contains nothing relevant to that head.
- Usage may reduce the entropy of the model's weights, enhancing quantization and compression.
Reference: https://www.evanmiller.org/attention-is-off-by-one.html
initializer: Initializer
The type of function used to initialize neural network parameters.
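Below is a minimal sketch of the distinction referenced by the quiet_softmax field, based on the linked article. This is plain illustrative Rust, not part of the burn API, and the function names are made up for the example: quiet softmax adds 1 to the softmax denominator, so a head's weights can sum to less than 1 and effectively attend to nothing.

// Illustrative only: not the burn implementation (which also handles masking
// and numerical stability).
fn softmax(scores: &[f64]) -> Vec<f64> {
    // Regular softmax: weights always sum to 1.
    let denom: f64 = scores.iter().map(|s| s.exp()).sum();
    scores.iter().map(|s| s.exp() / denom).collect()
}

fn quiet_softmax(scores: &[f64]) -> Vec<f64> {
    // "Quiet" softmax: the extra 1 in the denominator lets all weights
    // shrink toward zero when no score stands out.
    let denom: f64 = 1.0 + scores.iter().map(|s| s.exp()).sum::<f64>();
    scores.iter().map(|s| s.exp() / denom).collect()
}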
Implementations
impl MultiHeadAttentionConfig
pub fn new(d_model: usize, n_heads: usize) -> MultiHeadAttentionConfig
Create a new instance of the config.
impl MultiHeadAttentionConfig
pub fn with_dropout(self, dropout: f64) -> MultiHeadAttentionConfig
The dropout rate. Default: 0.1
pub fn with_min_float(self, min_float: f64) -> MultiHeadAttentionConfig
The minimum value a float can take. Default: -1.0e4
pub fn with_quiet_softmax(self, quiet_softmax: bool) -> MultiHeadAttentionConfig
Use “quiet softmax” instead of regular softmax.
pub fn with_initializer(self, initializer: Initializer) -> MultiHeadAttentionConfig
The type of function used to initialize neural network parameters.
impl MultiHeadAttentionConfig
pub fn init<B>(&self, device: &<B as Backend>::Device) -> MultiHeadAttention<B> where B: Backend
Initialize a new multi-head attention module.
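A short usage sketch, assuming a generic backend B; the hyperparameter values (512, 8, and the setter arguments) are arbitrary, and the import paths follow the module path shown at the top of this page:

use burn::nn::attention::{MultiHeadAttention, MultiHeadAttentionConfig};
use burn::tensor::backend::Backend;

// Build a multi-head attention module from the config for any backend `B`.
fn build_attention<B: Backend>(device: &B::Device) -> MultiHeadAttention<B> {
    MultiHeadAttentionConfig::new(512, 8) // d_model = 512, n_heads = 8
        .with_dropout(0.1)                // default is 0.1; shown for completeness
        .with_quiet_softmax(true)         // opt in to the "quiet softmax" variant
        .init(device)
}

Omitting the with_* calls keeps the defaults listed above; new only requires d_model and n_heads.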
Trait Implementations
impl Clone for MultiHeadAttentionConfig
fn clone(&self) -> MultiHeadAttentionConfig
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.
impl Config for MultiHeadAttentionConfig
impl<'de> Deserialize<'de> for MultiHeadAttentionConfig
fn deserialize<D>(deserializer: D) -> Result<MultiHeadAttentionConfig, <D as Deserializer<'de>>::Error> where D: Deserializer<'de>
impl Display for MultiHeadAttentionConfig
impl Serialize for MultiHeadAttentionConfig
fn serialize<S>(&self, serializer: S) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error> where S: Serializer
Auto Trait Implementations
impl Freeze for MultiHeadAttentionConfig
impl RefUnwindSafe for MultiHeadAttentionConfig
impl Send for MultiHeadAttentionConfig
impl Sync for MultiHeadAttentionConfig
impl Unpin for MultiHeadAttentionConfig
impl UnwindSafe for MultiHeadAttentionConfig
Blanket Implementations
impl<T> BorrowMut<T> for T where T: ?Sized
fn borrow_mut(&mut self) -> &mut T
impl<T> CloneToUninit for T where T: Clone
unsafe fn clone_to_uninit(&self, dst: *mut T)
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true; otherwise, converts self into a Right variant.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true; otherwise, converts self into a Right variant.
impl<T> Pointable for T
impl<T> ToCompactString for T where T: Display
fn try_to_compact_string(&self) -> Result<CompactString, ToCompactStringError>
Fallible version of ToCompactString::to_compact_string().
fn to_compact_string(&self) -> CompactString
Converts the value to a CompactString.