An Example of Calculating Attention (Softmax) over Variable Sequence Lengths in TensorFlow

import tensorflow as tf
import numpy as np

# a holds the attention weights produced by a softmax, shape [batch_size, time_step, input_size]
a = np.array([[[0.2],[0.3],[0.4],[0.1]],[[0.3],[0.1],[0.2],[0.4]]])
# l holds the valid sequence lengths, shape [batch_size]: the first sequence has length 2, the second has length 3
# so in a, the last two steps of the first sequence and the last step of the second are padding and should get zero weight
l = np.array([2,3])

aa = tf.convert_to_tensor(a,dtype=tf.float32)
ll = tf.convert_to_tensor(l, dtype=tf.int32)  # sequence lengths are integers

def realAttention(attention, length):
    # attention: [batch_size, max_length, 1] softmax weights computed over the padded length
    # length:    [batch_size] valid length of each sequence
    max_length = tf.shape(attention)[1]
    # mask is 1.0 for valid steps and 0.0 for padding, shape [batch_size, max_length]
    mask = tf.sequence_mask(length, max_length, tf.float32)
    mask = tf.reshape(mask, [-1, max_length, 1])
    # zero out the padded steps, then renormalize so each sequence sums to 1 again
    a = attention * mask
    a = a / tf.reduce_sum(a, axis=1, keep_dims=True)
    return a

real_attention = realAttention(aa,ll)
init = tf.global_variables_initializer() 
init_local = tf.local_variables_initializer()

with tf.Session() as sess:
    sess.run([init, init_local])
    print(sess.run(real_attention))

The result is:

[[[0.4       ]
  [0.6       ]
  [0.        ]
  [0.        ]]

 [[0.5       ]
  [0.16666666]
  [0.3333333 ]
  [0.        ]]]

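Within each sequence the surviving weights are simply renormalized: 0.2/(0.2+0.3)=0.4 and 0.3/(0.2+0.3)=0.6 for the first sequence, and 0.3/0.6=0.5, 0.1/0.6≈0.1667, 0.2/0.6≈0.3333 for the second, while the padded steps are forced to 0.

An equally common alternative, if the raw (pre-softmax) attention scores are available, is to mask the logits before the softmax by adding a large negative value at the padded positions, so they receive essentially zero probability. Below is a minimal sketch under that assumption; the score values in logits and the name maskedSoftmax are made up for illustration:

import tensorflow as tf
import numpy as np

# hypothetical unnormalized attention scores, shape [batch_size, time_step, 1]
logits = tf.convert_to_tensor(
    np.array([[[1.0], [2.0], [0.5], [0.3]],
              [[0.2], [1.5], [0.8], [2.0]]]), dtype=tf.float32)
lengths = tf.convert_to_tensor(np.array([2, 3]), dtype=tf.int32)

def maskedSoftmax(logits, length):
    max_length = tf.shape(logits)[1]
    mask = tf.sequence_mask(length, max_length, tf.float32)  # [batch_size, max_length]
    mask = tf.reshape(mask, [-1, max_length, 1])              # [batch_size, max_length, 1]
    # push padded positions towards -inf so they get ~0 probability after the softmax
    masked_logits = logits + (1.0 - mask) * -1e9
    # numerically stable softmax over the time axis
    e = tf.exp(masked_logits - tf.reduce_max(masked_logits, axis=1, keep_dims=True))
    return e / tf.reduce_sum(e, axis=1, keep_dims=True)

with tf.Session() as sess:
    print(sess.run(maskedSoftmax(logits, lengths)))

Masking the logits avoids the extra renormalization and never divides by a sum that could be zero; over the valid steps it produces the same probabilities as renormalizing the full softmax.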