import torch

# SelfAttention_v1 (nn.Parameter weights) and SelfAttention_v2 (nn.Linear
# layers) plus d_in and d_out are assumed to be defined already.
sa_v1 = SelfAttention_v1(d_in, d_out)
sa_v2 = SelfAttention_v2(d_in, d_out)

# Transfer weights from sa_v2 to sa_v1; nn.Linear stores its weight as
# (d_out, d_in), so transpose to match sa_v1's (d_in, d_out) parameters.
with torch.no_grad():
    sa_v1.W_query.copy_(sa_v2.W_query.weight.T)
    sa_v1.W_key.copy_(sa_v2.W_key.weight.T)
    sa_v1.W_value.copy_(sa_v2.W_value.weight.T)

x = torch.randn(10, d_in)  # batch of 10 input vectors
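As a quick sanity check (a minimal sketch, assuming the two classes and x above), the two modules should now produce identical outputs, since they apply the same weight matrices:

# If the transposed copy above is correct, both versions agree.
out_v1 = sa_v1(x)
out_v2 = sa_v2(x)
print(torch.allclose(out_v1, out_v2))  # expected: True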
# <type>(<scope>): <subject>
#
# <body>
#
# <footer>
#
# Types:
#   feat  (new feature)
#   fix   (bug fix)
#   docs  (changes to documentation)
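For illustration, a hypothetical commit message filled in against this template (the scope and subject below are made up) might read:

feat(attention): transfer weights between attention variants

Copy transposed nn.Linear weights from SelfAttention_v2 into
SelfAttention_v1 so both implementations produce identical outputs.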
import numpy as np

def getPositionalEncoding(seq_len, d=4, n=10000):
    # Sinusoidal positional encoding: even columns get sin, odd columns
    # get cos, with frequencies decaying geometrically as n^(2i/d) grows.
    PE = np.zeros((seq_len, d))
    for i in range(d // 2):
        denominator = np.power(n, 2 * i / d)
        PE[:, 2 * i] = np.sin(np.arange(seq_len) / denominator)
        PE[:, 2 * i + 1] = np.cos(np.arange(seq_len) / denominator)
    return PE

seq_len = 2
PE = getPositionalEncoding(seq_len=seq_len)
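Printing the result for seq_len=2 with the defaults d=4, n=10000 shows the expected pattern: position 0 encodes to alternating (sin 0, cos 0) = (0, 1) pairs, while position 1 gives sin/cos of 1 in the fast columns and of 1/100 in the slow ones:

print(PE)
# [[0.         1.         0.         1.        ]
#  [0.84147098 0.54030231 0.00999983 0.99995   ]]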