import numpy as np
def orthonormal_initializer(output_size, input_size):
    """Return a random, approximately orthonormal float32 weight matrix.

    Adapted from Timothy Dozat:
    https://github.com/tdozat/Parser/blob/master/lib/linalg.py

    A Gaussian matrix ``Q`` of shape ``(input_size, output_size)`` is refined
    with up to 100 scaled gradient steps on the penalty
    ``sum((Q.T.dot(Q) - I) ** 2) / 2``.  If a run diverges, the learning rate
    is halved and a fresh matrix is drawn, for at most 10 attempts.  If every
    attempt diverges, a plain scaled Gaussian matrix is used instead.

    Args:
        output_size: Number of rows of the returned matrix.
        input_size: Number of columns of the returned matrix.

    Returns:
        ``numpy.ndarray`` of shape ``(output_size, input_size)`` and dtype
        ``float32`` (the transpose of the internally optimised ``Q``).
    """
    max_tries = 10
    num_steps = 100
    blowup_limit = 1e6

    identity = np.eye(output_size)
    lr = .1
    eps = .05 / (output_size + input_size)
    success = False
    tries = 0
    while not success and tries < max_tries:
        Q = np.random.randn(input_size, output_size) / np.sqrt(output_size)
        for _ in range(num_steps):
            QTQmI = Q.T.dot(Q) - identity
            loss = np.sum(QTQmI**2 / 2)
            Q2 = Q**2
            # Element-wise scaled gradient step; eps keeps the denominator
            # strictly positive.
            Q -= lr * Q.dot(QTQmI) / (
                np.abs(Q2
                       + Q2.sum(axis=0, keepdims=True)
                       + Q2.sum(axis=1, keepdims=True)
                       - 1)
                + eps)
            # `<=` is False for NaN, so this also rejects non-finite entries
            # and large negative values, not only large positive ones.
            entries_ok = np.all(np.abs(Q) <= blowup_limit)
            if not entries_ok or loss > blowup_limit or not np.isfinite(loss):
                tries += 1
                lr /= 2
                break
        else:
            # Only a run that finished every step without diverging counts;
            # a `break` above must trigger another attempt instead.
            success = True
    if success:
        print('Orthogonal pretrainer loss: %.2e' % loss)
    else:
        print('Orthogonal pretrainer failed, using non-orthogonal random matrix')
        Q = np.random.randn(input_size, output_size) / np.sqrt(output_size)
    return np.transpose(Q.astype(np.float32))