Source code for antu.nn.dynet.initializer

import numpy as np

def orthonormal_initializer(output_size, input_size):
    """Build an (approximately) orthonormal weight matrix.

    Adopted from Timothy Dozat
    https://github.com/tdozat/Parser/blob/master/lib/linalg.py

    A random Gaussian matrix ``Q`` of shape ``(input_size, output_size)`` is
    iteratively adjusted to reduce ``sum((Q.T @ Q - I) ** 2) / 2``.  If the
    iteration diverges, the learning rate is halved and a fresh random matrix
    is tried, for at most 10 attempts.  When every attempt diverges, a plain
    scaled random Gaussian matrix is returned instead.

    Args:
        output_size: Number of rows of the returned matrix.
        input_size: Number of columns of the returned matrix.

    Returns:
        A ``numpy.float32`` array of shape ``(output_size, input_size)``.
    """
    print(output_size, input_size)
    identity = np.eye(output_size)
    lr = .1
    # Small constant keeping the per-element step denominator away from zero.
    eps = .05 / (output_size + input_size)
    max_tries = 10
    n_steps = 100

    success = False
    tries = 0
    while not success and tries < max_tries:
        Q = np.random.randn(input_size, output_size) / np.sqrt(output_size)
        for _ in range(n_steps):
            QTQmI = Q.T.dot(Q) - identity
            loss = np.sum(QTQmI**2 / 2)
            Q2 = Q**2
            Q -= lr * Q.dot(QTQmI) / (
                np.abs(
                    Q2
                    + Q2.sum(axis=0, keepdims=True)
                    + Q2.sum(axis=1, keepdims=True)
                    - 1
                )
                + eps
            )
            if np.max(Q) > 1e6 or loss > 1e6 or not np.isfinite(loss):
                # Diverged: retry from a new random matrix with a smaller step.
                tries += 1
                lr /= 2
                break
        else:
            # Runs only when the loop was NOT left via ``break``, so a
            # diverged attempt is never reported as a success.
            success = True

    if success:
        print('Orthogonal pretrainer loss: %.2e' % loss)
    else:
        print('Orthogonal pretrainer failed, using non-orthogonal random matrix')
        Q = np.random.randn(input_size, output_size) / np.sqrt(output_size)
    return np.transpose(Q.astype(np.float32))