shackenberg · December 22, 2015 05:19
diff --git a/minimal.rnn.theano.py b/minimal.rnn.theano.py
 import numpy
 import theano
 import theano.tensor as TT

 # network sizes: hidden, input and output units
 n = 50
 nin = 5
 nout = 5


 def _small_uniform(rows, cols):
     # (rows, cols) array drawn uniformly between -0.01 and 0.01
     return numpy.random.uniform(low=-.01, high=.01, size=(rows, cols))


 # symbolic variables; for `u` and `t` the first dimension is time
 u = TT.matrix()   # input
 t = TT.matrix()   # target
 h0 = TT.vector()  # initial hidden state of the RNN
 lr = TT.scalar()  # learning rate

 # weights as shared variables, created in the order W, W_in, W_out so the
 # sequence of random draws is unchanged
 W = theano.shared(_small_uniform(n, n))         # recurrent weights
 W_in = theano.shared(_small_uniform(nin, n))    # input to hidden layer
 W_out = theano.shared(_small_uniform(n, nout))  # hidden to output layer


 # recurrent function (using tanh activation function) and linear output
 # activation function
 def step(u_t, h_tm1, W, W_in, W_out):
     """Advance the RNN by one time step.

     Used as the inner function of ``theano.scan``: ``u_t`` is the current
     slice of the input sequence and ``h_tm1`` the previous hidden state;
     ``W``, ``W_in`` and ``W_out`` are the recurrent, input-to-hidden and
     hidden-to-output weights.

     Returns the pair ``(h_t, y_t)``: the new hidden state (tanh
     activation) and the linear output computed from it.
     """
     input_drive = TT.dot(u_t, W_in)
     recurrent_drive = TT.dot(h_tm1, W)
     h_t = TT.tanh(input_drive + recurrent_drive)
     return h_t, TT.dot(h_t, W_out)

 # the hidden state `h` for the entire sequence, and the output for the
 # entire sequence `y` (first dimension is always time)
 [h, y], _ = theano.scan(step,
                         sequences=u,
                         outputs_info=[h0, None],
                         non_sequences=[W, W_in, W_out])
 # error between output and target (summed squared error)
 error = ((y - t) ** 2).sum()
 # gradients on the weights using BPTT
 gW, gW_in, gW_out = TT.grad(error, [W, W_in, W_out])
 # training function, that computes the error and updates the weights using
 # SGD.
 # The updates are passed as a list of (shared variable, new value) pairs
 # instead of a plain dict: theano.function warns that an unordered dict
 # with more than one entry results in non-deterministic behavior.
 fn = theano.function([h0, u, t, lr],
                      error,
                      updates=[(W, W - lr * gW),
                               (W_in, W_in - lr * gW_in),
                               (W_out, W_out - lr * gW_out)],
                      profile=True)

 # one training call on random data: `ntime` steps of uniform inputs with
 # random 0/1 targets, starting from an all-zero hidden state
 ntime = 1000
 data = numpy.random.uniform(size=(ntime, nin))
 targets = numpy.round(numpy.random.uniform(size=(ntime, nout)))
 init_state = numpy.zeros(n)
 learning_rate = 0.1
 result = fn(init_state, data, targets, learning_rate)
diff --git a/profile.report.txt b/profile.report.txt
 Function profiling
 ==================
  Message: None
  Time in 1 calls to Function.__call__: 7.018185e-02s
  Time in Function.fn.__call__: 7.013297e-02s (99.930%)
  Time in thunks: 7.007813e-02s (99.852%)
  Total compile time: 4.485335e+00s
    Theano Optimizer time: 2.545068e+00s
       Theano validate time: 2.463958e-01s
    Theano Linker time (includes C, CUDA code generation/compiling): 3.004949e-01s

 Class
 ---
 <% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Class name>
  98.3%    98.3%       0.069s       3.44e-02s     Py       2        2   <class 'theano.scan_module.scan_op.Scan'>
   0.5%    98.8%       0.000s       3.68e-04s     C        1        1   <class 'theano.tensor.blas.Gemm'>
   0.5%    99.3%       0.000s       1.01e-05s     C       35       35   <class 'theano.tensor.elemwise.Elemwise'>
   0.3%    99.6%       0.000s       6.64e-05s     C        3        3   <class 'theano.tensor.basic.Alloc'>
   0.3%    99.9%       0.000s       1.90e-04s     C        1        1   <class 'theano.tensor.subtensor.IncSubtensor'>
   0.0%    99.9%       0.000s       2.86e-06s     C        8        8   <class 'theano.tensor.subtensor.Subtensor'>
   0.0%    99.9%       0.000s       1.77e-06s     C        9        9   <class 'theano.tensor.elemwise.DimShuffle'>
   0.0%   100.0%       0.000s       9.06e-06s     Py       1        1   <class 'theano.tensor.basic.Rebroadcast'>
   0.0%   100.0%       0.000s       9.06e-06s     C        1        1   <class 'theano.tensor.elemwise.Sum'>
   0.0%   100.0%       0.000s       9.88e-07s     C        7        7   <class 'theano.tensor.opt.Shape_i'>
   0.0%   100.0%       0.000s       7.68e-07s     C        9        9   <class 'theano.tensor.basic.ScalarFromTensor'>
   0.0%   100.0%       0.000s       2.03e-06s     C        2        2   <class 'theano.tensor.opt.MakeVector'>
   0.0%   100.0%       0.000s       4.77e-07s     C        2        2   <class 'theano.tensor.basic.Reshape'>
   ... (remaining 0 Classes account for   0.00%(0.00s) of the runtime)

 Ops
 ---
 <% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Op name>
  73.3%    73.3%       0.051s       5.14e-02s     Py       1        1   forall_inplace,cpu,grad_of_scan_fn}
  25.0%    98.3%       0.018s       1.75e-02s     Py       1        1   forall_inplace,cpu,scan_fn}
   0.5%    98.8%       0.000s       3.68e-04s     C        1        1   Gemm{inplace}
   0.3%    99.1%       0.000s       2.15e-04s     C        1        1   Elemwise{Composite{[sub(i0, sqr(i1))]}}
   0.3%    99.4%       0.000s       6.64e-05s     C        3        3   Alloc
   0.3%    99.7%       0.000s       1.90e-04s     C        1        1   IncSubtensor{Set;:int64:}
   0.0%    99.7%       0.000s       2.91e-05s     C        1        1   Elemwise{mul,no_inplace}
   0.0%    99.8%       0.000s       7.51e-06s     C        2        2   Elemwise{Composite{[sub(i0, mul(i1, i2))]}}[(0, 0)]
   0.0%    99.8%       0.000s       9.06e-06s     C        1        1   Sum
   0.0%    99.8%       0.000s       9.06e-06s     Py       1        1   Rebroadcast{0}
   0.0%    99.8%       0.000s       4.41e-06s     C        2        2   Subtensor{int64:int64:int64}
   0.0%    99.8%       0.000s       8.11e-06s     C        1        1   Elemwise{Sub{output_types_preference=transfer_type{0}}}[(0, 0)]
   0.0%    99.8%       0.000s       7.15e-06s     C        1        1   Elemwise{Composite{[Composite{[Composite{[Composite{[Switch(i0, i1, Sw
   0.0%    99.8%       0.000s       7.68e-07s     C        9        9   ScalarFromTensor
   0.0%    99.8%       0.000s       3.46e-06s     C        2        2   Elemwise{Composite{[Composite{[Composite{[Composite{[Composite{[Compos
   0.0%    99.8%       0.000s       3.46e-06s     C        2        2   Elemwise{Composite{[Composite{[Composite{[Switch(i0, i1, maximum(i2, i
   0.0%    99.9%       0.000s       3.46e-06s     C        2        2   Elemwise{Composite{[Composite{[Composite{[Composite{[Composite{[Compos
   0.0%    99.9%       0.000s       6.91e-06s     C        1        1   Elemwise{Sqr{output_types_preference=transfer_type{0}}}[(0, 0)]
   0.0%    99.9%       0.000s       5.96e-06s     C        1        1   Elemwise{Composite{[Switch(i0, i1, minimum(i2, i3))]}}
   0.0%    99.9%       0.000s       2.62e-06s     C        2        2   Subtensor{::-1}
   ... (remaining 28 Ops account for   0.12%(0.00s) of the runtime)

 Apply
 ------
 <% time> <sum %> <apply time> <time per call> <#call> <id> <Apply name>
  73.3%    73.3%       0.051s       5.14e-02s      1    71 forall_inplace,cpu,grad_of_scan_fn}(Shape_i{0}.0, InplaceDimShuffle{0,x,1}.0, Ele
  25.0%    98.3%       0.018s       1.75e-02s      1    57 forall_inplace,cpu,scan_fn}(Shape_i{0}.0, Subtensor{int64:int64:int8}.0, IncSubte
   0.5%    98.8%       0.000s       3.68e-04s      1    80 Gemm{inplace}(<TensorType(float64, matrix)>, Elemwise{neg,no_inplace}.0, Reshape{
   0.3%    99.1%       0.000s       2.15e-04s      1    63 Elemwise{Composite{[sub(i0, sqr(i1))]}}(TensorConstant{(1, 1) of 1.0}, Subtensor{
   0.3%    99.4%       0.000s       1.91e-04s      1    18 Alloc(TensorConstant{0.0}, Elemwise{add,no_inplace}.0, Shape_i{0}.0)
   0.3%    99.7%       0.000s       1.90e-04s      1    22 IncSubtensor{Set;:int64:}(Alloc.0, Rebroadcast{0}.0, Constant{1})
   0.0%    99.7%       0.000s       2.91e-05s      1    65 Elemwise{mul,no_inplace}(TensorConstant{(1, 1) of 2.0}, Subtensor{::-1}.0)
   0.0%    99.7%       0.000s       1.31e-05s      1    77 Elemwise{Composite{[sub(i0, mul(i1, i2))]}}[(0, 0)](<TensorType(float64, matrix)>
   0.0%    99.8%       0.000s       9.06e-06s      1    70 Sum(Elemwise{Sqr{output_types_preference=transfer_type{0}}}[(0, 0)].0)
   0.0%    99.8%       0.000s       9.06e-06s      1    17 Rebroadcast{0}(InplaceDimShuffle{x,0}.0)
   0.0%    99.8%       0.000s       8.11e-06s      1    58 Elemwise{Sub{output_types_preference=transfer_type{0}}}[(0, 0)](forall_inplace,cp
   0.0%    99.8%       0.000s       7.87e-06s      1    59 Subtensor{int64:int64:int64}(forall_inplace,cpu,scan_fn}.0, ScalarFromTensor.0, S
   0.0%    99.8%       0.000s       7.15e-06s      1    45 Elemwise{Composite{[Composite{[Composite{[Composite{[Switch(i0, i1, Switch(i2, i3
   0.0%    99.8%       0.000s       6.91e-06s      1    67 Elemwise{Sqr{output_types_preference=transfer_type{0}}}[(0, 0)](Elemwise{Sub{outp
   0.0%    99.8%       0.000s       5.96e-06s      1    33 Elemwise{Composite{[Composite{[Composite{[Composite{[Composite{[Composite{[add(Ca
   0.0%    99.8%       0.000s       5.96e-06s      1    44 Elemwise{Composite{[Composite{[Composite{[Switch(i0, i1, maximum(i2, i3))]}(i0, i
   0.0%    99.8%       0.000s       5.96e-06s      1    28 Elemwise{Composite{[Composite{[Composite{[Composite{[Composite{[Composite{[Switch
   0.0%    99.8%       0.000s       5.96e-06s      1    29 Elemwise{Composite{[Switch(i0, i1, minimum(i2, i3))]}}(Elemwise{le,no_inplace}.0,
   0.0%    99.9%       0.000s       5.01e-06s      1    48 Elemwise{Composite{[Switch(i0, i1, minimum(i2, i3))]}}[(0, 2)](Elemwise{le,no_inp
   0.0%    99.9%       0.000s       5.01e-06s      1    40 Alloc(TensorConstant{0.0}, Elemwise{Composite{[Composite{[Composite{[Composite{[C
   ... (remaining 61 Apply instances account for 0.14%(0.00s) of the runtime)
	import numpy
	import theano
	import theano.tensor as TT

	# network sizes: hidden, input and output units
	n = 50
	nin = 5
	nout = 5


	def _small_uniform(rows, cols):
	    # (rows, cols) array drawn uniformly between -0.01 and 0.01
	    return numpy.random.uniform(low=-.01, high=.01, size=(rows, cols))


	# symbolic variables; for `u` and `t` the first dimension is time
	u = TT.matrix()   # input
	t = TT.matrix()   # target
	h0 = TT.vector()  # initial hidden state of the RNN
	lr = TT.scalar()  # learning rate

	# weights as shared variables, created in the order W, W_in, W_out so the
	# sequence of random draws is unchanged
	W = theano.shared(_small_uniform(n, n))         # recurrent weights
	W_in = theano.shared(_small_uniform(nin, n))    # input to hidden layer
	W_out = theano.shared(_small_uniform(n, nout))  # hidden to output layer


	# recurrent function (using tanh activation function) and linear output
	# activation function
	def step(u_t, h_tm1, W, W_in, W_out):
	    """Advance the RNN by one time step.

	    Used as the inner function of ``theano.scan``: ``u_t`` is the current
	    slice of the input sequence and ``h_tm1`` the previous hidden state;
	    ``W``, ``W_in`` and ``W_out`` are the recurrent, input-to-hidden and
	    hidden-to-output weights.

	    Returns the pair ``(h_t, y_t)``: the new hidden state (tanh
	    activation) and the linear output computed from it.
	    """
	    # The body must be indented under the ``def``; without that the
	    # statements are not part of the function and the code does not parse.
	    h_t = TT.tanh(TT.dot(u_t, W_in) + TT.dot(h_tm1, W))
	    y_t = TT.dot(h_t, W_out)
	    return h_t, y_t

	# the hidden state `h` for the entire sequence, and the output for the
	# entire sequence `y` (first dimension is always time)
	[h, y], _ = theano.scan(step,
	                        sequences=u,
	                        outputs_info=[h0, None],
	                        non_sequences=[W, W_in, W_out])
	# error between output and target (summed squared error)
	error = ((y - t) ** 2).sum()
	# gradients on the weights using BPTT
	gW, gW_in, gW_out = TT.grad(error, [W, W_in, W_out])
	# training function, that computes the error and updates the weights using
	# SGD.
	# The updates are passed as a list of (shared variable, new value) pairs
	# instead of a plain dict: theano.function warns that an unordered dict
	# with more than one entry results in non-deterministic behavior.
	fn = theano.function([h0, u, t, lr],
	                     error,
	                     updates=[(W, W - lr * gW),
	                              (W_in, W_in - lr * gW_in),
	                              (W_out, W_out - lr * gW_out)],
	                     profile=True)

	# one training call on random data: `ntime` steps of uniform inputs with
	# random 0/1 targets, starting from an all-zero hidden state
	ntime = 1000
	data = numpy.random.uniform(size=(ntime, nin))
	targets = numpy.round(numpy.random.uniform(size=(ntime, nout)))
	init_state = numpy.zeros(n)
	learning_rate = 0.1
	result = fn(init_state, data, targets, learning_rate)
	Function profiling
	==================
	Message: None
	Time in 1 calls to Function.__call__: 7.018185e-02s
	Time in Function.fn.__call__: 7.013297e-02s (99.930%)
	Time in thunks: 7.007813e-02s (99.852%)
	Total compile time: 4.485335e+00s
	Theano Optimizer time: 2.545068e+00s
	Theano validate time: 2.463958e-01s
	Theano Linker time (includes C, CUDA code generation/compiling): 3.004949e-01s

	Class
	---
	<% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Class name>
	98.3% 98.3% 0.069s 3.44e-02s Py 2 2 <class 'theano.scan_module.scan_op.Scan'>
	0.5% 98.8% 0.000s 3.68e-04s C 1 1 <class 'theano.tensor.blas.Gemm'>
	0.5% 99.3% 0.000s 1.01e-05s C 35 35 <class 'theano.tensor.elemwise.Elemwise'>
	0.3% 99.6% 0.000s 6.64e-05s C 3 3 <class 'theano.tensor.basic.Alloc'>
	0.3% 99.9% 0.000s 1.90e-04s C 1 1 <class 'theano.tensor.subtensor.IncSubtensor'>
	0.0% 99.9% 0.000s 2.86e-06s C 8 8 <class 'theano.tensor.subtensor.Subtensor'>
	0.0% 99.9% 0.000s 1.77e-06s C 9 9 <class 'theano.tensor.elemwise.DimShuffle'>
	0.0% 100.0% 0.000s 9.06e-06s Py 1 1 <class 'theano.tensor.basic.Rebroadcast'>
	0.0% 100.0% 0.000s 9.06e-06s C 1 1 <class 'theano.tensor.elemwise.Sum'>
	0.0% 100.0% 0.000s 9.88e-07s C 7 7 <class 'theano.tensor.opt.Shape_i'>
	0.0% 100.0% 0.000s 7.68e-07s C 9 9 <class 'theano.tensor.basic.ScalarFromTensor'>
	0.0% 100.0% 0.000s 2.03e-06s C 2 2 <class 'theano.tensor.opt.MakeVector'>
	0.0% 100.0% 0.000s 4.77e-07s C 2 2 <class 'theano.tensor.basic.Reshape'>
	... (remaining 0 Classes account for 0.00%(0.00s) of the runtime)

	Ops
	---
	<% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Op name>
	73.3% 73.3% 0.051s 5.14e-02s Py 1 1 forall_inplace,cpu,grad_of_scan_fn}
	25.0% 98.3% 0.018s 1.75e-02s Py 1 1 forall_inplace,cpu,scan_fn}
	0.5% 98.8% 0.000s 3.68e-04s C 1 1 Gemm{inplace}
	0.3% 99.1% 0.000s 2.15e-04s C 1 1 Elemwise{Composite{[sub(i0, sqr(i1))]}}
	0.3% 99.4% 0.000s 6.64e-05s C 3 3 Alloc
	0.3% 99.7% 0.000s 1.90e-04s C 1 1 IncSubtensor{Set;:int64:}
	0.0% 99.7% 0.000s 2.91e-05s C 1 1 Elemwise{mul,no_inplace}
	0.0% 99.8% 0.000s 7.51e-06s C 2 2 Elemwise{Composite{[sub(i0, mul(i1, i2))]}}[(0, 0)]
	0.0% 99.8% 0.000s 9.06e-06s C 1 1 Sum
	0.0% 99.8% 0.000s 9.06e-06s Py 1 1 Rebroadcast{0}
	0.0% 99.8% 0.000s 4.41e-06s C 2 2 Subtensor{int64:int64:int64}
	0.0% 99.8% 0.000s 8.11e-06s C 1 1 Elemwise{Sub{output_types_preference=transfer_type{0}}}[(0, 0)]
	0.0% 99.8% 0.000s 7.15e-06s C 1 1 Elemwise{Composite{[Composite{[Composite{[Composite{[Switch(i0, i1, Sw
	0.0% 99.8% 0.000s 7.68e-07s C 9 9 ScalarFromTensor
	0.0% 99.8% 0.000s 3.46e-06s C 2 2 Elemwise{Composite{[Composite{[Composite{[Composite{[Composite{[Compos
	0.0% 99.8% 0.000s 3.46e-06s C 2 2 Elemwise{Composite{[Composite{[Composite{[Switch(i0, i1, maximum(i2, i
	0.0% 99.9% 0.000s 3.46e-06s C 2 2 Elemwise{Composite{[Composite{[Composite{[Composite{[Composite{[Compos
	0.0% 99.9% 0.000s 6.91e-06s C 1 1 Elemwise{Sqr{output_types_preference=transfer_type{0}}}[(0, 0)]
	0.0% 99.9% 0.000s 5.96e-06s C 1 1 Elemwise{Composite{[Switch(i0, i1, minimum(i2, i3))]}}
	0.0% 99.9% 0.000s 2.62e-06s C 2 2 Subtensor{::-1}
	... (remaining 28 Ops account for 0.12%(0.00s) of the runtime)

	Apply
	------
	<% time> <sum %> <apply time> <time per call> <#call> <id> <Apply name>
	73.3% 73.3% 0.051s 5.14e-02s 1 71 forall_inplace,cpu,grad_of_scan_fn}(Shape_i{0}.0, InplaceDimShuffle{0,x,1}.0, Ele
	25.0% 98.3% 0.018s 1.75e-02s 1 57 forall_inplace,cpu,scan_fn}(Shape_i{0}.0, Subtensor{int64:int64:int8}.0, IncSubte
	0.5% 98.8% 0.000s 3.68e-04s 1 80 Gemm{inplace}(<TensorType(float64, matrix)>, Elemwise{neg,no_inplace}.0, Reshape{
	0.3% 99.1% 0.000s 2.15e-04s 1 63 Elemwise{Composite{[sub(i0, sqr(i1))]}}(TensorConstant{(1, 1) of 1.0}, Subtensor{
	0.3% 99.4% 0.000s 1.91e-04s 1 18 Alloc(TensorConstant{0.0}, Elemwise{add,no_inplace}.0, Shape_i{0}.0)
	0.3% 99.7% 0.000s 1.90e-04s 1 22 IncSubtensor{Set;:int64:}(Alloc.0, Rebroadcast{0}.0, Constant{1})
	0.0% 99.7% 0.000s 2.91e-05s 1 65 Elemwise{mul,no_inplace}(TensorConstant{(1, 1) of 2.0}, Subtensor{::-1}.0)
	0.0% 99.7% 0.000s 1.31e-05s 1 77 Elemwise{Composite{[sub(i0, mul(i1, i2))]}}[(0, 0)](<TensorType(float64, matrix)>
	0.0% 99.8% 0.000s 9.06e-06s 1 70 Sum(Elemwise{Sqr{output_types_preference=transfer_type{0}}}[(0, 0)].0)
	0.0% 99.8% 0.000s 9.06e-06s 1 17 Rebroadcast{0}(InplaceDimShuffle{x,0}.0)
	0.0% 99.8% 0.000s 8.11e-06s 1 58 Elemwise{Sub{output_types_preference=transfer_type{0}}}[(0, 0)](forall_inplace,cp
	0.0% 99.8% 0.000s 7.87e-06s 1 59 Subtensor{int64:int64:int64}(forall_inplace,cpu,scan_fn}.0, ScalarFromTensor.0, S
	0.0% 99.8% 0.000s 7.15e-06s 1 45 Elemwise{Composite{[Composite{[Composite{[Composite{[Switch(i0, i1, Switch(i2, i3
	0.0% 99.8% 0.000s 6.91e-06s 1 67 Elemwise{Sqr{output_types_preference=transfer_type{0}}}[(0, 0)](Elemwise{Sub{outp
	0.0% 99.8% 0.000s 5.96e-06s 1 33 Elemwise{Composite{[Composite{[Composite{[Composite{[Composite{[Composite{[add(Ca
	0.0% 99.8% 0.000s 5.96e-06s 1 44 Elemwise{Composite{[Composite{[Composite{[Switch(i0, i1, maximum(i2, i3))]}(i0, i
	0.0% 99.8% 0.000s 5.96e-06s 1 28 Elemwise{Composite{[Composite{[Composite{[Composite{[Composite{[Composite{[Switch
	0.0% 99.8% 0.000s 5.96e-06s 1 29 Elemwise{Composite{[Switch(i0, i1, minimum(i2, i3))]}}(Elemwise{le,no_inplace}.0,
	0.0% 99.9% 0.000s 5.01e-06s 1 48 Elemwise{Composite{[Switch(i0, i1, minimum(i2, i3))]}}[(0, 2)](Elemwise{le,no_inp
	0.0% 99.9% 0.000s 5.01e-06s 1 40 Alloc(TensorConstant{0.0}, Elemwise{Composite{[Composite{[Composite{[Composite{[C
	... (remaining 61 Apply instances account for 0.14%(0.00s) of the runtime)