Last active
December 20, 2015 16:49
-
-
Save shackenberg/6164487 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> theano.test() | |
Theano version 0.6.0rc3 | |
theano is installed in /usr/local/lib/python2.7/dist-packages/theano | |
NumPy version 1.6.1 | |
NumPy is installed in /usr/lib/python2.7/dist-packages/numpy | |
Python version 2.7.3 (default, Aug 1 2012, 05:14:39) [GCC 4.6.3] | |
nose version 1.1.2 | |
Using gpu device 0: GeForce GTX 670 | |
.........................................K.............../usr/local/lib/python2.7/dist-packages/theano/compile/tests/test_inplace_opt_for_value.py:170: UserWarning: theano modules are deprecated and will be removed in release 0.7 | |
super(ExampleRNN, self).__init__() | |
...............................................................................................WARNING (theano.gof.cmodule): Cache leak due to unpickle-able key data set([(((1,), (10, '1.6.1'), (10, '1.6.1')), ('CLinker.cmodule_key', ('-D NPY_ARRAY_ALIGNED=NPY_ALIGNED', '-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS', '-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY', '-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS', '-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL', '-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE', '-O3', '-Wno-unused-label', '-Wno-unused-variable', '-Wno-write-strings', '-fno-math-errno'), (), (), 'NPY_ABI_VERSION=0x1000009', 'c_compiler_str=g++ 4.6', 'md5:0c1bf1caaa6b5b7fa8a3374b9014ef6e', (<theano.gof.tests.test_compute_test_value.IncOneC object at 0x10317410>, ((Scalar(int32), ((-1, 0), False)),), (1, (False,)))))]) | |
................................................................................................................................................................................................................................................................................................................................................................./usr/lib/python2.7/dist-packages/scipy/signal/signaltools.py:408: ComplexWarning: Casting complex values to real discards the imaginary part | |
return sigtools._convolve2d(in1,in2,1,val,bval,fillvalue) | |
..............................................................................SS...SSSSS...............1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 void free_generator(void *_gen) | |
14 { | |
15 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
16 curandStatus_t err = curandDestroyGenerator(*gen); | |
17 if (err != CURAND_STATUS_SUCCESS) | |
18 { | |
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator", | |
20 (int)err); | |
21 } | |
22 free(_gen); | |
23 } | |
24 | |
25 | |
26 struct __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964 { | |
27 PyObject* __ERROR; | |
28 | |
29 PyObject* storage_V3; | |
30 PyObject* storage_V5; | |
31 PyObject* storage_V7; | |
32 PyObject* storage_V1; | |
33 | |
34 | |
35 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964() {} | |
36 ~__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964(void) { | |
37 cleanup(); | |
38 } | |
39 | |
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
41 Py_XINCREF(storage_V3); | |
42 Py_XINCREF(storage_V5); | |
43 Py_XINCREF(storage_V7); | |
44 Py_XINCREF(storage_V1); | |
45 this->storage_V3 = storage_V3; | |
46 this->storage_V5 = storage_V5; | |
47 this->storage_V7 = storage_V7; | |
48 this->storage_V1 = storage_V1; | |
49 int __failure = 0; | |
50 | |
51 { | |
52 | |
53 { | |
54 | |
55 { | |
56 | |
57 { | |
58 | |
59 this->__ERROR = __ERROR; | |
60 return 0; | |
61 __label_7: | |
62 | |
63 double __DUMMY_7; | |
64 | |
65 } | |
66 __label_5: | |
67 | |
68 double __DUMMY_5; | |
69 | |
70 } | |
71 __label_3: | |
72 | |
73 double __DUMMY_3; | |
74 | |
75 } | |
76 __label_1: | |
77 | |
78 double __DUMMY_1; | |
79 | |
80 } | |
81 | |
82 Py_XDECREF(this->storage_V3); | |
83 Py_XDECREF(this->storage_V5); | |
84 Py_XDECREF(this->storage_V7); | |
85 Py_XDECREF(this->storage_V1); | |
86 | |
87 if (__failure) { | |
88 // When there is a failure, this code puts the exception | |
89 // in __ERROR. | |
90 PyObject* err_type = NULL; | |
91 PyObject* err_msg = NULL; | |
92 PyObject* err_traceback = NULL; | |
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
100 PyList_SET_ITEM(__ERROR, 0, err_type); | |
101 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
102 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
103 {Py_XDECREF(old_err_type);} | |
104 {Py_XDECREF(old_err_msg);} | |
105 {Py_XDECREF(old_err_traceback);} | |
106 } | |
107 // The failure code is returned to index what code block failed. | |
108 return __failure; | |
109 | |
110 } | |
111 void cleanup(void) { | |
112 __label_1: | |
113 | |
114 double __DUMMY_1; | |
115 __label_3: | |
116 | |
117 double __DUMMY_3; | |
118 __label_5: | |
119 | |
120 double __DUMMY_5; | |
121 __label_7: | |
122 | |
123 double __DUMMY_7; | |
124 | |
125 Py_XDECREF(this->storage_V3); | |
126 Py_XDECREF(this->storage_V5); | |
127 Py_XDECREF(this->storage_V7); | |
128 Py_XDECREF(this->storage_V1); | |
129 } | |
130 int run(void) { | |
131 int __failure = 0; | |
132 | |
133 PyObject* py_V1; | |
134 CudaNdarray * V1; | |
135 PyObject* py_V3; | |
136 | |
137 PyObject* V3; | |
138 | |
139 PyObject* py_V5; | |
140 | |
141 PyArrayObject* V5; | |
142 int type_num_V5; | |
143 typedef npy_int32 dtype_V5; | |
144 | |
145 PyObject* py_V7; | |
146 | |
147 PyObject* V7; | |
148 | |
149 { | |
150 | |
151 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
152 {Py_XINCREF(py_V1);} | |
153 | |
154 if (py_V1 == Py_None) | |
155 { | |
156 V1 = NULL; | |
157 } | |
158 else | |
159 { | |
160 | |
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
162 // and one ref from the local scope. | |
163 | |
164 if (CudaNdarray_Check(py_V1)) | |
165 { | |
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
167 V1 = (CudaNdarray*)py_V1; | |
168 //std::cerr << "c_extract " << V1 << '\n'; | |
169 if (V1->nd != 2) | |
170 { | |
171 PyErr_Format(PyExc_RuntimeError, | |
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
173 V1->nd); | |
174 V1 = NULL; | |
175 {__failure = 2; goto __label_2;}; | |
176 } | |
177 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
178 | |
179 | |
180 assert(V1); | |
181 Py_INCREF(py_V1); | |
182 } | |
183 else if (py_V1 == Py_None) | |
184 { | |
185 PyErr_SetString(PyExc_TypeError, | |
186 "expected a CudaNdarray, not None"); | |
187 V1 = NULL; | |
188 {__failure = 2; goto __label_2;}; | |
189 } | |
190 else | |
191 { | |
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
194 V1 = NULL; | |
195 {__failure = 2; goto __label_2;}; | |
196 } | |
197 //std::cerr << "c_extract done " << V1 << '\n'; | |
198 | |
199 | |
200 } | |
201 | |
202 { | |
203 | |
204 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
205 {Py_XINCREF(py_V3);} | |
206 | |
207 Py_INCREF(py_V3); | |
208 V3 = py_V3; | |
209 | |
210 { | |
211 | |
212 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
213 {Py_XINCREF(py_V5);} | |
214 | |
215 V5 = NULL; | |
216 if (py_V5 == Py_None) { | |
217 // We can either fail here or set V5 to NULL and rely on Ops | |
218 // using tensors to handle the NULL case, but if they fail to do so | |
219 // they'll end up with nasty segfaults, so this is public service. | |
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
221 {__failure = 6; goto __label_6;} | |
222 } | |
223 if (!PyArray_Check(py_V5)) { | |
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
225 {__failure = 6; goto __label_6;} | |
226 } | |
227 // We expect NPY_INT32 | |
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
229 if (!PyArray_ISALIGNED(py_V5)) { | |
230 PyErr_Format(PyExc_NotImplementedError, | |
231 "expected an aligned array of type %d " | |
232 "(NPY_INT32), got non-aligned array of type %d" | |
233 " with %d dimensions, with 3 last dims %d, %d, %d" | |
234 " and 3 last strides %d %d, %d.", | |
235 NPY_INT32, type_num_V5, | |
236 PyArray_NDIM(py_V5), | |
237 PyArray_NDIM(py_V5) >= 3 ? | |
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
239 PyArray_NDIM(py_V5) >= 2 ? | |
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
241 PyArray_NDIM(py_V5) >= 1 ? | |
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
243 PyArray_NDIM(py_V5) >= 2 ? | |
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
245 PyArray_NDIM(py_V5) >= 3 ? | |
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
247 PyArray_NDIM(py_V5) >= 1 ? | |
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
249 ); | |
250 {__failure = 6; goto __label_6;} | |
251 } | |
252 // This is a TypeError to be consistent with DEBUG_MODE | |
253 // Note: DEBUG_MODE also tells the name of the container | |
254 if (type_num_V5 != NPY_INT32) { | |
255 PyErr_Format(PyExc_TypeError, | |
256 "expected type_num %d (NPY_INT32) got %d", | |
257 NPY_INT32, type_num_V5); | |
258 {__failure = 6; goto __label_6;} | |
259 } | |
260 V5 = (PyArrayObject*)(py_V5); | |
261 Py_XINCREF(V5); | |
262 | |
263 { | |
264 | |
265 py_V7 = Py_None; | |
266 {Py_XINCREF(py_V7);} | |
267 | |
268 V7 = NULL; | |
269 | |
270 { | |
271 | |
272 //////// <code generated by CURAND_Base> | |
273 | |
274 int odims[2]; | |
275 int n_elements = 1; | |
276 int must_alloc_sample = ((NULL == V1) | |
277 || !CudaNdarray_Check(py_V1) | |
278 || (V1->nd != 2)); | |
279 | |
280 if (V5->nd != 1) | |
281 { | |
282 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
283 {__failure = 9; goto __label_9;} | |
284 } | |
285 if (V5->dimensions[0] != 2) | |
286 { | |
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
288 2, V5->dimensions[0]); | |
289 {__failure = 9; goto __label_9;} | |
290 } | |
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
292 { | |
293 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
294 {__failure = 9; goto __label_9;} | |
295 } | |
296 for (int i = 0; i < 2; ++i) | |
297 { | |
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
299 n_elements *= odims[i]; | |
300 must_alloc_sample = (must_alloc_sample | |
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
302 } | |
303 if (must_alloc_sample) | |
304 { | |
305 Py_XDECREF(V1); | |
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
307 if(!V1) | |
308 { | |
309 {__failure = 9; goto __label_9;}; | |
310 } | |
311 } | |
312 if (!PyCObject_Check(V3)) | |
313 { | |
314 // allocate a new generator for o_generator | |
315 Py_XDECREF(V7); | |
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
317 assert(gen); | |
318 if (CURAND_STATUS_SUCCESS != | |
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
321 {__failure = 9; goto __label_9;}; | |
322 } | |
323 if (CURAND_STATUS_SUCCESS != | |
324 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
325 { | |
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
327 {__failure = 9; goto __label_9;}; | |
328 } | |
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
330 assert (V3 == Py_False); | |
331 } | |
332 else if (1) | |
333 { | |
334 // use i_generator for o_generator | |
335 Py_XDECREF(V7); | |
336 Py_INCREF(V3); | |
337 V7 = V3; | |
338 } | |
339 else | |
340 { | |
341 // copy i_generator for o_generator | |
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
343 {__failure = 9; goto __label_9;}; | |
344 } | |
345 { | |
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
347 curandStatus_t err = curandGenerateUniform(*gen, | |
348 CudaNdarray_DEV_DATA(V1), | |
349 n_elements); | |
350 | |
351 | |
352 if (err != CURAND_STATUS_SUCCESS) | |
353 { | |
354 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
355 {__failure = 9; goto __label_9;}; | |
356 } | |
357 cudaThreadSynchronize(); | |
358 } | |
359 //////// </ code generated by CURAND_Base> | |
360 __label_9: | |
361 | |
362 double __DUMMY_9; | |
363 | |
364 } | |
365 __label_8: | |
366 | |
367 if (!__failure) { | |
368 | |
369 assert(py_V7->ob_refcnt > 1); | |
370 Py_DECREF(py_V7); | |
371 py_V7 = V7 ? V7 : Py_None; | |
372 Py_INCREF(py_V7); | |
373 | |
374 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
375 {Py_XINCREF(py_V7);} | |
376 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
377 {Py_XDECREF(old);} | |
378 } | |
379 | |
380 Py_XDECREF(V7); | |
381 | |
382 {Py_XDECREF(py_V7);} | |
383 | |
384 double __DUMMY_8; | |
385 | |
386 } | |
387 __label_6: | |
388 | |
389 if (V5) { | |
390 Py_XDECREF(V5); | |
391 } | |
392 | |
393 {Py_XDECREF(py_V5);} | |
394 | |
395 double __DUMMY_6; | |
396 | |
397 } | |
398 __label_4: | |
399 | |
400 Py_XDECREF(V3); | |
401 | |
402 {Py_XDECREF(py_V3);} | |
403 | |
404 double __DUMMY_4; | |
405 | |
406 } | |
407 __label_2: | |
408 | |
409 if (!__failure) { | |
410 | |
411 //std::cerr << "sync\n"; | |
412 if (NULL == V1) { | |
413 // failure: sync None to storage | |
414 Py_XDECREF(py_V1); | |
415 py_V1 = Py_None; | |
416 Py_INCREF(py_V1); | |
417 } | |
418 else | |
419 { | |
420 if (py_V1 != (PyObject*)V1) | |
421 { | |
422 Py_XDECREF(py_V1); | |
423 py_V1 = (PyObject*)V1; | |
424 Py_INCREF(py_V1); | |
425 } | |
426 assert(py_V1->ob_refcnt); | |
427 } | |
428 | |
429 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
430 {Py_XINCREF(py_V1);} | |
431 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
432 {Py_XDECREF(old);} | |
433 } | |
434 | |
435 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
436 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
437 if (V1) | |
438 { | |
439 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
440 Py_XDECREF(V1); | |
441 } | |
442 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
443 | |
444 {Py_XDECREF(py_V1);} | |
445 | |
446 double __DUMMY_2; | |
447 | |
448 } | |
449 | |
450 | |
451 if (__failure) { | |
452 // When there is a failure, this code puts the exception | |
453 // in __ERROR. | |
454 PyObject* err_type = NULL; | |
455 PyObject* err_msg = NULL; | |
456 PyObject* err_traceback = NULL; | |
457 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
458 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
459 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
460 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
461 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
462 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
463 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
464 PyList_SET_ITEM(__ERROR, 0, err_type); | |
465 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
466 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
467 {Py_XDECREF(old_err_type);} | |
468 {Py_XDECREF(old_err_msg);} | |
469 {Py_XDECREF(old_err_traceback);} | |
470 } | |
471 // The failure code is returned to index what code block failed. | |
472 return __failure; | |
473 | |
474 } | |
475 }; | |
476 | |
477 | |
478 int __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor(__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* self) { | |
479 return self->run(); | |
480 } | |
481 | |
482 void __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor(void* executor, void* self) { | |
483 //printf("doing cleanup\n"); | |
484 //fflush(stdout); | |
485 // ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self)->cleanup(); | |
486 // free(self); | |
487 delete ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self); | |
488 //printf("done cleanup\n"); | |
489 //fflush(stdout); | |
490 } | |
491 | |
492 ////////////////////// | |
493 //// Functions | |
494 ////////////////////// | |
495 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
496 assert(PyTuple_Check(argtuple)); | |
497 if (5 != PyTuple_Size(argtuple)){ | |
498 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
499 return NULL; | |
500 } | |
501 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* struct_ptr = new __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964(); | |
502 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
503 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor), struct_ptr, __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor); | |
504 return thunk; } | |
505 | |
506 ////////////////////// | |
507 //// Module init | |
508 ////////////////////// | |
509 static PyMethodDef MyMethods[] = { | |
510 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
511 {NULL, NULL, 0, NULL} | |
512 }; | |
513 PyMODINIT_FUNC init7a7573cd1a887cbf5d8946c487571964(void){ | |
514 import_array(); | |
515 (void) Py_InitModule("7a7573cd1a887cbf5d8946c487571964", MyMethods); | |
516 } | |
517 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
E1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 void free_generator(void *_gen) | |
14 { | |
15 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
16 curandStatus_t err = curandDestroyGenerator(*gen); | |
17 if (err != CURAND_STATUS_SUCCESS) | |
18 { | |
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator", | |
20 (int)err); | |
21 } | |
22 free(_gen); | |
23 } | |
24 | |
25 | |
26 struct __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964 { | |
27 PyObject* __ERROR; | |
28 | |
29 PyObject* storage_V3; | |
30 PyObject* storage_V5; | |
31 PyObject* storage_V7; | |
32 PyObject* storage_V1; | |
33 | |
34 | |
35 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964() {} | |
36 ~__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964(void) { | |
37 cleanup(); | |
38 } | |
39 | |
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
41 Py_XINCREF(storage_V3); | |
42 Py_XINCREF(storage_V5); | |
43 Py_XINCREF(storage_V7); | |
44 Py_XINCREF(storage_V1); | |
45 this->storage_V3 = storage_V3; | |
46 this->storage_V5 = storage_V5; | |
47 this->storage_V7 = storage_V7; | |
48 this->storage_V1 = storage_V1; | |
49 int __failure = 0; | |
50 | |
51 { | |
52 | |
53 { | |
54 | |
55 { | |
56 | |
57 { | |
58 | |
59 this->__ERROR = __ERROR; | |
60 return 0; | |
61 __label_7: | |
62 | |
63 double __DUMMY_7; | |
64 | |
65 } | |
66 __label_5: | |
67 | |
68 double __DUMMY_5; | |
69 | |
70 } | |
71 __label_3: | |
72 | |
73 double __DUMMY_3; | |
74 | |
75 } | |
76 __label_1: | |
77 | |
78 double __DUMMY_1; | |
79 | |
80 } | |
81 | |
82 Py_XDECREF(this->storage_V3); | |
83 Py_XDECREF(this->storage_V5); | |
84 Py_XDECREF(this->storage_V7); | |
85 Py_XDECREF(this->storage_V1); | |
86 | |
87 if (__failure) { | |
88 // When there is a failure, this code puts the exception | |
89 // in __ERROR. | |
90 PyObject* err_type = NULL; | |
91 PyObject* err_msg = NULL; | |
92 PyObject* err_traceback = NULL; | |
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
100 PyList_SET_ITEM(__ERROR, 0, err_type); | |
101 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
102 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
103 {Py_XDECREF(old_err_type);} | |
104 {Py_XDECREF(old_err_msg);} | |
105 {Py_XDECREF(old_err_traceback);} | |
106 } | |
107 // The failure code is returned to index what code block failed. | |
108 return __failure; | |
109 | |
110 } | |
111 void cleanup(void) { | |
112 __label_1: | |
113 | |
114 double __DUMMY_1; | |
115 __label_3: | |
116 | |
117 double __DUMMY_3; | |
118 __label_5: | |
119 | |
120 double __DUMMY_5; | |
121 __label_7: | |
122 | |
123 double __DUMMY_7; | |
124 | |
125 Py_XDECREF(this->storage_V3); | |
126 Py_XDECREF(this->storage_V5); | |
127 Py_XDECREF(this->storage_V7); | |
128 Py_XDECREF(this->storage_V1); | |
129 } | |
130 int run(void) { | |
131 int __failure = 0; | |
132 | |
133 PyObject* py_V1; | |
134 CudaNdarray * V1; | |
135 PyObject* py_V3; | |
136 | |
137 PyObject* V3; | |
138 | |
139 PyObject* py_V5; | |
140 | |
141 PyArrayObject* V5; | |
142 int type_num_V5; | |
143 typedef npy_int32 dtype_V5; | |
144 | |
145 PyObject* py_V7; | |
146 | |
147 PyObject* V7; | |
148 | |
149 { | |
150 | |
151 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
152 {Py_XINCREF(py_V1);} | |
153 | |
154 if (py_V1 == Py_None) | |
155 { | |
156 V1 = NULL; | |
157 } | |
158 else | |
159 { | |
160 | |
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
162 // and one ref from the local scope. | |
163 | |
164 if (CudaNdarray_Check(py_V1)) | |
165 { | |
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
167 V1 = (CudaNdarray*)py_V1; | |
168 //std::cerr << "c_extract " << V1 << '\n'; | |
169 if (V1->nd != 2) | |
170 { | |
171 PyErr_Format(PyExc_RuntimeError, | |
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
173 V1->nd); | |
174 V1 = NULL; | |
175 {__failure = 2; goto __label_2;}; | |
176 } | |
177 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
178 | |
179 | |
180 assert(V1); | |
181 Py_INCREF(py_V1); | |
182 } | |
183 else if (py_V1 == Py_None) | |
184 { | |
185 PyErr_SetString(PyExc_TypeError, | |
186 "expected a CudaNdarray, not None"); | |
187 V1 = NULL; | |
188 {__failure = 2; goto __label_2;}; | |
189 } | |
190 else | |
191 { | |
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
194 V1 = NULL; | |
195 {__failure = 2; goto __label_2;}; | |
196 } | |
197 //std::cerr << "c_extract done " << V1 << '\n'; | |
198 | |
199 | |
200 } | |
201 | |
202 { | |
203 | |
204 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
205 {Py_XINCREF(py_V3);} | |
206 | |
207 Py_INCREF(py_V3); | |
208 V3 = py_V3; | |
209 | |
210 { | |
211 | |
212 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
213 {Py_XINCREF(py_V5);} | |
214 | |
215 V5 = NULL; | |
216 if (py_V5 == Py_None) { | |
217 // We can either fail here or set V5 to NULL and rely on Ops | |
218 // using tensors to handle the NULL case, but if they fail to do so | |
219 // they'll end up with nasty segfaults, so this is public service. | |
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
221 {__failure = 6; goto __label_6;} | |
222 } | |
223 if (!PyArray_Check(py_V5)) { | |
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
225 {__failure = 6; goto __label_6;} | |
226 } | |
227 // We expect NPY_INT32 | |
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
229 if (!PyArray_ISALIGNED(py_V5)) { | |
230 PyErr_Format(PyExc_NotImplementedError, | |
231 "expected an aligned array of type %d " | |
232 "(NPY_INT32), got non-aligned array of type %d" | |
233 " with %d dimensions, with 3 last dims %d, %d, %d" | |
234 " and 3 last strides %d %d, %d.", | |
235 NPY_INT32, type_num_V5, | |
236 PyArray_NDIM(py_V5), | |
237 PyArray_NDIM(py_V5) >= 3 ? | |
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
239 PyArray_NDIM(py_V5) >= 2 ? | |
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
241 PyArray_NDIM(py_V5) >= 1 ? | |
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
243 PyArray_NDIM(py_V5) >= 2 ? | |
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
245 PyArray_NDIM(py_V5) >= 3 ? | |
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
247 PyArray_NDIM(py_V5) >= 1 ? | |
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
249 ); | |
250 {__failure = 6; goto __label_6;} | |
251 } | |
252 // This is a TypeError to be consistent with DEBUG_MODE | |
253 // Note: DEBUG_MODE also tells the name of the container | |
254 if (type_num_V5 != NPY_INT32) { | |
255 PyErr_Format(PyExc_TypeError, | |
256 "expected type_num %d (NPY_INT32) got %d", | |
257 NPY_INT32, type_num_V5); | |
258 {__failure = 6; goto __label_6;} | |
259 } | |
260 V5 = (PyArrayObject*)(py_V5); | |
261 Py_XINCREF(V5); | |
262 | |
263 { | |
264 | |
265 py_V7 = Py_None; | |
266 {Py_XINCREF(py_V7);} | |
267 | |
268 V7 = NULL; | |
269 | |
270 { | |
271 | |
272 //////// <code generated by CURAND_Base> | |
273 | |
274 int odims[2]; | |
275 int n_elements = 1; | |
276 int must_alloc_sample = ((NULL == V1) | |
277 || !CudaNdarray_Check(py_V1) | |
278 || (V1->nd != 2)); | |
279 | |
280 if (V5->nd != 1) | |
281 { | |
282 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
283 {__failure = 9; goto __label_9;} | |
284 } | |
285 if (V5->dimensions[0] != 2) | |
286 { | |
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
288 2, V5->dimensions[0]); | |
289 {__failure = 9; goto __label_9;} | |
290 } | |
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
292 { | |
293 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
294 {__failure = 9; goto __label_9;} | |
295 } | |
296 for (int i = 0; i < 2; ++i) | |
297 { | |
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
299 n_elements *= odims[i]; | |
300 must_alloc_sample = (must_alloc_sample | |
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
302 } | |
303 if (must_alloc_sample) | |
304 { | |
305 Py_XDECREF(V1); | |
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
307 if(!V1) | |
308 { | |
309 {__failure = 9; goto __label_9;}; | |
310 } | |
311 } | |
312 if (!PyCObject_Check(V3)) | |
313 { | |
314 // allocate a new generator for o_generator | |
315 Py_XDECREF(V7); | |
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
317 assert(gen); | |
318 if (CURAND_STATUS_SUCCESS != | |
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
321 {__failure = 9; goto __label_9;}; | |
322 } | |
323 if (CURAND_STATUS_SUCCESS != | |
324 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
325 { | |
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
327 {__failure = 9; goto __label_9;}; | |
328 } | |
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
330 assert (V3 == Py_False); | |
331 } | |
332 else if (1) | |
333 { | |
334 // use i_generator for o_generator | |
335 Py_XDECREF(V7); | |
336 Py_INCREF(V3); | |
337 V7 = V3; | |
338 } | |
339 else | |
340 { | |
341 // copy i_generator for o_generator | |
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
343 {__failure = 9; goto __label_9;}; | |
344 } | |
345 { | |
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
347 curandStatus_t err = curandGenerateUniform(*gen, | |
348 CudaNdarray_DEV_DATA(V1), | |
349 n_elements); | |
350 | |
351 | |
352 if (err != CURAND_STATUS_SUCCESS) | |
353 { | |
354 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
355 {__failure = 9; goto __label_9;}; | |
356 } | |
357 cudaThreadSynchronize(); | |
358 } | |
359 //////// </ code generated by CURAND_Base> | |
360 __label_9: | |
361 | |
362 double __DUMMY_9; | |
363 | |
364 } | |
365 __label_8: | |
366 | |
367 if (!__failure) { | |
368 | |
369 assert(py_V7->ob_refcnt > 1); | |
370 Py_DECREF(py_V7); | |
371 py_V7 = V7 ? V7 : Py_None; | |
372 Py_INCREF(py_V7); | |
373 | |
374 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
375 {Py_XINCREF(py_V7);} | |
376 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
377 {Py_XDECREF(old);} | |
378 } | |
379 | |
380 Py_XDECREF(V7); | |
381 | |
382 {Py_XDECREF(py_V7);} | |
383 | |
384 double __DUMMY_8; | |
385 | |
386 } | |
387 __label_6: | |
388 | |
389 if (V5) { | |
390 Py_XDECREF(V5); | |
391 } | |
392 | |
393 {Py_XDECREF(py_V5);} | |
394 | |
395 double __DUMMY_6; | |
396 | |
397 } | |
398 __label_4: | |
399 | |
400 Py_XDECREF(V3); | |
401 | |
402 {Py_XDECREF(py_V3);} | |
403 | |
404 double __DUMMY_4; | |
405 | |
406 } | |
407 __label_2: | |
408 | |
409 if (!__failure) { | |
410 | |
411 //std::cerr << "sync\n"; | |
412 if (NULL == V1) { | |
413 // failure: sync None to storage | |
414 Py_XDECREF(py_V1); | |
415 py_V1 = Py_None; | |
416 Py_INCREF(py_V1); | |
417 } | |
418 else | |
419 { | |
420 if (py_V1 != (PyObject*)V1) | |
421 { | |
422 Py_XDECREF(py_V1); | |
423 py_V1 = (PyObject*)V1; | |
424 Py_INCREF(py_V1); | |
425 } | |
426 assert(py_V1->ob_refcnt); | |
427 } | |
428 | |
429 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
430 {Py_XINCREF(py_V1);} | |
431 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
432 {Py_XDECREF(old);} | |
433 } | |
434 | |
435 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
436 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
437 if (V1) | |
438 { | |
439 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
440 Py_XDECREF(V1); | |
441 } | |
442 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
443 | |
444 {Py_XDECREF(py_V1);} | |
445 | |
446 double __DUMMY_2; | |
447 | |
448 } | |
449 | |
450 | |
451 if (__failure) { | |
452 // When there is a failure, this code puts the exception | |
453 // in __ERROR. | |
454 PyObject* err_type = NULL; | |
455 PyObject* err_msg = NULL; | |
456 PyObject* err_traceback = NULL; | |
457 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
458 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
459 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
460 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
461 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
462 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
463 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
464 PyList_SET_ITEM(__ERROR, 0, err_type); | |
465 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
466 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
467 {Py_XDECREF(old_err_type);} | |
468 {Py_XDECREF(old_err_msg);} | |
469 {Py_XDECREF(old_err_traceback);} | |
470 } | |
471 // The failure code is returned to index what code block failed. | |
472 return __failure; | |
473 | |
474 } | |
475 }; | |
476 | |
477 | |
478 int __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor(__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* self) { | |
479 return self->run(); | |
480 } | |
481 | |
482 void __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor(void* executor, void* self) { | |
483 //printf("doing cleanup\n"); | |
484 //fflush(stdout); | |
485 // ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self)->cleanup(); | |
486 // free(self); | |
487 delete ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self); | |
488 //printf("done cleanup\n"); | |
489 //fflush(stdout); | |
490 } | |
491 | |
492 ////////////////////// | |
493 //// Functions | |
494 ////////////////////// | |
495 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
496 assert(PyTuple_Check(argtuple)); | |
497 if (5 != PyTuple_Size(argtuple)){ | |
498 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
499 return NULL; | |
500 } | |
501 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* struct_ptr = new __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964(); | |
502 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
503 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor), struct_ptr, __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor); | |
504 return thunk; } | |
505 | |
506 ////////////////////// | |
507 //// Module init | |
508 ////////////////////// | |
509 static PyMethodDef MyMethods[] = { | |
510 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
511 {NULL, NULL, 0, NULL} | |
512 }; | |
513 PyMODINIT_FUNC init7a7573cd1a887cbf5d8946c487571964(void){ | |
514 import_array(); | |
515 (void) Py_InitModule("7a7573cd1a887cbf5d8946c487571964", MyMethods); | |
516 } | |
517 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
E1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 void free_generator(void *_gen) | |
14 { | |
15 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
16 curandStatus_t err = curandDestroyGenerator(*gen); | |
17 if (err != CURAND_STATUS_SUCCESS) | |
18 { | |
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator", | |
20 (int)err); | |
21 } | |
22 free(_gen); | |
23 } | |
24 | |
25 | |
26 struct __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964 { | |
27 PyObject* __ERROR; | |
28 | |
29 PyObject* storage_V3; | |
30 PyObject* storage_V5; | |
31 PyObject* storage_V7; | |
32 PyObject* storage_V1; | |
33 | |
34 | |
35 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964() {} | |
36 ~__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964(void) { | |
37 cleanup(); | |
38 } | |
39 | |
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
41 Py_XINCREF(storage_V3); | |
42 Py_XINCREF(storage_V5); | |
43 Py_XINCREF(storage_V7); | |
44 Py_XINCREF(storage_V1); | |
45 this->storage_V3 = storage_V3; | |
46 this->storage_V5 = storage_V5; | |
47 this->storage_V7 = storage_V7; | |
48 this->storage_V1 = storage_V1; | |
49 int __failure = 0; | |
50 | |
51 { | |
52 | |
53 { | |
54 | |
55 { | |
56 | |
57 { | |
58 | |
59 this->__ERROR = __ERROR; | |
60 return 0; | |
61 __label_7: | |
62 | |
63 double __DUMMY_7; | |
64 | |
65 } | |
66 __label_5: | |
67 | |
68 double __DUMMY_5; | |
69 | |
70 } | |
71 __label_3: | |
72 | |
73 double __DUMMY_3; | |
74 | |
75 } | |
76 __label_1: | |
77 | |
78 double __DUMMY_1; | |
79 | |
80 } | |
81 | |
82 Py_XDECREF(this->storage_V3); | |
83 Py_XDECREF(this->storage_V5); | |
84 Py_XDECREF(this->storage_V7); | |
85 Py_XDECREF(this->storage_V1); | |
86 | |
87 if (__failure) { | |
88 // When there is a failure, this code puts the exception | |
89 // in __ERROR. | |
90 PyObject* err_type = NULL; | |
91 PyObject* err_msg = NULL; | |
92 PyObject* err_traceback = NULL; | |
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
100 PyList_SET_ITEM(__ERROR, 0, err_type); | |
101 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
102 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
103 {Py_XDECREF(old_err_type);} | |
104 {Py_XDECREF(old_err_msg);} | |
105 {Py_XDECREF(old_err_traceback);} | |
106 } | |
107 // The failure code is returned to index what code block failed. | |
108 return __failure; | |
109 | |
110 } | |
111 void cleanup(void) { | |
112 __label_1: | |
113 | |
114 double __DUMMY_1; | |
115 __label_3: | |
116 | |
117 double __DUMMY_3; | |
118 __label_5: | |
119 | |
120 double __DUMMY_5; | |
121 __label_7: | |
122 | |
123 double __DUMMY_7; | |
124 | |
125 Py_XDECREF(this->storage_V3); | |
126 Py_XDECREF(this->storage_V5); | |
127 Py_XDECREF(this->storage_V7); | |
128 Py_XDECREF(this->storage_V1); | |
129 } | |
130 int run(void) { | |
131 int __failure = 0; | |
132 | |
133 PyObject* py_V1; | |
134 CudaNdarray * V1; | |
135 PyObject* py_V3; | |
136 | |
137 PyObject* V3; | |
138 | |
139 PyObject* py_V5; | |
140 | |
141 PyArrayObject* V5; | |
142 int type_num_V5; | |
143 typedef npy_int32 dtype_V5; | |
144 | |
145 PyObject* py_V7; | |
146 | |
147 PyObject* V7; | |
148 | |
149 { | |
150 | |
151 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
152 {Py_XINCREF(py_V1);} | |
153 | |
154 if (py_V1 == Py_None) | |
155 { | |
156 V1 = NULL; | |
157 } | |
158 else | |
159 { | |
160 | |
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
162 // and one ref from the local scope. | |
163 | |
164 if (CudaNdarray_Check(py_V1)) | |
165 { | |
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
167 V1 = (CudaNdarray*)py_V1; | |
168 //std::cerr << "c_extract " << V1 << '\n'; | |
169 if (V1->nd != 2) | |
170 { | |
171 PyErr_Format(PyExc_RuntimeError, | |
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
173 V1->nd); | |
174 V1 = NULL; | |
175 {__failure = 2; goto __label_2;}; | |
176 } | |
177 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
178 | |
179 | |
180 assert(V1); | |
181 Py_INCREF(py_V1); | |
182 } | |
183 else if (py_V1 == Py_None) | |
184 { | |
185 PyErr_SetString(PyExc_TypeError, | |
186 "expected a CudaNdarray, not None"); | |
187 V1 = NULL; | |
188 {__failure = 2; goto __label_2;}; | |
189 } | |
190 else | |
191 { | |
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
194 V1 = NULL; | |
195 {__failure = 2; goto __label_2;}; | |
196 } | |
197 //std::cerr << "c_extract done " << V1 << '\n'; | |
198 | |
199 | |
200 } | |
201 | |
202 { | |
203 | |
204 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
205 {Py_XINCREF(py_V3);} | |
206 | |
207 Py_INCREF(py_V3); | |
208 V3 = py_V3; | |
209 | |
210 { | |
211 | |
212 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
213 {Py_XINCREF(py_V5);} | |
214 | |
215 V5 = NULL; | |
216 if (py_V5 == Py_None) { | |
217 // We can either fail here or set V5 to NULL and rely on Ops | |
218 // using tensors to handle the NULL case, but if they fail to do so | |
219 // they'll end up with nasty segfaults, so this is public service. | |
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
221 {__failure = 6; goto __label_6;} | |
222 } | |
223 if (!PyArray_Check(py_V5)) { | |
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
225 {__failure = 6; goto __label_6;} | |
226 } | |
227 // We expect NPY_INT32 | |
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
229 if (!PyArray_ISALIGNED(py_V5)) { | |
230 PyErr_Format(PyExc_NotImplementedError, | |
231 "expected an aligned array of type %d " | |
232 "(NPY_INT32), got non-aligned array of type %d" | |
233 " with %d dimensions, with 3 last dims %d, %d, %d" | |
234 " and 3 last strides %d %d, %d.", | |
235 NPY_INT32, type_num_V5, | |
236 PyArray_NDIM(py_V5), | |
237 PyArray_NDIM(py_V5) >= 3 ? | |
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
239 PyArray_NDIM(py_V5) >= 2 ? | |
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
241 PyArray_NDIM(py_V5) >= 1 ? | |
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
243 PyArray_NDIM(py_V5) >= 2 ? | |
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
245 PyArray_NDIM(py_V5) >= 3 ? | |
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
247 PyArray_NDIM(py_V5) >= 1 ? | |
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
249 ); | |
250 {__failure = 6; goto __label_6;} | |
251 } | |
252 // This is a TypeError to be consistent with DEBUG_MODE | |
253 // Note: DEBUG_MODE also tells the name of the container | |
254 if (type_num_V5 != NPY_INT32) { | |
255 PyErr_Format(PyExc_TypeError, | |
256 "expected type_num %d (NPY_INT32) got %d", | |
257 NPY_INT32, type_num_V5); | |
258 {__failure = 6; goto __label_6;} | |
259 } | |
260 V5 = (PyArrayObject*)(py_V5); | |
261 Py_XINCREF(V5); | |
262 | |
263 { | |
264 | |
265 py_V7 = Py_None; | |
266 {Py_XINCREF(py_V7);} | |
267 | |
268 V7 = NULL; | |
269 | |
270 { | |
271 | |
272 //////// <code generated by CURAND_Base> | |
273 | |
274 int odims[2]; | |
275 int n_elements = 1; | |
276 int must_alloc_sample = ((NULL == V1) | |
277 || !CudaNdarray_Check(py_V1) | |
278 || (V1->nd != 2)); | |
279 | |
280 if (V5->nd != 1) | |
281 { | |
282 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
283 {__failure = 9; goto __label_9;} | |
284 } | |
285 if (V5->dimensions[0] != 2) | |
286 { | |
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
288 2, V5->dimensions[0]); | |
289 {__failure = 9; goto __label_9;} | |
290 } | |
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
292 { | |
293 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
294 {__failure = 9; goto __label_9;} | |
295 } | |
296 for (int i = 0; i < 2; ++i) | |
297 { | |
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
299 n_elements *= odims[i]; | |
300 must_alloc_sample = (must_alloc_sample | |
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
302 } | |
303 if (must_alloc_sample) | |
304 { | |
305 Py_XDECREF(V1); | |
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
307 if(!V1) | |
308 { | |
309 {__failure = 9; goto __label_9;}; | |
310 } | |
311 } | |
312 if (!PyCObject_Check(V3)) | |
313 { | |
314 // allocate a new generator for o_generator | |
315 Py_XDECREF(V7); | |
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
317 assert(gen); | |
318 if (CURAND_STATUS_SUCCESS != | |
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
321 {__failure = 9; goto __label_9;}; | |
322 } | |
323 if (CURAND_STATUS_SUCCESS != | |
324 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
325 { | |
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
327 {__failure = 9; goto __label_9;}; | |
328 } | |
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
330 assert (V3 == Py_False); | |
331 } | |
332 else if (1) | |
333 { | |
334 // use i_generator for o_generator | |
335 Py_XDECREF(V7); | |
336 Py_INCREF(V3); | |
337 V7 = V3; | |
338 } | |
339 else | |
340 { | |
341 // copy i_generator for o_generator | |
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
343 {__failure = 9; goto __label_9;}; | |
344 } | |
345 { | |
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
347 curandStatus_t err = curandGenerateUniform(*gen, | |
348 CudaNdarray_DEV_DATA(V1), | |
349 n_elements); | |
350 | |
351 | |
352 if (err != CURAND_STATUS_SUCCESS) | |
353 { | |
354 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
355 {__failure = 9; goto __label_9;}; | |
356 } | |
357 cudaThreadSynchronize(); | |
358 } | |
359 //////// </ code generated by CURAND_Base> | |
360 __label_9: | |
361 | |
362 double __DUMMY_9; | |
363 | |
364 } | |
365 __label_8: | |
366 | |
367 if (!__failure) { | |
368 | |
369 assert(py_V7->ob_refcnt > 1); | |
370 Py_DECREF(py_V7); | |
371 py_V7 = V7 ? V7 : Py_None; | |
372 Py_INCREF(py_V7); | |
373 | |
374 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
375 {Py_XINCREF(py_V7);} | |
376 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
377 {Py_XDECREF(old);} | |
378 } | |
379 | |
380 Py_XDECREF(V7); | |
381 | |
382 {Py_XDECREF(py_V7);} | |
383 | |
384 double __DUMMY_8; | |
385 | |
386 } | |
387 __label_6: | |
388 | |
389 if (V5) { | |
390 Py_XDECREF(V5); | |
391 } | |
392 | |
393 {Py_XDECREF(py_V5);} | |
394 | |
395 double __DUMMY_6; | |
396 | |
397 } | |
398 __label_4: | |
399 | |
400 Py_XDECREF(V3); | |
401 | |
402 {Py_XDECREF(py_V3);} | |
403 | |
404 double __DUMMY_4; | |
405 | |
406 } | |
407 __label_2: | |
408 | |
409 if (!__failure) { | |
410 | |
411 //std::cerr << "sync\n"; | |
412 if (NULL == V1) { | |
413 // failure: sync None to storage | |
414 Py_XDECREF(py_V1); | |
415 py_V1 = Py_None; | |
416 Py_INCREF(py_V1); | |
417 } | |
418 else | |
419 { | |
420 if (py_V1 != (PyObject*)V1) | |
421 { | |
422 Py_XDECREF(py_V1); | |
423 py_V1 = (PyObject*)V1; | |
424 Py_INCREF(py_V1); | |
425 } | |
426 assert(py_V1->ob_refcnt); | |
427 } | |
428 | |
429 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
430 {Py_XINCREF(py_V1);} | |
431 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
432 {Py_XDECREF(old);} | |
433 } | |
434 | |
435 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
436 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
437 if (V1) | |
438 { | |
439 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
440 Py_XDECREF(V1); | |
441 } | |
442 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
443 | |
444 {Py_XDECREF(py_V1);} | |
445 | |
446 double __DUMMY_2; | |
447 | |
448 } | |
449 | |
450 | |
451 if (__failure) { | |
452 // When there is a failure, this code puts the exception | |
453 // in __ERROR. | |
454 PyObject* err_type = NULL; | |
455 PyObject* err_msg = NULL; | |
456 PyObject* err_traceback = NULL; | |
457 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
458 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
459 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
460 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
461 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
462 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
463 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
464 PyList_SET_ITEM(__ERROR, 0, err_type); | |
465 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
466 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
467 {Py_XDECREF(old_err_type);} | |
468 {Py_XDECREF(old_err_msg);} | |
469 {Py_XDECREF(old_err_traceback);} | |
470 } | |
471 // The failure code is returned to index what code block failed. | |
472 return __failure; | |
473 | |
474 } | |
475 }; | |
476 | |
477 | |
478 int __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor(__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* self) { | |
479 return self->run(); | |
480 } | |
481 | |
482 void __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor(void* executor, void* self) { | |
483 //printf("doing cleanup\n"); | |
484 //fflush(stdout); | |
485 // ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self)->cleanup(); | |
486 // free(self); | |
487 delete ((__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964*)self); | |
488 //printf("done cleanup\n"); | |
489 //fflush(stdout); | |
490 } | |
491 | |
492 ////////////////////// | |
493 //// Functions | |
494 ////////////////////// | |
495 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
496 assert(PyTuple_Check(argtuple)); | |
497 if (5 != PyTuple_Size(argtuple)){ | |
498 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
499 return NULL; | |
500 } | |
501 __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964* struct_ptr = new __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964(); | |
502 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
503 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_executor), struct_ptr, __struct_compiled_op_7a7573cd1a887cbf5d8946c487571964_destructor); | |
504 return thunk; } | |
505 | |
506 ////////////////////// | |
507 //// Module init | |
508 ////////////////////// | |
509 static PyMethodDef MyMethods[] = { | |
510 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
511 {NULL, NULL, 0, NULL} | |
512 }; | |
513 PyMODINIT_FUNC init7a7573cd1a887cbf5d8946c487571964(void){ | |
514 import_array(); | |
515 (void) Py_InitModule("7a7573cd1a887cbf5d8946c487571964", MyMethods); | |
516 } | |
517 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
E1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 void free_generator(void *_gen) | |
14 { | |
15 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
16 curandStatus_t err = curandDestroyGenerator(*gen); | |
17 if (err != CURAND_STATUS_SUCCESS) | |
18 { | |
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator", | |
20 (int)err); | |
21 } | |
22 free(_gen); | |
23 } | |
24 | |
25 | |
26 struct __struct_compiled_op_889e175e75159a3e61d065caf0802126 { | |
27 PyObject* __ERROR; | |
28 | |
29 PyObject* storage_V3; | |
30 PyObject* storage_V5; | |
31 PyObject* storage_V7; | |
32 PyObject* storage_V1; | |
33 | |
34 | |
35 __struct_compiled_op_889e175e75159a3e61d065caf0802126() {} | |
36 ~__struct_compiled_op_889e175e75159a3e61d065caf0802126(void) { | |
37 cleanup(); | |
38 } | |
39 | |
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
41 Py_XINCREF(storage_V3); | |
42 Py_XINCREF(storage_V5); | |
43 Py_XINCREF(storage_V7); | |
44 Py_XINCREF(storage_V1); | |
45 this->storage_V3 = storage_V3; | |
46 this->storage_V5 = storage_V5; | |
47 this->storage_V7 = storage_V7; | |
48 this->storage_V1 = storage_V1; | |
49 int __failure = 0; | |
50 | |
51 { | |
52 | |
53 { | |
54 | |
55 { | |
56 | |
57 { | |
58 | |
59 this->__ERROR = __ERROR; | |
60 return 0; | |
61 __label_7: | |
62 | |
63 double __DUMMY_7; | |
64 | |
65 } | |
66 __label_5: | |
67 | |
68 double __DUMMY_5; | |
69 | |
70 } | |
71 __label_3: | |
72 | |
73 double __DUMMY_3; | |
74 | |
75 } | |
76 __label_1: | |
77 | |
78 double __DUMMY_1; | |
79 | |
80 } | |
81 | |
82 Py_XDECREF(this->storage_V3); | |
83 Py_XDECREF(this->storage_V5); | |
84 Py_XDECREF(this->storage_V7); | |
85 Py_XDECREF(this->storage_V1); | |
86 | |
87 if (__failure) { | |
88 // When there is a failure, this code puts the exception | |
89 // in __ERROR. | |
90 PyObject* err_type = NULL; | |
91 PyObject* err_msg = NULL; | |
92 PyObject* err_traceback = NULL; | |
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
100 PyList_SET_ITEM(__ERROR, 0, err_type); | |
101 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
102 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
103 {Py_XDECREF(old_err_type);} | |
104 {Py_XDECREF(old_err_msg);} | |
105 {Py_XDECREF(old_err_traceback);} | |
106 } | |
107 // The failure code is returned to index what code block failed. | |
108 return __failure; | |
109 | |
110 } | |
111 void cleanup(void) { | |
112 __label_1: | |
113 | |
114 double __DUMMY_1; | |
115 __label_3: | |
116 | |
117 double __DUMMY_3; | |
118 __label_5: | |
119 | |
120 double __DUMMY_5; | |
121 __label_7: | |
122 | |
123 double __DUMMY_7; | |
124 | |
125 Py_XDECREF(this->storage_V3); | |
126 Py_XDECREF(this->storage_V5); | |
127 Py_XDECREF(this->storage_V7); | |
128 Py_XDECREF(this->storage_V1); | |
129 } | |
130 int run(void) { | |
131 int __failure = 0; | |
132 | |
133 PyObject* py_V1; | |
134 CudaNdarray * V1; | |
135 PyObject* py_V3; | |
136 | |
137 PyObject* V3; | |
138 | |
139 PyObject* py_V5; | |
140 | |
141 PyArrayObject* V5; | |
142 int type_num_V5; | |
143 typedef npy_int32 dtype_V5; | |
144 | |
145 PyObject* py_V7; | |
146 | |
147 PyObject* V7; | |
148 | |
149 { | |
150 | |
151 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
152 {Py_XINCREF(py_V1);} | |
153 | |
154 if (py_V1 == Py_None) | |
155 { | |
156 V1 = NULL; | |
157 } | |
158 else | |
159 { | |
160 | |
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
162 // and one ref from the local scope. | |
163 | |
164 if (CudaNdarray_Check(py_V1)) | |
165 { | |
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
167 V1 = (CudaNdarray*)py_V1; | |
168 //std::cerr << "c_extract " << V1 << '\n'; | |
169 if (V1->nd != 2) | |
170 { | |
171 PyErr_Format(PyExc_RuntimeError, | |
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
173 V1->nd); | |
174 V1 = NULL; | |
175 {__failure = 2; goto __label_2;}; | |
176 } | |
177 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
178 | |
179 | |
180 assert(V1); | |
181 Py_INCREF(py_V1); | |
182 } | |
183 else if (py_V1 == Py_None) | |
184 { | |
185 PyErr_SetString(PyExc_TypeError, | |
186 "expected a CudaNdarray, not None"); | |
187 V1 = NULL; | |
188 {__failure = 2; goto __label_2;}; | |
189 } | |
190 else | |
191 { | |
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
194 V1 = NULL; | |
195 {__failure = 2; goto __label_2;}; | |
196 } | |
197 //std::cerr << "c_extract done " << V1 << '\n'; | |
198 | |
199 | |
200 } | |
201 | |
202 { | |
203 | |
204 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
205 {Py_XINCREF(py_V3);} | |
206 | |
207 Py_INCREF(py_V3); | |
208 V3 = py_V3; | |
209 | |
210 { | |
211 | |
212 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
213 {Py_XINCREF(py_V5);} | |
214 | |
215 V5 = NULL; | |
216 if (py_V5 == Py_None) { | |
217 // We can either fail here or set V5 to NULL and rely on Ops | |
218 // using tensors to handle the NULL case, but if they fail to do so | |
219 // they'll end up with nasty segfaults, so this is public service. | |
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
221 {__failure = 6; goto __label_6;} | |
222 } | |
223 if (!PyArray_Check(py_V5)) { | |
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
225 {__failure = 6; goto __label_6;} | |
226 } | |
227 // We expect NPY_INT32 | |
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
229 if (!PyArray_ISALIGNED(py_V5)) { | |
230 PyErr_Format(PyExc_NotImplementedError, | |
231 "expected an aligned array of type %d " | |
232 "(NPY_INT32), got non-aligned array of type %d" | |
233 " with %d dimensions, with 3 last dims %d, %d, %d" | |
234 " and 3 last strides %d %d, %d.", | |
235 NPY_INT32, type_num_V5, | |
236 PyArray_NDIM(py_V5), | |
237 PyArray_NDIM(py_V5) >= 3 ? | |
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
239 PyArray_NDIM(py_V5) >= 2 ? | |
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
241 PyArray_NDIM(py_V5) >= 1 ? | |
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
243 PyArray_NDIM(py_V5) >= 2 ? | |
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
245 PyArray_NDIM(py_V5) >= 3 ? | |
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
247 PyArray_NDIM(py_V5) >= 1 ? | |
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
249 ); | |
250 {__failure = 6; goto __label_6;} | |
251 } | |
252 // This is a TypeError to be consistent with DEBUG_MODE | |
253 // Note: DEBUG_MODE also tells the name of the container | |
254 if (type_num_V5 != NPY_INT32) { | |
255 PyErr_Format(PyExc_TypeError, | |
256 "expected type_num %d (NPY_INT32) got %d", | |
257 NPY_INT32, type_num_V5); | |
258 {__failure = 6; goto __label_6;} | |
259 } | |
260 V5 = (PyArrayObject*)(py_V5); | |
261 Py_XINCREF(V5); | |
262 | |
263 { | |
264 | |
265 py_V7 = Py_None; | |
266 {Py_XINCREF(py_V7);} | |
267 | |
268 V7 = NULL; | |
269 | |
270 { | |
271 | |
272 //////// <code generated by CURAND_Base> | |
273 | |
274 int odims[2]; | |
275 int n_elements = 1; | |
276 int must_alloc_sample = ((NULL == V1) | |
277 || !CudaNdarray_Check(py_V1) | |
278 || (V1->nd != 2)); | |
279 | |
280 if (V5->nd != 1) | |
281 { | |
282 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
283 {__failure = 9; goto __label_9;} | |
284 } | |
285 if (V5->dimensions[0] != 2) | |
286 { | |
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
288 2, V5->dimensions[0]); | |
289 {__failure = 9; goto __label_9;} | |
290 } | |
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
292 { | |
293 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
294 {__failure = 9; goto __label_9;} | |
295 } | |
296 for (int i = 0; i < 2; ++i) | |
297 { | |
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
299 n_elements *= odims[i]; | |
300 must_alloc_sample = (must_alloc_sample | |
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
302 } | |
303 if (must_alloc_sample) | |
304 { | |
305 Py_XDECREF(V1); | |
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
307 if(!V1) | |
308 { | |
309 {__failure = 9; goto __label_9;}; | |
310 } | |
311 } | |
312 if (!PyCObject_Check(V3)) | |
313 { | |
314 // allocate a new generator for o_generator | |
315 Py_XDECREF(V7); | |
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
317 assert(gen); | |
318 if (CURAND_STATUS_SUCCESS != | |
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
321 {__failure = 9; goto __label_9;}; | |
322 } | |
323 if (CURAND_STATUS_SUCCESS != | |
324 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
325 { | |
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
327 {__failure = 9; goto __label_9;}; | |
328 } | |
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
330 assert (V3 == Py_False); | |
331 } | |
332 else if (1) | |
333 { | |
334 // use i_generator for o_generator | |
335 Py_XDECREF(V7); | |
336 Py_INCREF(V3); | |
337 V7 = V3; | |
338 } | |
339 else | |
340 { | |
341 // copy i_generator for o_generator | |
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
343 {__failure = 9; goto __label_9;}; | |
344 } | |
345 { | |
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
347 curandStatus_t err = curandGenerateNormal(*gen, | |
348 CudaNdarray_DEV_DATA(V1), | |
349 n_elements, | |
350 0.0, 1.0); | |
351 | |
352 | |
353 if (err != CURAND_STATUS_SUCCESS) | |
354 { | |
355 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
356 {__failure = 9; goto __label_9;}; | |
357 } | |
358 cudaThreadSynchronize(); | |
359 } | |
360 //////// </ code generated by CURAND_Base> | |
361 __label_9: | |
362 | |
363 double __DUMMY_9; | |
364 | |
365 } | |
366 __label_8: | |
367 | |
368 if (!__failure) { | |
369 | |
370 assert(py_V7->ob_refcnt > 1); | |
371 Py_DECREF(py_V7); | |
372 py_V7 = V7 ? V7 : Py_None; | |
373 Py_INCREF(py_V7); | |
374 | |
375 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
376 {Py_XINCREF(py_V7);} | |
377 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
378 {Py_XDECREF(old);} | |
379 } | |
380 | |
381 Py_XDECREF(V7); | |
382 | |
383 {Py_XDECREF(py_V7);} | |
384 | |
385 double __DUMMY_8; | |
386 | |
387 } | |
388 __label_6: | |
389 | |
390 if (V5) { | |
391 Py_XDECREF(V5); | |
392 } | |
393 | |
394 {Py_XDECREF(py_V5);} | |
395 | |
396 double __DUMMY_6; | |
397 | |
398 } | |
399 __label_4: | |
400 | |
401 Py_XDECREF(V3); | |
402 | |
403 {Py_XDECREF(py_V3);} | |
404 | |
405 double __DUMMY_4; | |
406 | |
407 } | |
408 __label_2: | |
409 | |
410 if (!__failure) { | |
411 | |
412 //std::cerr << "sync\n"; | |
413 if (NULL == V1) { | |
414 // failure: sync None to storage | |
415 Py_XDECREF(py_V1); | |
416 py_V1 = Py_None; | |
417 Py_INCREF(py_V1); | |
418 } | |
419 else | |
420 { | |
421 if (py_V1 != (PyObject*)V1) | |
422 { | |
423 Py_XDECREF(py_V1); | |
424 py_V1 = (PyObject*)V1; | |
425 Py_INCREF(py_V1); | |
426 } | |
427 assert(py_V1->ob_refcnt); | |
428 } | |
429 | |
430 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
431 {Py_XINCREF(py_V1);} | |
432 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
433 {Py_XDECREF(old);} | |
434 } | |
435 | |
436 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
437 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
438 if (V1) | |
439 { | |
440 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
441 Py_XDECREF(V1); | |
442 } | |
443 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
444 | |
445 {Py_XDECREF(py_V1);} | |
446 | |
447 double __DUMMY_2; | |
448 | |
449 } | |
450 | |
451 | |
452 if (__failure) { | |
453 // When there is a failure, this code puts the exception | |
454 // in __ERROR. | |
455 PyObject* err_type = NULL; | |
456 PyObject* err_msg = NULL; | |
457 PyObject* err_traceback = NULL; | |
458 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
459 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
460 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
461 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
462 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
463 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
464 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
465 PyList_SET_ITEM(__ERROR, 0, err_type); | |
466 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
467 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
468 {Py_XDECREF(old_err_type);} | |
469 {Py_XDECREF(old_err_msg);} | |
470 {Py_XDECREF(old_err_traceback);} | |
471 } | |
472 // The failure code is returned to index what code block failed. | |
473 return __failure; | |
474 | |
475 } | |
476 }; | |
477 | |
478 | |
479 int __struct_compiled_op_889e175e75159a3e61d065caf0802126_executor(__struct_compiled_op_889e175e75159a3e61d065caf0802126* self) { | |
480 return self->run(); | |
481 } | |
482 | |
483 void __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor(void* executor, void* self) { | |
484 //printf("doing cleanup\n"); | |
485 //fflush(stdout); | |
486 // ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self)->cleanup(); | |
487 // free(self); | |
488 delete ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self); | |
489 //printf("done cleanup\n"); | |
490 //fflush(stdout); | |
491 } | |
492 | |
493 ////////////////////// | |
494 //// Functions | |
495 ////////////////////// | |
496 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
497 assert(PyTuple_Check(argtuple)); | |
498 if (5 != PyTuple_Size(argtuple)){ | |
499 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
500 return NULL; | |
501 } | |
502 __struct_compiled_op_889e175e75159a3e61d065caf0802126* struct_ptr = new __struct_compiled_op_889e175e75159a3e61d065caf0802126(); | |
503 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
504 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_889e175e75159a3e61d065caf0802126_executor), struct_ptr, __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor); | |
505 return thunk; } | |
506 | |
507 ////////////////////// | |
508 //// Module init | |
509 ////////////////////// | |
510 static PyMethodDef MyMethods[] = { | |
511 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
512 {NULL, NULL, 0, NULL} | |
513 }; | |
514 PyMODINIT_FUNC init889e175e75159a3e61d065caf0802126(void){ | |
515 import_array(); | |
516 (void) Py_InitModule("889e175e75159a3e61d065caf0802126", MyMethods); | |
517 } | |
518 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
E1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 void free_generator(void *_gen) | |
14 { | |
15 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
16 curandStatus_t err = curandDestroyGenerator(*gen); | |
17 if (err != CURAND_STATUS_SUCCESS) | |
18 { | |
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator", | |
20 (int)err); | |
21 } | |
22 free(_gen); | |
23 } | |
24 | |
25 | |
26 struct __struct_compiled_op_889e175e75159a3e61d065caf0802126 { | |
27 PyObject* __ERROR; | |
28 | |
29 PyObject* storage_V3; | |
30 PyObject* storage_V5; | |
31 PyObject* storage_V7; | |
32 PyObject* storage_V1; | |
33 | |
34 | |
35 __struct_compiled_op_889e175e75159a3e61d065caf0802126() {} | |
36 ~__struct_compiled_op_889e175e75159a3e61d065caf0802126(void) { | |
37 cleanup(); | |
38 } | |
39 | |
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
41 Py_XINCREF(storage_V3); | |
42 Py_XINCREF(storage_V5); | |
43 Py_XINCREF(storage_V7); | |
44 Py_XINCREF(storage_V1); | |
45 this->storage_V3 = storage_V3; | |
46 this->storage_V5 = storage_V5; | |
47 this->storage_V7 = storage_V7; | |
48 this->storage_V1 = storage_V1; | |
49 int __failure = 0; | |
50 | |
51 { | |
52 | |
53 { | |
54 | |
55 { | |
56 | |
57 { | |
58 | |
59 this->__ERROR = __ERROR; | |
60 return 0; | |
61 __label_7: | |
62 | |
63 double __DUMMY_7; | |
64 | |
65 } | |
66 __label_5: | |
67 | |
68 double __DUMMY_5; | |
69 | |
70 } | |
71 __label_3: | |
72 | |
73 double __DUMMY_3; | |
74 | |
75 } | |
76 __label_1: | |
77 | |
78 double __DUMMY_1; | |
79 | |
80 } | |
81 | |
82 Py_XDECREF(this->storage_V3); | |
83 Py_XDECREF(this->storage_V5); | |
84 Py_XDECREF(this->storage_V7); | |
85 Py_XDECREF(this->storage_V1); | |
86 | |
87 if (__failure) { | |
88 // When there is a failure, this code puts the exception | |
89 // in __ERROR. | |
90 PyObject* err_type = NULL; | |
91 PyObject* err_msg = NULL; | |
92 PyObject* err_traceback = NULL; | |
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
100 PyList_SET_ITEM(__ERROR, 0, err_type); | |
101 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
102 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
103 {Py_XDECREF(old_err_type);} | |
104 {Py_XDECREF(old_err_msg);} | |
105 {Py_XDECREF(old_err_traceback);} | |
106 } | |
107 // The failure code is returned to index what code block failed. | |
108 return __failure; | |
109 | |
110 } | |
111 void cleanup(void) { | |
112 __label_1: | |
113 | |
114 double __DUMMY_1; | |
115 __label_3: | |
116 | |
117 double __DUMMY_3; | |
118 __label_5: | |
119 | |
120 double __DUMMY_5; | |
121 __label_7: | |
122 | |
123 double __DUMMY_7; | |
124 | |
125 Py_XDECREF(this->storage_V3); | |
126 Py_XDECREF(this->storage_V5); | |
127 Py_XDECREF(this->storage_V7); | |
128 Py_XDECREF(this->storage_V1); | |
129 } | |
130 int run(void) { | |
131 int __failure = 0; | |
132 | |
133 PyObject* py_V1; | |
134 CudaNdarray * V1; | |
135 PyObject* py_V3; | |
136 | |
137 PyObject* V3; | |
138 | |
139 PyObject* py_V5; | |
140 | |
141 PyArrayObject* V5; | |
142 int type_num_V5; | |
143 typedef npy_int32 dtype_V5; | |
144 | |
145 PyObject* py_V7; | |
146 | |
147 PyObject* V7; | |
148 | |
149 { | |
150 | |
151 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
152 {Py_XINCREF(py_V1);} | |
153 | |
154 if (py_V1 == Py_None) | |
155 { | |
156 V1 = NULL; | |
157 } | |
158 else | |
159 { | |
160 | |
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
162 // and one ref from the local scope. | |
163 | |
164 if (CudaNdarray_Check(py_V1)) | |
165 { | |
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
167 V1 = (CudaNdarray*)py_V1; | |
168 //std::cerr << "c_extract " << V1 << '\n'; | |
169 if (V1->nd != 2) | |
170 { | |
171 PyErr_Format(PyExc_RuntimeError, | |
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
173 V1->nd); | |
174 V1 = NULL; | |
175 {__failure = 2; goto __label_2;}; | |
176 } | |
177 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
178 | |
179 | |
180 assert(V1); | |
181 Py_INCREF(py_V1); | |
182 } | |
183 else if (py_V1 == Py_None) | |
184 { | |
185 PyErr_SetString(PyExc_TypeError, | |
186 "expected a CudaNdarray, not None"); | |
187 V1 = NULL; | |
188 {__failure = 2; goto __label_2;}; | |
189 } | |
190 else | |
191 { | |
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
194 V1 = NULL; | |
195 {__failure = 2; goto __label_2;}; | |
196 } | |
197 //std::cerr << "c_extract done " << V1 << '\n'; | |
198 | |
199 | |
200 } | |
201 | |
202 { | |
203 | |
204 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
205 {Py_XINCREF(py_V3);} | |
206 | |
207 Py_INCREF(py_V3); | |
208 V3 = py_V3; | |
209 | |
210 { | |
211 | |
212 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
213 {Py_XINCREF(py_V5);} | |
214 | |
215 V5 = NULL; | |
216 if (py_V5 == Py_None) { | |
217 // We can either fail here or set V5 to NULL and rely on Ops | |
218 // using tensors to handle the NULL case, but if they fail to do so | |
219 // they'll end up with nasty segfaults, so this is public service. | |
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
221 {__failure = 6; goto __label_6;} | |
222 } | |
223 if (!PyArray_Check(py_V5)) { | |
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
225 {__failure = 6; goto __label_6;} | |
226 } | |
227 // We expect NPY_INT32 | |
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
229 if (!PyArray_ISALIGNED(py_V5)) { | |
230 PyErr_Format(PyExc_NotImplementedError, | |
231 "expected an aligned array of type %d " | |
232 "(NPY_INT32), got non-aligned array of type %d" | |
233 " with %d dimensions, with 3 last dims %d, %d, %d" | |
234 " and 3 last strides %d %d, %d.", | |
235 NPY_INT32, type_num_V5, | |
236 PyArray_NDIM(py_V5), | |
237 PyArray_NDIM(py_V5) >= 3 ? | |
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
239 PyArray_NDIM(py_V5) >= 2 ? | |
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
241 PyArray_NDIM(py_V5) >= 1 ? | |
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
243 PyArray_NDIM(py_V5) >= 2 ? | |
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
245 PyArray_NDIM(py_V5) >= 3 ? | |
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
247 PyArray_NDIM(py_V5) >= 1 ? | |
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
249 ); | |
250 {__failure = 6; goto __label_6;} | |
251 } | |
252 // This is a TypeError to be consistent with DEBUG_MODE | |
253 // Note: DEBUG_MODE also tells the name of the container | |
254 if (type_num_V5 != NPY_INT32) { | |
255 PyErr_Format(PyExc_TypeError, | |
256 "expected type_num %d (NPY_INT32) got %d", | |
257 NPY_INT32, type_num_V5); | |
258 {__failure = 6; goto __label_6;} | |
259 } | |
260 V5 = (PyArrayObject*)(py_V5); | |
261 Py_XINCREF(V5); | |
262 | |
263 { | |
264 | |
265 py_V7 = Py_None; | |
266 {Py_XINCREF(py_V7);} | |
267 | |
268 V7 = NULL; | |
269 | |
270 { | |
271 | |
272 //////// <code generated by CURAND_Base> | |
273 | |
274 int odims[2]; | |
275 int n_elements = 1; | |
276 int must_alloc_sample = ((NULL == V1) | |
277 || !CudaNdarray_Check(py_V1) | |
278 || (V1->nd != 2)); | |
279 | |
280 if (V5->nd != 1) | |
281 { | |
282 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
283 {__failure = 9; goto __label_9;} | |
284 } | |
285 if (V5->dimensions[0] != 2) | |
286 { | |
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
288 2, V5->dimensions[0]); | |
289 {__failure = 9; goto __label_9;} | |
290 } | |
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
292 { | |
293 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
294 {__failure = 9; goto __label_9;} | |
295 } | |
296 for (int i = 0; i < 2; ++i) | |
297 { | |
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
299 n_elements *= odims[i]; | |
300 must_alloc_sample = (must_alloc_sample | |
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
302 } | |
303 if (must_alloc_sample) | |
304 { | |
305 Py_XDECREF(V1); | |
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
307 if(!V1) | |
308 { | |
309 {__failure = 9; goto __label_9;}; | |
310 } | |
311 } | |
312 if (!PyCObject_Check(V3)) | |
313 { | |
314 // allocate a new generator for o_generator | |
315 Py_XDECREF(V7); | |
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
317 assert(gen); | |
318 if (CURAND_STATUS_SUCCESS != | |
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
321 {__failure = 9; goto __label_9;}; | |
322 } | |
323 if (CURAND_STATUS_SUCCESS != | |
324 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
325 { | |
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
327 {__failure = 9; goto __label_9;}; | |
328 } | |
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
330 assert (V3 == Py_False); | |
331 } | |
332 else if (1) | |
333 { | |
334 // use i_generator for o_generator | |
335 Py_XDECREF(V7); | |
336 Py_INCREF(V3); | |
337 V7 = V3; | |
338 } | |
339 else | |
340 { | |
341 // copy i_generator for o_generator | |
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
343 {__failure = 9; goto __label_9;}; | |
344 } | |
345 { | |
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
347 curandStatus_t err = curandGenerateNormal(*gen, | |
348 CudaNdarray_DEV_DATA(V1), | |
349 n_elements, | |
350 0.0, 1.0); | |
351 | |
352 | |
353 if (err != CURAND_STATUS_SUCCESS) | |
354 { | |
355 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
356 {__failure = 9; goto __label_9;}; | |
357 } | |
358 cudaThreadSynchronize(); | |
359 } | |
360 //////// </ code generated by CURAND_Base> | |
361 __label_9: | |
362 | |
363 double __DUMMY_9; | |
364 | |
365 } | |
366 __label_8: | |
367 | |
368 if (!__failure) { | |
369 | |
370 assert(py_V7->ob_refcnt > 1); | |
371 Py_DECREF(py_V7); | |
372 py_V7 = V7 ? V7 : Py_None; | |
373 Py_INCREF(py_V7); | |
374 | |
375 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
376 {Py_XINCREF(py_V7);} | |
377 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
378 {Py_XDECREF(old);} | |
379 } | |
380 | |
381 Py_XDECREF(V7); | |
382 | |
383 {Py_XDECREF(py_V7);} | |
384 | |
385 double __DUMMY_8; | |
386 | |
387 } | |
388 __label_6: | |
389 | |
390 if (V5) { | |
391 Py_XDECREF(V5); | |
392 } | |
393 | |
394 {Py_XDECREF(py_V5);} | |
395 | |
396 double __DUMMY_6; | |
397 | |
398 } | |
399 __label_4: | |
400 | |
401 Py_XDECREF(V3); | |
402 | |
403 {Py_XDECREF(py_V3);} | |
404 | |
405 double __DUMMY_4; | |
406 | |
407 } | |
408 __label_2: | |
409 | |
410 if (!__failure) { | |
411 | |
412 //std::cerr << "sync\n"; | |
413 if (NULL == V1) { | |
414 // failure: sync None to storage | |
415 Py_XDECREF(py_V1); | |
416 py_V1 = Py_None; | |
417 Py_INCREF(py_V1); | |
418 } | |
419 else | |
420 { | |
421 if (py_V1 != (PyObject*)V1) | |
422 { | |
423 Py_XDECREF(py_V1); | |
424 py_V1 = (PyObject*)V1; | |
425 Py_INCREF(py_V1); | |
426 } | |
427 assert(py_V1->ob_refcnt); | |
428 } | |
429 | |
430 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
431 {Py_XINCREF(py_V1);} | |
432 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
433 {Py_XDECREF(old);} | |
434 } | |
435 | |
436 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
437 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
438 if (V1) | |
439 { | |
440 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
441 Py_XDECREF(V1); | |
442 } | |
443 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
444 | |
445 {Py_XDECREF(py_V1);} | |
446 | |
447 double __DUMMY_2; | |
448 | |
449 } | |
450 | |
451 | |
452 if (__failure) { | |
453 // When there is a failure, this code puts the exception | |
454 // in __ERROR. | |
455 PyObject* err_type = NULL; | |
456 PyObject* err_msg = NULL; | |
457 PyObject* err_traceback = NULL; | |
458 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
459 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
460 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
461 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
462 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
463 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
464 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
465 PyList_SET_ITEM(__ERROR, 0, err_type); | |
466 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
467 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
468 {Py_XDECREF(old_err_type);} | |
469 {Py_XDECREF(old_err_msg);} | |
470 {Py_XDECREF(old_err_traceback);} | |
471 } | |
472 // The failure code is returned to index what code block failed. | |
473 return __failure; | |
474 | |
475 } | |
476 }; | |
477 | |
478 | |
479 int __struct_compiled_op_889e175e75159a3e61d065caf0802126_executor(__struct_compiled_op_889e175e75159a3e61d065caf0802126* self) { | |
480 return self->run(); | |
481 } | |
482 | |
483 void __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor(void* executor, void* self) { | |
484 //printf("doing cleanup\n"); | |
485 //fflush(stdout); | |
486 // ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self)->cleanup(); | |
487 // free(self); | |
488 delete ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self); | |
489 //printf("done cleanup\n"); | |
490 //fflush(stdout); | |
491 } | |
492 | |
493 ////////////////////// | |
494 //// Functions | |
495 ////////////////////// | |
496 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
497 assert(PyTuple_Check(argtuple)); | |
498 if (5 != PyTuple_Size(argtuple)){ | |
499 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
500 return NULL; | |
501 } | |
502 __struct_compiled_op_889e175e75159a3e61d065caf0802126* struct_ptr = new __struct_compiled_op_889e175e75159a3e61d065caf0802126(); | |
503 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
504 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_889e175e75159a3e61d065caf0802126_executor), struct_ptr, __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor); | |
505 return thunk; } | |
506 | |
507 ////////////////////// | |
508 //// Module init | |
509 ////////////////////// | |
510 static PyMethodDef MyMethods[] = { | |
511 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
512 {NULL, NULL, 0, NULL} | |
513 }; | |
514 PyMODINIT_FUNC init889e175e75159a3e61d065caf0802126(void){ | |
515 import_array(); | |
516 (void) Py_InitModule("889e175e75159a3e61d065caf0802126", MyMethods); | |
517 } | |
518 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
E1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 void free_generator(void *_gen) | |
14 { | |
15 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
16 curandStatus_t err = curandDestroyGenerator(*gen); | |
17 if (err != CURAND_STATUS_SUCCESS) | |
18 { | |
19 fprintf(stderr, "Failure (%%i) in destroying CURAND generator", | |
20 (int)err); | |
21 } | |
22 free(_gen); | |
23 } | |
24 | |
25 | |
26 struct __struct_compiled_op_889e175e75159a3e61d065caf0802126 { | |
27 PyObject* __ERROR; | |
28 | |
29 PyObject* storage_V3; | |
30 PyObject* storage_V5; | |
31 PyObject* storage_V7; | |
32 PyObject* storage_V1; | |
33 | |
34 | |
35 __struct_compiled_op_889e175e75159a3e61d065caf0802126() {} | |
36 ~__struct_compiled_op_889e175e75159a3e61d065caf0802126(void) { | |
37 cleanup(); | |
38 } | |
39 | |
40 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
41 Py_XINCREF(storage_V3); | |
42 Py_XINCREF(storage_V5); | |
43 Py_XINCREF(storage_V7); | |
44 Py_XINCREF(storage_V1); | |
45 this->storage_V3 = storage_V3; | |
46 this->storage_V5 = storage_V5; | |
47 this->storage_V7 = storage_V7; | |
48 this->storage_V1 = storage_V1; | |
49 int __failure = 0; | |
50 | |
51 { | |
52 | |
53 { | |
54 | |
55 { | |
56 | |
57 { | |
58 | |
59 this->__ERROR = __ERROR; | |
60 return 0; | |
61 __label_7: | |
62 | |
63 double __DUMMY_7; | |
64 | |
65 } | |
66 __label_5: | |
67 | |
68 double __DUMMY_5; | |
69 | |
70 } | |
71 __label_3: | |
72 | |
73 double __DUMMY_3; | |
74 | |
75 } | |
76 __label_1: | |
77 | |
78 double __DUMMY_1; | |
79 | |
80 } | |
81 | |
82 Py_XDECREF(this->storage_V3); | |
83 Py_XDECREF(this->storage_V5); | |
84 Py_XDECREF(this->storage_V7); | |
85 Py_XDECREF(this->storage_V1); | |
86 | |
87 if (__failure) { | |
88 // When there is a failure, this code puts the exception | |
89 // in __ERROR. | |
90 PyObject* err_type = NULL; | |
91 PyObject* err_msg = NULL; | |
92 PyObject* err_traceback = NULL; | |
93 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
94 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
95 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
96 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
97 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
98 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
99 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
100 PyList_SET_ITEM(__ERROR, 0, err_type); | |
101 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
102 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
103 {Py_XDECREF(old_err_type);} | |
104 {Py_XDECREF(old_err_msg);} | |
105 {Py_XDECREF(old_err_traceback);} | |
106 } | |
107 // The failure code is returned to index what code block failed. | |
108 return __failure; | |
109 | |
110 } | |
111 void cleanup(void) { | |
112 __label_1: | |
113 | |
114 double __DUMMY_1; | |
115 __label_3: | |
116 | |
117 double __DUMMY_3; | |
118 __label_5: | |
119 | |
120 double __DUMMY_5; | |
121 __label_7: | |
122 | |
123 double __DUMMY_7; | |
124 | |
125 Py_XDECREF(this->storage_V3); | |
126 Py_XDECREF(this->storage_V5); | |
127 Py_XDECREF(this->storage_V7); | |
128 Py_XDECREF(this->storage_V1); | |
129 } | |
130 int run(void) { | |
131 int __failure = 0; | |
132 | |
133 PyObject* py_V1; | |
134 CudaNdarray * V1; | |
135 PyObject* py_V3; | |
136 | |
137 PyObject* V3; | |
138 | |
139 PyObject* py_V5; | |
140 | |
141 PyArrayObject* V5; | |
142 int type_num_V5; | |
143 typedef npy_int32 dtype_V5; | |
144 | |
145 PyObject* py_V7; | |
146 | |
147 PyObject* V7; | |
148 | |
149 { | |
150 | |
151 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
152 {Py_XINCREF(py_V1);} | |
153 | |
154 if (py_V1 == Py_None) | |
155 { | |
156 V1 = NULL; | |
157 } | |
158 else | |
159 { | |
160 | |
161 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
162 // and one ref from the local scope. | |
163 | |
164 if (CudaNdarray_Check(py_V1)) | |
165 { | |
166 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
167 V1 = (CudaNdarray*)py_V1; | |
168 //std::cerr << "c_extract " << V1 << '\n'; | |
169 if (V1->nd != 2) | |
170 { | |
171 PyErr_Format(PyExc_RuntimeError, | |
172 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
173 V1->nd); | |
174 V1 = NULL; | |
175 {__failure = 2; goto __label_2;}; | |
176 } | |
177 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
178 | |
179 | |
180 assert(V1); | |
181 Py_INCREF(py_V1); | |
182 } | |
183 else if (py_V1 == Py_None) | |
184 { | |
185 PyErr_SetString(PyExc_TypeError, | |
186 "expected a CudaNdarray, not None"); | |
187 V1 = NULL; | |
188 {__failure = 2; goto __label_2;}; | |
189 } | |
190 else | |
191 { | |
192 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
193 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
194 V1 = NULL; | |
195 {__failure = 2; goto __label_2;}; | |
196 } | |
197 //std::cerr << "c_extract done " << V1 << '\n'; | |
198 | |
199 | |
200 } | |
201 | |
202 { | |
203 | |
204 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
205 {Py_XINCREF(py_V3);} | |
206 | |
207 Py_INCREF(py_V3); | |
208 V3 = py_V3; | |
209 | |
210 { | |
211 | |
212 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
213 {Py_XINCREF(py_V5);} | |
214 | |
215 V5 = NULL; | |
216 if (py_V5 == Py_None) { | |
217 // We can either fail here or set V5 to NULL and rely on Ops | |
218 // using tensors to handle the NULL case, but if they fail to do so | |
219 // they'll end up with nasty segfaults, so this is public service. | |
220 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
221 {__failure = 6; goto __label_6;} | |
222 } | |
223 if (!PyArray_Check(py_V5)) { | |
224 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
225 {__failure = 6; goto __label_6;} | |
226 } | |
227 // We expect NPY_INT32 | |
228 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
229 if (!PyArray_ISALIGNED(py_V5)) { | |
230 PyErr_Format(PyExc_NotImplementedError, | |
231 "expected an aligned array of type %d " | |
232 "(NPY_INT32), got non-aligned array of type %d" | |
233 " with %d dimensions, with 3 last dims %d, %d, %d" | |
234 " and 3 last strides %d %d, %d.", | |
235 NPY_INT32, type_num_V5, | |
236 PyArray_NDIM(py_V5), | |
237 PyArray_NDIM(py_V5) >= 3 ? | |
238 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
239 PyArray_NDIM(py_V5) >= 2 ? | |
240 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
241 PyArray_NDIM(py_V5) >= 1 ? | |
242 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
243 PyArray_NDIM(py_V5) >= 2 ? | |
244 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
245 PyArray_NDIM(py_V5) >= 3 ? | |
246 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
247 PyArray_NDIM(py_V5) >= 1 ? | |
248 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
249 ); | |
250 {__failure = 6; goto __label_6;} | |
251 } | |
252 // This is a TypeError to be consistent with DEBUG_MODE | |
253 // Note: DEBUG_MODE also tells the name of the container | |
254 if (type_num_V5 != NPY_INT32) { | |
255 PyErr_Format(PyExc_TypeError, | |
256 "expected type_num %d (NPY_INT32) got %d", | |
257 NPY_INT32, type_num_V5); | |
258 {__failure = 6; goto __label_6;} | |
259 } | |
260 V5 = (PyArrayObject*)(py_V5); | |
261 Py_XINCREF(V5); | |
262 | |
263 { | |
264 | |
265 py_V7 = Py_None; | |
266 {Py_XINCREF(py_V7);} | |
267 | |
268 V7 = NULL; | |
269 | |
270 { | |
271 | |
272 //////// <code generated by CURAND_Base> | |
273 | |
274 int odims[2]; | |
275 int n_elements = 1; | |
276 int must_alloc_sample = ((NULL == V1) | |
277 || !CudaNdarray_Check(py_V1) | |
278 || (V1->nd != 2)); | |
279 | |
280 if (V5->nd != 1) | |
281 { | |
282 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
283 {__failure = 9; goto __label_9;} | |
284 } | |
285 if (V5->dimensions[0] != 2) | |
286 { | |
287 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
288 2, V5->dimensions[0]); | |
289 {__failure = 9; goto __label_9;} | |
290 } | |
291 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
292 { | |
293 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
294 {__failure = 9; goto __label_9;} | |
295 } | |
296 for (int i = 0; i < 2; ++i) | |
297 { | |
298 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
299 n_elements *= odims[i]; | |
300 must_alloc_sample = (must_alloc_sample | |
301 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
302 } | |
303 if (must_alloc_sample) | |
304 { | |
305 Py_XDECREF(V1); | |
306 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
307 if(!V1) | |
308 { | |
309 {__failure = 9; goto __label_9;}; | |
310 } | |
311 } | |
312 if (!PyCObject_Check(V3)) | |
313 { | |
314 // allocate a new generator for o_generator | |
315 Py_XDECREF(V7); | |
316 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
317 assert(gen); | |
318 if (CURAND_STATUS_SUCCESS != | |
319 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
320 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
321 {__failure = 9; goto __label_9;}; | |
322 } | |
323 if (CURAND_STATUS_SUCCESS != | |
324 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
325 { | |
326 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
327 {__failure = 9; goto __label_9;}; | |
328 } | |
329 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
330 assert (V3 == Py_False); | |
331 } | |
332 else if (1) | |
333 { | |
334 // use i_generator for o_generator | |
335 Py_XDECREF(V7); | |
336 Py_INCREF(V3); | |
337 V7 = V3; | |
338 } | |
339 else | |
340 { | |
341 // copy i_generator for o_generator | |
342 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
343 {__failure = 9; goto __label_9;}; | |
344 } | |
345 { | |
346 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
347 curandStatus_t err = curandGenerateNormal(*gen, | |
348 CudaNdarray_DEV_DATA(V1), | |
349 n_elements, | |
350 0.0, 1.0); | |
351 | |
352 | |
353 if (err != CURAND_STATUS_SUCCESS) | |
354 { | |
355 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
356 {__failure = 9; goto __label_9;}; | |
357 } | |
358 cudaThreadSynchronize(); | |
359 } | |
360 //////// </ code generated by CURAND_Base> | |
361 __label_9: | |
362 | |
363 double __DUMMY_9; | |
364 | |
365 } | |
366 __label_8: | |
367 | |
368 if (!__failure) { | |
369 | |
370 assert(py_V7->ob_refcnt > 1); | |
371 Py_DECREF(py_V7); | |
372 py_V7 = V7 ? V7 : Py_None; | |
373 Py_INCREF(py_V7); | |
374 | |
375 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
376 {Py_XINCREF(py_V7);} | |
377 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
378 {Py_XDECREF(old);} | |
379 } | |
380 | |
381 Py_XDECREF(V7); | |
382 | |
383 {Py_XDECREF(py_V7);} | |
384 | |
385 double __DUMMY_8; | |
386 | |
387 } | |
388 __label_6: | |
389 | |
390 if (V5) { | |
391 Py_XDECREF(V5); | |
392 } | |
393 | |
394 {Py_XDECREF(py_V5);} | |
395 | |
396 double __DUMMY_6; | |
397 | |
398 } | |
399 __label_4: | |
400 | |
401 Py_XDECREF(V3); | |
402 | |
403 {Py_XDECREF(py_V3);} | |
404 | |
405 double __DUMMY_4; | |
406 | |
407 } | |
408 __label_2: | |
409 | |
410 if (!__failure) { | |
411 | |
412 //std::cerr << "sync\n"; | |
413 if (NULL == V1) { | |
414 // failure: sync None to storage | |
415 Py_XDECREF(py_V1); | |
416 py_V1 = Py_None; | |
417 Py_INCREF(py_V1); | |
418 } | |
419 else | |
420 { | |
421 if (py_V1 != (PyObject*)V1) | |
422 { | |
423 Py_XDECREF(py_V1); | |
424 py_V1 = (PyObject*)V1; | |
425 Py_INCREF(py_V1); | |
426 } | |
427 assert(py_V1->ob_refcnt); | |
428 } | |
429 | |
430 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
431 {Py_XINCREF(py_V1);} | |
432 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
433 {Py_XDECREF(old);} | |
434 } | |
435 | |
436 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
437 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
438 if (V1) | |
439 { | |
440 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
441 Py_XDECREF(V1); | |
442 } | |
443 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
444 | |
445 {Py_XDECREF(py_V1);} | |
446 | |
447 double __DUMMY_2; | |
448 | |
449 } | |
450 | |
451 | |
452 if (__failure) { | |
453 // When there is a failure, this code puts the exception | |
454 // in __ERROR. | |
455 PyObject* err_type = NULL; | |
456 PyObject* err_msg = NULL; | |
457 PyObject* err_traceback = NULL; | |
458 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
459 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
460 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
461 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
462 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
463 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
464 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
465 PyList_SET_ITEM(__ERROR, 0, err_type); | |
466 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
467 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
468 {Py_XDECREF(old_err_type);} | |
469 {Py_XDECREF(old_err_msg);} | |
470 {Py_XDECREF(old_err_traceback);} | |
471 } | |
472 // The failure code is returned to index what code block failed. | |
473 return __failure; | |
474 | |
475 } | |
476 }; | |
477 | |
478 | |
479 int __struct_compiled_op_889e175e75159a3e61d065caf0802126_executor(__struct_compiled_op_889e175e75159a3e61d065caf0802126* self) { | |
480 return self->run(); | |
481 } | |
482 | |
483 void __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor(void* executor, void* self) { | |
484 //printf("doing cleanup\n"); | |
485 //fflush(stdout); | |
486 // ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self)->cleanup(); | |
487 // free(self); | |
488 delete ((__struct_compiled_op_889e175e75159a3e61d065caf0802126*)self); | |
489 //printf("done cleanup\n"); | |
490 //fflush(stdout); | |
491 } | |
492 | |
493 ////////////////////// | |
494 //// Functions | |
495 ////////////////////// | |
496 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
497 assert(PyTuple_Check(argtuple)); | |
498 if (5 != PyTuple_Size(argtuple)){ | |
499 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
500 return NULL; | |
501 } | |
502 __struct_compiled_op_889e175e75159a3e61d065caf0802126* struct_ptr = new __struct_compiled_op_889e175e75159a3e61d065caf0802126(); | |
503 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
504 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_889e175e75159a3e61d065caf0802126_executor), struct_ptr, __struct_compiled_op_889e175e75159a3e61d065caf0802126_destructor); | |
505 return thunk; } | |
506 | |
507 ////////////////////// | |
508 //// Module init | |
509 ////////////////////// | |
510 static PyMethodDef MyMethods[] = { | |
511 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
512 {NULL, NULL, 0, NULL} | |
513 }; | |
514 PyMODINIT_FUNC init889e175e75159a3e61d065caf0802126(void){ | |
515 import_array(); | |
516 (void) Py_InitModule("889e175e75159a3e61d065caf0802126", MyMethods); | |
517 } | |
518 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
E..............KK....................................................WARNING (theano.gof.cmodule): Cache leak due to unpickle-able key data set([(((3, (4,), (4,), (4,), (4,), (4,)), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1')), ('CLinker.cmodule_key', ('-D NPY_ARRAY_ALIGNED=NPY_ALIGNED', '-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS', '-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY', '-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS', '-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL', '-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE', '-O3', '-Wno-unused-label', '-Wno-unused-variable', '-Wno-write-strings', '-fno-math-errno'), (), (), 'NPY_ABI_VERSION=0x1000009', 'c_compiler_str=g++ 4.6', 'md5:0c1bf1caaa6b5b7fa8a3374b9014ef6e', (<theano.scalar.basic.Composite object at 0x1224add0>, ((Scalar(float64), ((-1, 0), False)), (Scalar(float64), ((-1, 1), False)), (Scalar(float64), ((-1, 2), False))), (1, (False, False, False)))))]) | |
.WARNING (theano.gof.cmodule): Cache leak due to unpickle-able key data set([(((3, (4,), (4,), (4,)), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1')), ('CLinker.cmodule_key', ('-D NPY_ARRAY_ALIGNED=NPY_ALIGNED', '-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS', '-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY', '-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS', '-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL', '-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE', '-O3', '-Wno-unused-label', '-Wno-unused-variable', '-Wno-write-strings', '-fno-math-errno'), (), (), 'NPY_ABI_VERSION=0x1000009', 'c_compiler_str=g++ 4.6', 'md5:0c1bf1caaa6b5b7fa8a3374b9014ef6e', (<theano.scalar.basic.Composite object at 0x125546d0>, ((Scalar(float64), ((-1, 0), False)), (Scalar(float64), ((-1, 1), False))), (1, (False,)))))]) | |
.WARNING (theano.gof.cmodule): Cache leak due to unpickle-able key data set([(((3, (4,), (4,), (4,)), (10, '1.6.1'), (10, '1.6.1'), (10, '1.6.1')), ('CLinker.cmodule_key', ('-D NPY_ARRAY_ALIGNED=NPY_ALIGNED', '-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS', '-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY', '-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS', '-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL', '-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE', '-O3', '-Wno-unused-label', '-Wno-unused-variable', '-Wno-write-strings', '-fno-math-errno'), (), (), 'NPY_ABI_VERSION=0x1000009', 'c_compiler_str=g++ 4.6', 'md5:0c1bf1caaa6b5b7fa8a3374b9014ef6e', (<theano.scalar.basic.Composite object at 0x11c8c990>, ((Scalar(float64), ((-1, 0), False)), (Scalar(float64), ((-1, 1), False))), (1, (False,)))))]) | |
.............................../usr/lib/python2.7/dist-packages/scipy/sparse/data.py:54: ComplexWarning: Casting complex values to real discards the imaginary part | |
return self._with_data(self.data.astype(t)) | |
/usr/local/lib/python2.7/dist-packages/theano/sparse/tests/test_basic.py:2021: ComplexWarning: Casting complex values to real discards the imaginary part | |
expected = data.toarray().astype(o_dtype) | |
...../usr/lib/python2.7/dist-packages/scipy/sparse/compressed.py:486: SparseEfficiencyWarning: changing the sparsity structure of a csc_matrix is expensive. lil_matrix is more efficient. | |
SparseEfficiencyWarning) | |
/usr/lib/python2.7/dist-packages/scipy/sparse/compressed.py:486: SparseEfficiencyWarning: changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient. | |
SparseEfficiencyWarning) | |
.....................................................S.......S...........................................................................................................................................^[^[^[........................S.SS............................................................................................................................................................................................................................................................................................................^[................................................................................................................................................................................................................................................................................................................................................................................K..............................................................................................................................S..................................................................................................................................................................................K..KKKK.K.....................................................K......................../usr/local/lib/python2.7/dist-packages/theano/tensor/tests/test_naacl09.py:69: UserWarning: RandomStreams is deprecated and will be removed in release 0.7. Use shared_randomstreams.RandomStreams or MRG_RandomStreams instead. | |
self.random = T.RandomStreams() | |
.....................................................S.S..S................................................K......K..................................................................................................................................................................SS...SSSSS..........K............................................. | |
====================================================================== | |
ERROR: Run the tests for `uniform` with different settings for the | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest | |
self.test(*self.arg) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 59, in check_uniform_basic | |
f0 = theano.function([], u0, mode=mode_with_gpu) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function | |
profile=profile) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc | |
on_unused_input=on_unused_input) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function | |
defaults) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create | |
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk | |
output_storage = output_storage)[:3] | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all | |
for node in order] | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk | |
compute_map, no_recycling) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk | |
output_storage=node_output_storage) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__ | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory | |
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key | |
module = compile_steps.next() | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step | |
preargs=preargs) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str | |
'for cmd', ' '.join(cmd)) | |
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpsYHM22/7a7573cd1a887cbf5d8946c487571964.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Uniform{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]') | |
-------------------- >> begin captured stdout << --------------------- | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpsYHM22/7a7573cd1a887cbf5d8946c487571964.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
--------------------- >> end captured stdout << ---------------------- | |
====================================================================== | |
ERROR: Run the tests for `uniform` with different settings for the | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest | |
self.test(*self.arg) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 59, in check_uniform_basic | |
f0 = theano.function([], u0, mode=mode_with_gpu) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function | |
profile=profile) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc | |
on_unused_input=on_unused_input) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function | |
defaults) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create | |
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk | |
output_storage = output_storage)[:3] | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all | |
for node in order] | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk | |
compute_map, no_recycling) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk | |
output_storage=node_output_storage) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__ | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory | |
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key | |
module = compile_steps.next() | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step | |
preargs=preargs) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str | |
'for cmd', ' '.join(cmd)) | |
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp9zHeYu/7a7573cd1a887cbf5d8946c487571964.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Uniform{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]') | |
-------------------- >> begin captured stdout << --------------------- | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp9zHeYu/7a7573cd1a887cbf5d8946c487571964.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
--------------------- >> end captured stdout << ---------------------- | |
====================================================================== | |
ERROR: Run the tests for `uniform` with different settings for the | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest | |
self.test(*self.arg) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 59, in check_uniform_basic | |
f0 = theano.function([], u0, mode=mode_with_gpu) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function | |
profile=profile) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc | |
on_unused_input=on_unused_input) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function | |
defaults) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create | |
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk | |
output_storage = output_storage)[:3] | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all | |
for node in order] | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk | |
compute_map, no_recycling) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk | |
output_storage=node_output_storage) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__ | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory | |
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key | |
module = compile_steps.next() | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step | |
preargs=preargs) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str | |
'for cmd', ' '.join(cmd)) | |
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp4H_5Ej/7a7573cd1a887cbf5d8946c487571964.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Uniform{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]') | |
-------------------- >> begin captured stdout << --------------------- | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp4H_5Ej/7a7573cd1a887cbf5d8946c487571964.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
--------------------- >> end captured stdout << ---------------------- | |
====================================================================== | |
ERROR: Run the tests for `normal` with different settings for the | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest | |
self.test(*self.arg) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 127, in check_normal_basic | |
f0 = theano.function([], u0, mode=mode_with_gpu) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function | |
profile=profile) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc | |
on_unused_input=on_unused_input) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function | |
defaults) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create | |
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk | |
output_storage = output_storage)[:3] | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all | |
for node in order] | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk | |
compute_map, no_recycling) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk | |
output_storage=node_output_storage) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__ | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory | |
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key | |
module = compile_steps.next() | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step | |
preargs=preargs) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str | |
'for cmd', ' '.join(cmd)) | |
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpCuTurr/889e175e75159a3e61d065caf0802126.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Normal{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]') | |
-------------------- >> begin captured stdout << --------------------- | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpCuTurr/889e175e75159a3e61d065caf0802126.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
--------------------- >> end captured stdout << ---------------------- | |
====================================================================== | |
ERROR: Run the tests for `normal` with different settings for the | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest | |
self.test(*self.arg) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 127, in check_normal_basic | |
f0 = theano.function([], u0, mode=mode_with_gpu) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function | |
profile=profile) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc | |
on_unused_input=on_unused_input) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function | |
defaults) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create | |
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk | |
output_storage = output_storage)[:3] | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all | |
for node in order] | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk | |
compute_map, no_recycling) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk | |
output_storage=node_output_storage) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__ | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory | |
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key | |
module = compile_steps.next() | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step | |
preargs=preargs) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str | |
'for cmd', ' '.join(cmd)) | |
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpjF7IY9/889e175e75159a3e61d065caf0802126.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Normal{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]') | |
-------------------- >> begin captured stdout << --------------------- | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmpjF7IY9/889e175e75159a3e61d065caf0802126.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
--------------------- >> end captured stdout << ---------------------- | |
====================================================================== | |
ERROR: Run the tests for `normal` with different settings for the | |
---------------------------------------------------------------------- | |
Traceback (most recent call last): | |
File "/usr/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest | |
self.test(*self.arg) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/tests/test_rng_curand.py", line 127, in check_normal_basic | |
f0 = theano.function([], u0, mode=mode_with_gpu) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function.py", line 221, in function | |
profile=profile) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/pfunc.py", line 506, in pfunc | |
on_unused_input=on_unused_input) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1339, in orig_function | |
defaults) | |
File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 1167, in create | |
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 383, in make_thunk | |
output_storage = output_storage)[:3] | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/vm.py", line 799, in make_all | |
for node in order] | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/__init__.py", line 244, in make_thunk | |
compute_map, no_recycling) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 580, in make_thunk | |
output_storage=node_output_storage) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 913, in make_thunk | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 856, in __compile__ | |
keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1279, in cthunk_factory | |
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock) | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cmodule.py", line 975, in module_from_key | |
module = compile_steps.next() | |
File "/usr/local/lib/python2.7/dist-packages/theano/gof/cc.py", line 1201, in compile_cmodule_by_step | |
preargs=preargs) | |
File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/nvcc_compiler.py", line 391, in compile_str | |
'for cmd', ' '.join(cmd)) | |
Exception: ('nvcc return status', 1, 'for cmd', 'nvcc -shared -g -O3 -arch=sm_30 -m64 -Xcompiler -Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC -Xlinker -rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/local/cuda-5.5/include -I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -I/usr/lib/python2.7/dist-packages/numpy/core/include -I/usr/include/python2.7 -I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda -o /home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp6Uc8gr/889e175e75159a3e61d065caf0802126.so mod.cu -L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib -L/usr/local/cuda-5.5/lib64 -L/usr/lib -lpython2.7 -lcudart -lcublas -lcurand -lcuda_ndarray', '[*1 -> CURAND_Normal{inplace=True, out_dtype=CudaNdarrayType(float32, matrix)}(<Generic>, TensorConstant{(2,) of 10}), *1::1]') | |
-------------------- >> begin captured stdout << --------------------- | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=90c42b258281d604500ad1a5624b6654,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/tmp6Uc8gr/889e175e75159a3e61d065caf0802126.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
--------------------- >> end captured stdout << ---------------------- | |
---------------------------------------------------------------------- | |
Ran 2353 tests in 6431.965s | |
FAILED (KNOWNFAIL=14, SKIP=23, errors=6) | |
<nose.result.TextTestResult run=2353 errors=6 failures=0> | |
>>> | |
.Xauthority .bashrc .hgrc .nv/ .selected_editor Theano.dev/ playing_with_theano/ | |
.bash_history .cache/ .local/ .pip/ .ssh/ cuda5.5/ temp/ | |
.bash_logout .config/ .matplotlib/ .profile .theano/ ocropus/ | |
>>> theano.__version__ | |
'0.6.0rc3' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> theano.test() | |
Theano version 0.6.0rc3.dev-6ecd6c6cfb8c06d8fd0bb73cf6924e1171e4d069 | |
theano is installed in /home/ludwig/Theano/theano | |
NumPy version 1.6.1 | |
NumPy is installed in /usr/lib/python2.7/dist-packages/numpy | |
Python version 2.7.3 (default, Aug 1 2012, 05:14:39) [GCC 4.6.3] | |
nose version 1.1.2 | |
/home/ludwig/Theano/theano/misc/pycuda_init.py:34: UserWarning: PyCUDA import failed in theano.misc.pycuda_init | |
warnings.warn("PyCUDA import failed in theano.misc.pycuda_init") | |
Using gpu device 0: GeForce GTX 670 | |
.........................................E.............../home/ludwig/Theano/theano/compile/tests/test_inplace_opt_for_value.py:170: UserWarning: theano modules are deprecated and will be removed in release 0.7 | |
super(ExampleRNN, self).__init__() | |
..............................*** NaN detected *** | |
Elemwise{Composite{[mul(log(i0), i0)]}} [@A] '' | |
|x [@B] | |
Inputs : [array(0.0)] | |
Outputs: [array(nan)] | |
.*** NaN detected *** | |
Elemwise{Mul{output_types_preference=transfer_type{0}}}[(0, 0)] [@A] '' | |
|Elemwise{log,no_inplace} [@B] '' | |
| |x [@C] | |
|x [@C] | |
Inputs : [array(nan), array(0.0)] | |
Outputs: [array(nan)] | |
.*** NaN detected *** | |
Elemwise{mul,no_inplace} [@A] '' | |
|Elemwise{log,no_inplace} [@B] '' | |
| |CGer{destructive} [@C] '' | |
| |Alloc [@D] '' | |
| | |TensorConstant{0.0} [@E] | |
| | |Shape_i{0} [@F] '' | |
| | | |x [@G] | |
| | |Shape_i{0} [@F] '' | |
| |TensorConstant{1.0} [@H] | |
| |x [@G] | |
| |x [@G] | |
|CGer{destructive} [@C] '' | |
Inputs : [array([[-inf, -inf], | |
[-inf, -inf]]), array([[ 0., 0.], | |
[ 0., 0.]])] | |
Outputs: [array([[ nan, nan], | |
[ nan, nan]])] | |
...........................................................Yay, TEST PASSED | |
.0.591525729001 | |
0.591525729001 | |
0.591525729001 | |
0.591525729001 | |
.......WARNING (theano.gof.cmodule): Cache leak due to unpickle-able key data set([(((1,), (10, '1.6.1'), (10, '1.6.1')), ('CLinker.cmodule_key', ('-D NPY_ARRAY_ALIGNED=NPY_ALIGNED', '-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS', '-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY', '-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS', '-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL', '-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE', '-O3', '-Wno-unused-label', '-Wno-unused-variable', '-Wno-write-strings', '-fPIC', '-fno-math-errno', '-m64'), (), (), 'NPY_ABI_VERSION=0x1000009', u'c_compiler_str=g++ 4.6', 'md5:2efdfc3f30b8f0efb4ed894c4c08f52f', (<theano.gof.tests.test_compute_test_value.IncOneC object at 0xacd29d0>, ((Scalar(int32), ((-1, 0), False)),), (1, (False,)))))]) | |
...................................................case 1 | |
... passed | |
case 2 | |
b | |
... passed | |
case 1 | |
... passed | |
case 2 | |
b | |
... passed | |
case 1 | |
... passed | |
case 2 | |
b | |
... passed | |
case 1 | |
... passed | |
case 2 | |
b | |
... passed | |
.20.5 | |
... passed | |
.........................[Op3(Op4(x, y))] | |
[Op2(x, y)] | |
.[Op1(Op1(Op3(x, y)))] | |
.before [Op3(Op4(x, y))] | |
after [Op1(x, y)] | |
........................................c|py takes 0.001862 s/Kop | |
vmLinker takes 0.001792 s/Kop | |
vmLinker_nogc takes 0.001305 s/Kop | |
vmLinker_CLOOP takes 0.000271 s/Kop | |
numpy takes 0.000000 s/Kop | |
.vmLinker takes 0.072410 s/Kop | |
vmLinker_nogc takes 0.062354 s/Kop | |
vmLinker_C takes 0.017431 s/Kop | |
.................................................................................................................................................................................................................................................(3, 10, 11, 11) (2, 10, 10, 10) (10, 11, 11) (10, 10) | |
HostFromGpu [@A] '' 4 | |
|GpuConv{full, (1, 1), None, (10, 10), True, (10, 11, 11), (10, 10)} [@B] '' 3 | |
|GpuFromHost [@C] '' 1 | |
| |<TensorType(float32, 4D)> [@D] | |
|GpuDimShuffle{1,0,2,3} [@E] '' 2 | |
|GpuFromHost [@F] '' 0 | |
|<TensorType(float32, 4D)> [@G] | |
(3, 10, 11, 11) (2, 10, 10, 10) (10, 22, 22) (10, 10) | |
HostFromGpu [@A] '' 8 | |
|GpuConv{full, (1, 1), None, (10, 10), True, (10, 11, 11), (10, 10)} [@B] '' 7 | |
|GpuIncSubtensor{InplaceSet;::, ::, ::2, ::2} [@C] '' 6 | |
| |GpuAlloc{memset_0=True} [@D] '' 5 | |
| | |CudaNdarrayConstant{[[[[ 0.]]]]} [@E] | |
| | |Subtensor{0} [@F] '' 4 | |
| | | |GpuShape [@G] '' 3 | |
| | | |GpuFromHost [@H] '' 1 | |
| | | |<TensorType(float32, 4D)> [@I] | |
| | |TensorConstant{10} [@J] | |
| | |TensorConstant{22} [@K] | |
| | |TensorConstant{22} [@K] | |
| |GpuFromHost [@H] '' 1 | |
|GpuDimShuffle{1,0,2,3} [@L] '' 2 | |
|GpuFromHost [@M] '' 0 | |
|<TensorType(float32, 4D)> [@N] | |
(3, 10, 11, 11) (2, 10, 10, 10) (10, 33, 33) (10, 10) | |
HostFromGpu [@A] '' 8 | |
|GpuConv{full, (1, 1), None, (10, 10), True, (10, 11, 11), (10, 10)} [@B] '' 7 | |
|GpuIncSubtensor{InplaceSet;::, ::, ::3, ::3} [@C] '' 6 | |
| |GpuAlloc{memset_0=True} [@D] '' 5 | |
| | |CudaNdarrayConstant{[[[[ 0.]]]]} [@E] | |
| | |Subtensor{0} [@F] '' 4 | |
| | | |GpuShape [@G] '' 3 | |
| | | |GpuFromHost [@H] '' 1 | |
| | | |<TensorType(float32, 4D)> [@I] | |
| | |TensorConstant{10} [@J] | |
| | |TensorConstant{33} [@K] | |
| | |TensorConstant{33} [@K] | |
| |GpuFromHost [@H] '' 1 | |
|GpuDimShuffle{1,0,2,3} [@L] '' 2 | |
|GpuFromHost [@M] '' 0 | |
|<TensorType(float32, 4D)> [@N] | |
./usr/lib/python2.7/dist-packages/scipy/signal/signaltools.py:408: ComplexWarning: Casting complex values to real discards the imaginary part | |
return sigtools._convolve2d(in1,in2,1,val,bval,fillvalue) | |
Executed 284 different shapes | |
.Executed 1460 different shapes | |
.Executed 143 different shapes | |
.Executed 368 different shapes | |
.Executed 1098 different shapes | |
.Executed 736 different shapes | |
.Executed 1107 different shapes | |
.Executed 2936 different shapes | |
.Executed 1374 different shapes | |
Executed 458 different shapes | |
......................................float32 | |
Before shared variable ('n malloc on the gpu', 1) | |
Shared took 0 kB | |
Before compilation ('n malloc on the gpu', 2) | |
After function compilation 1 ('n malloc on the gpu', 3) | |
After function compilation 2 ('n malloc on the gpu', 6) | |
After function evaluation 1 ('n malloc on the gpu', 6) | |
After function evaluation 2 ('n malloc on the gpu', 6) | |
After function evaluation 1 ('n malloc on the gpu', 6) | |
After function evaluation 2 ('n malloc on the gpu', 6) | |
After function evaluation 1 ('n malloc on the gpu', 6) | |
After function evaluation 2 ('n malloc on the gpu', 6) | |
After deleting function 2 ('n malloc on the gpu', 2) | |
After deleting shared variable and ref to it ('n malloc on the gpu', 1) | |
float64 | |
Before shared variable ('n malloc on the gpu', 1) | |
Shared took 0 kB | |
Before compilation ('n malloc on the gpu', 2) | |
After function compilation 1 ('n malloc on the gpu', 2) | |
After function compilation 2 ('n malloc on the gpu', 2) | |
After function evaluation 1 ('n malloc on the gpu', 2) | |
After function evaluation 2 ('n malloc on the gpu', 2) | |
After function evaluation 1 ('n malloc on the gpu', 2) | |
After function evaluation 2 ('n malloc on the gpu', 2) | |
After function evaluation 1 ('n malloc on the gpu', 2) | |
After function evaluation 2 ('n malloc on the gpu', 2) | |
After deleting function 2 ('n malloc on the gpu', 2) | |
After deleting shared variable and ref to it ('n malloc on the gpu', 1) | |
.float32 | |
Before shared variable ('n malloc on the gpu', 1) | |
Shared took 0 kB | |
Before compilation ('n malloc on the gpu', 2) | |
After function compilation 1 ('n malloc on the gpu', 4) | |
After function evaluation branch true ('n malloc on the gpu', 4) | |
After function evaluation branch false ('n malloc on the gpu', 4) | |
After function evaluation branch true ('n malloc on the gpu', 4) | |
After function evaluation branch false ('n malloc on the gpu', 4) | |
After function evaluation branch true ('n malloc on the gpu', 4) | |
After function evaluation branch false ('n malloc on the gpu', 4) | |
After deleting function 1 ('n malloc on the gpu', 2) | |
After deleting shared variable and ref to it ('n malloc on the gpu', 1) | |
float64 | |
Before shared variable ('n malloc on the gpu', 1) | |
Shared took 0 kB | |
Before compilation ('n malloc on the gpu', 2) | |
After function compilation 1 ('n malloc on the gpu', 2) | |
After function evaluation branch true ('n malloc on the gpu', 2) | |
After function evaluation branch false ('n malloc on the gpu', 2) | |
After function evaluation branch true ('n malloc on the gpu', 2) | |
After function evaluation branch false ('n malloc on the gpu', 2) | |
After function evaluation branch true ('n malloc on the gpu', 2) | |
After function evaluation branch false ('n malloc on the gpu', 2) | |
After deleting function 1 ('n malloc on the gpu', 2) | |
After deleting shared variable and ref to it ('n malloc on the gpu', 1) | |
...................................................... __str__ = [[ 0.41844133 0.42187652 0.41898602 0.91393226 0.2673761 ] | |
[ 0.43664843 0.95744944 0.87686652 0.56096673 0.20658322] | |
[ 0.18596491 0.4768765 0.82073575 0.22224128 0.80181545] | |
[ 0.82680261 0.51986992 0.56643027 0.00883366 0.15047923] | |
[ 0.39132819 0.62885153 0.2296776 0.30525967 0.92171496]] | |
...0 GpuFromHost(<TensorType(float32, matrix)>) | |
1 GpuFromHost(<TensorType(float32, matrix)>) | |
2 GpuElemwise{Composite{[add(add(i0, i1), i2)]}}[(0, 1)](a, GpuFromHost.0, GpuFromHost.0) | |
3 HostFromGpu(GpuElemwise{Composite{[add(add(i0, i1), i2)]}}[(0, 1)].0) | |
.Elemwise{Cast{float32}}(<TensorType(float64, matrix)>) | |
..1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 #if PY_MAJOR_VERSION >= 3 | |
14 void free_generator(PyObject *_gen) | |
15 { | |
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen); | |
17 #else | |
18 void free_generator(void *_gen) | |
19 { | |
20 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
21 #endif | |
22 | |
23 curandStatus_t err = curandDestroyGenerator(*gen); | |
24 if (err != CURAND_STATUS_SUCCESS) | |
25 { | |
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n", | |
27 (int)err); | |
28 } | |
29 free(gen); | |
30 } | |
31 | |
32 | |
33 struct __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4 { | |
34 PyObject* __ERROR; | |
35 | |
36 PyObject* storage_V3; | |
37 PyObject* storage_V5; | |
38 PyObject* storage_V7; | |
39 PyObject* storage_V1; | |
40 | |
41 | |
42 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4() {} | |
43 ~__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4(void) { | |
44 cleanup(); | |
45 } | |
46 | |
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
48 Py_XINCREF(storage_V3); | |
49 Py_XINCREF(storage_V5); | |
50 Py_XINCREF(storage_V7); | |
51 Py_XINCREF(storage_V1); | |
52 this->storage_V3 = storage_V3; | |
53 this->storage_V5 = storage_V5; | |
54 this->storage_V7 = storage_V7; | |
55 this->storage_V1 = storage_V1; | |
56 int __failure = 0; | |
57 | |
58 { | |
59 | |
60 { | |
61 | |
62 { | |
63 | |
64 { | |
65 | |
66 this->__ERROR = __ERROR; | |
67 return 0; | |
68 __label_7: | |
69 | |
70 double __DUMMY_7; | |
71 | |
72 } | |
73 __label_5: | |
74 | |
75 double __DUMMY_5; | |
76 | |
77 } | |
78 __label_3: | |
79 | |
80 double __DUMMY_3; | |
81 | |
82 } | |
83 __label_1: | |
84 | |
85 double __DUMMY_1; | |
86 | |
87 } | |
88 | |
89 Py_XDECREF(this->storage_V3); | |
90 Py_XDECREF(this->storage_V5); | |
91 Py_XDECREF(this->storage_V7); | |
92 Py_XDECREF(this->storage_V1); | |
93 | |
94 if (__failure) { | |
95 // When there is a failure, this code puts the exception | |
96 // in __ERROR. | |
97 PyObject* err_type = NULL; | |
98 PyObject* err_msg = NULL; | |
99 PyObject* err_traceback = NULL; | |
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
107 PyList_SET_ITEM(__ERROR, 0, err_type); | |
108 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
109 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
110 {Py_XDECREF(old_err_type);} | |
111 {Py_XDECREF(old_err_msg);} | |
112 {Py_XDECREF(old_err_traceback);} | |
113 } | |
114 // The failure code is returned to index what code block failed. | |
115 return __failure; | |
116 | |
117 } | |
118 void cleanup(void) { | |
119 __label_1: | |
120 | |
121 double __DUMMY_1; | |
122 __label_3: | |
123 | |
124 double __DUMMY_3; | |
125 __label_5: | |
126 | |
127 double __DUMMY_5; | |
128 __label_7: | |
129 | |
130 double __DUMMY_7; | |
131 | |
132 Py_XDECREF(this->storage_V3); | |
133 Py_XDECREF(this->storage_V5); | |
134 Py_XDECREF(this->storage_V7); | |
135 Py_XDECREF(this->storage_V1); | |
136 } | |
137 int run(void) { | |
138 int __failure = 0; | |
139 | |
140 PyObject* py_V1; | |
141 CudaNdarray * V1; | |
142 PyObject* py_V3; | |
143 | |
144 PyObject* V3; | |
145 | |
146 PyObject* py_V5; | |
147 | |
148 PyArrayObject* V5; | |
149 int type_num_V5; | |
150 typedef npy_int32 dtype_V5; | |
151 | |
152 PyObject* py_V7; | |
153 | |
154 PyObject* V7; | |
155 | |
156 { | |
157 | |
158 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
159 {Py_XINCREF(py_V1);} | |
160 | |
161 if (py_V1 == Py_None) | |
162 { | |
163 V1 = NULL; | |
164 } | |
165 else | |
166 { | |
167 | |
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
169 // and one ref from the local scope. | |
170 | |
171 if (CudaNdarray_Check(py_V1)) | |
172 { | |
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
174 V1 = (CudaNdarray*)py_V1; | |
175 //std::cerr << "c_extract " << V1 << '\n'; | |
176 if (V1->nd != 2) | |
177 { | |
178 PyErr_Format(PyExc_RuntimeError, | |
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
180 V1->nd); | |
181 V1 = NULL; | |
182 { | |
183 __failure = 2; | |
184 if (!PyErr_Occurred()) { | |
185 PyErr_SetString(PyExc_RuntimeError, | |
186 "Unexpected error in an Op's C code. " | |
187 "No Python exception was set."); | |
188 } | |
189 goto __label_2;}; | |
190 } | |
191 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
192 | |
193 | |
194 assert(V1); | |
195 Py_INCREF(py_V1); | |
196 } | |
197 else if (py_V1 == Py_None) | |
198 { | |
199 PyErr_SetString(PyExc_TypeError, | |
200 "expected a CudaNdarray, not None"); | |
201 V1 = NULL; | |
202 { | |
203 __failure = 2; | |
204 if (!PyErr_Occurred()) { | |
205 PyErr_SetString(PyExc_RuntimeError, | |
206 "Unexpected error in an Op's C code. " | |
207 "No Python exception was set."); | |
208 } | |
209 goto __label_2;}; | |
210 } | |
211 else | |
212 { | |
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
215 V1 = NULL; | |
216 { | |
217 __failure = 2; | |
218 if (!PyErr_Occurred()) { | |
219 PyErr_SetString(PyExc_RuntimeError, | |
220 "Unexpected error in an Op's C code. " | |
221 "No Python exception was set."); | |
222 } | |
223 goto __label_2;}; | |
224 } | |
225 //std::cerr << "c_extract done " << V1 << '\n'; | |
226 | |
227 | |
228 } | |
229 | |
230 { | |
231 | |
232 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
233 {Py_XINCREF(py_V3);} | |
234 | |
235 Py_INCREF(py_V3); | |
236 V3 = py_V3; | |
237 | |
238 { | |
239 | |
240 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
241 {Py_XINCREF(py_V5);} | |
242 | |
243 V5 = NULL; | |
244 if (py_V5 == Py_None) { | |
245 // We can either fail here or set V5 to NULL and rely on Ops | |
246 // using tensors to handle the NULL case, but if they fail to do so | |
247 // they'll end up with nasty segfaults, so this is public service. | |
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
249 { | |
250 __failure = 6; | |
251 if (!PyErr_Occurred()) { | |
252 PyErr_SetString(PyExc_RuntimeError, | |
253 "Unexpected error in an Op's C code. " | |
254 "No Python exception was set."); | |
255 } | |
256 goto __label_6;} | |
257 } | |
258 if (!PyArray_Check(py_V5)) { | |
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
260 { | |
261 __failure = 6; | |
262 if (!PyErr_Occurred()) { | |
263 PyErr_SetString(PyExc_RuntimeError, | |
264 "Unexpected error in an Op's C code. " | |
265 "No Python exception was set."); | |
266 } | |
267 goto __label_6;} | |
268 } | |
269 // We expect NPY_INT32 | |
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
271 if (!PyArray_ISALIGNED(py_V5)) { | |
272 PyErr_Format(PyExc_NotImplementedError, | |
273 "expected an aligned array of type %ld " | |
274 "(NPY_INT32), got non-aligned array of type %ld" | |
275 " with %ld dimensions, with 3 last dims " | |
276 "%ld, %ld, %ld" | |
277 " and 3 last strides %ld %ld, %ld.", | |
278 (long int) NPY_INT32, | |
279 (long int) type_num_V5, | |
280 (long int) PyArray_NDIM(py_V5), | |
281 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
283 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
285 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
287 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
289 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
291 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
293 ); | |
294 { | |
295 __failure = 6; | |
296 if (!PyErr_Occurred()) { | |
297 PyErr_SetString(PyExc_RuntimeError, | |
298 "Unexpected error in an Op's C code. " | |
299 "No Python exception was set."); | |
300 } | |
301 goto __label_6;} | |
302 } | |
303 // This is a TypeError to be consistent with DEBUG_MODE | |
304 // Note: DEBUG_MODE also tells the name of the container | |
305 if (type_num_V5 != NPY_INT32) { | |
306 PyErr_Format(PyExc_TypeError, | |
307 "expected type_num %d (NPY_INT32) got %d", | |
308 NPY_INT32, type_num_V5); | |
309 { | |
310 __failure = 6; | |
311 if (!PyErr_Occurred()) { | |
312 PyErr_SetString(PyExc_RuntimeError, | |
313 "Unexpected error in an Op's C code. " | |
314 "No Python exception was set."); | |
315 } | |
316 goto __label_6;} | |
317 } | |
318 V5 = (PyArrayObject*)(py_V5); | |
319 Py_XINCREF(V5); | |
320 | |
321 { | |
322 | |
323 py_V7 = Py_None; | |
324 {Py_XINCREF(py_V7);} | |
325 | |
326 V7 = NULL; | |
327 | |
328 { | |
329 | |
330 //////// <code generated by CURAND_Base> | |
331 int odims[2]; | |
332 int n_elements = 1; | |
333 int must_alloc_sample = ((NULL == V1) | |
334 || !CudaNdarray_Check(py_V1) | |
335 || (V1->nd != 2)); | |
336 | |
337 if (V5->nd != 1) | |
338 { | |
339 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
340 { | |
341 __failure = 9; | |
342 if (!PyErr_Occurred()) { | |
343 PyErr_SetString(PyExc_RuntimeError, | |
344 "Unexpected error in an Op's C code. " | |
345 "No Python exception was set."); | |
346 } | |
347 goto __label_9;} | |
348 } | |
349 if (V5->dimensions[0] != 2) | |
350 { | |
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
352 2, V5->dimensions[0]); | |
353 { | |
354 __failure = 9; | |
355 if (!PyErr_Occurred()) { | |
356 PyErr_SetString(PyExc_RuntimeError, | |
357 "Unexpected error in an Op's C code. " | |
358 "No Python exception was set."); | |
359 } | |
360 goto __label_9;} | |
361 } | |
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
363 { | |
364 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
365 { | |
366 __failure = 9; | |
367 if (!PyErr_Occurred()) { | |
368 PyErr_SetString(PyExc_RuntimeError, | |
369 "Unexpected error in an Op's C code. " | |
370 "No Python exception was set."); | |
371 } | |
372 goto __label_9;} | |
373 } | |
374 for (int i = 0; i < 2; ++i) | |
375 { | |
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
377 n_elements *= odims[i]; | |
378 must_alloc_sample = (must_alloc_sample | |
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
380 } | |
381 if (must_alloc_sample) | |
382 { | |
383 Py_XDECREF(V1); | |
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
385 if(!V1) | |
386 { | |
387 { | |
388 __failure = 9; | |
389 if (!PyErr_Occurred()) { | |
390 PyErr_SetString(PyExc_RuntimeError, | |
391 "Unexpected error in an Op's C code. " | |
392 "No Python exception was set."); | |
393 } | |
394 goto __label_9;}; | |
395 } | |
396 } | |
397 if (!PyCObject_Check(V3)) | |
398 { | |
399 // allocate a new generator for o_generator | |
400 Py_XDECREF(V7); | |
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
402 assert(gen); | |
403 if (CURAND_STATUS_SUCCESS != | |
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
406 { | |
407 __failure = 9; | |
408 if (!PyErr_Occurred()) { | |
409 PyErr_SetString(PyExc_RuntimeError, | |
410 "Unexpected error in an Op's C code. " | |
411 "No Python exception was set."); | |
412 } | |
413 goto __label_9;}; | |
414 } | |
415 if (CURAND_STATUS_SUCCESS != | |
416 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
417 { | |
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
419 { | |
420 __failure = 9; | |
421 if (!PyErr_Occurred()) { | |
422 PyErr_SetString(PyExc_RuntimeError, | |
423 "Unexpected error in an Op's C code. " | |
424 "No Python exception was set."); | |
425 } | |
426 goto __label_9;}; | |
427 } | |
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
429 assert (V3 == Py_False); | |
430 } | |
431 else if (1) | |
432 { | |
433 // use i_generator for o_generator | |
434 Py_XDECREF(V7); | |
435 Py_INCREF(V3); | |
436 V7 = V3; | |
437 } | |
438 else | |
439 { | |
440 // copy i_generator for o_generator | |
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
442 { | |
443 __failure = 9; | |
444 if (!PyErr_Occurred()) { | |
445 PyErr_SetString(PyExc_RuntimeError, | |
446 "Unexpected error in an Op's C code. " | |
447 "No Python exception was set."); | |
448 } | |
449 goto __label_9;}; | |
450 } | |
451 { | |
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
453 curandStatus_t err = curandGenerateUniform(*gen, | |
454 CudaNdarray_DEV_DATA(V1), | |
455 n_elements); | |
456 | |
457 | |
458 if (err != CURAND_STATUS_SUCCESS) | |
459 { | |
460 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
461 { | |
462 __failure = 9; | |
463 if (!PyErr_Occurred()) { | |
464 PyErr_SetString(PyExc_RuntimeError, | |
465 "Unexpected error in an Op's C code. " | |
466 "No Python exception was set."); | |
467 } | |
468 goto __label_9;}; | |
469 } | |
470 cudaThreadSynchronize(); | |
471 } | |
472 //////// </ code generated by CURAND_Base> | |
473 __label_9: | |
474 | |
475 double __DUMMY_9; | |
476 | |
477 } | |
478 __label_8: | |
479 | |
480 if (!__failure) { | |
481 | |
482 assert(py_V7->ob_refcnt > 1); | |
483 Py_DECREF(py_V7); | |
484 py_V7 = V7 ? V7 : Py_None; | |
485 Py_INCREF(py_V7); | |
486 | |
487 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
488 {Py_XINCREF(py_V7);} | |
489 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
490 {Py_XDECREF(old);} | |
491 } | |
492 | |
493 Py_XDECREF(V7); | |
494 | |
495 {Py_XDECREF(py_V7);} | |
496 | |
497 double __DUMMY_8; | |
498 | |
499 } | |
500 __label_6: | |
501 | |
502 if (V5) { | |
503 Py_XDECREF(V5); | |
504 } | |
505 | |
506 {Py_XDECREF(py_V5);} | |
507 | |
508 double __DUMMY_6; | |
509 | |
510 } | |
511 __label_4: | |
512 | |
513 Py_XDECREF(V3); | |
514 | |
515 {Py_XDECREF(py_V3);} | |
516 | |
517 double __DUMMY_4; | |
518 | |
519 } | |
520 __label_2: | |
521 | |
522 if (!__failure) { | |
523 | |
524 //std::cerr << "sync\n"; | |
525 if (NULL == V1) { | |
526 // failure: sync None to storage | |
527 Py_XDECREF(py_V1); | |
528 py_V1 = Py_None; | |
529 Py_INCREF(py_V1); | |
530 } | |
531 else | |
532 { | |
533 if (py_V1 != (PyObject*)V1) | |
534 { | |
535 Py_XDECREF(py_V1); | |
536 py_V1 = (PyObject*)V1; | |
537 Py_INCREF(py_V1); | |
538 } | |
539 assert(py_V1->ob_refcnt); | |
540 } | |
541 | |
542 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
543 {Py_XINCREF(py_V1);} | |
544 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
545 {Py_XDECREF(old);} | |
546 } | |
547 | |
548 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
549 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
550 if (V1) | |
551 { | |
552 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
553 Py_XDECREF(V1); | |
554 } | |
555 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
556 | |
557 {Py_XDECREF(py_V1);} | |
558 | |
559 double __DUMMY_2; | |
560 | |
561 } | |
562 | |
563 | |
564 if (__failure) { | |
565 // When there is a failure, this code puts the exception | |
566 // in __ERROR. | |
567 PyObject* err_type = NULL; | |
568 PyObject* err_msg = NULL; | |
569 PyObject* err_traceback = NULL; | |
570 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
571 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
572 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
573 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
574 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
575 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
576 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
577 PyList_SET_ITEM(__ERROR, 0, err_type); | |
578 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
579 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
580 {Py_XDECREF(old_err_type);} | |
581 {Py_XDECREF(old_err_msg);} | |
582 {Py_XDECREF(old_err_traceback);} | |
583 } | |
584 // The failure code is returned to index what code block failed. | |
585 return __failure; | |
586 | |
587 } | |
588 }; | |
589 | |
590 | |
591 int __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor(__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* self) { | |
592 return self->run(); | |
593 } | |
594 | |
595 void __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor(void* executor, void* self) { | |
596 delete ((__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4*)self); | |
597 } | |
598 | |
599 ////////////////////// | |
600 //// Functions | |
601 ////////////////////// | |
602 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
603 assert(PyTuple_Check(argtuple)); | |
604 if (5 != PyTuple_Size(argtuple)){ | |
605 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
606 return NULL; | |
607 } | |
608 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* struct_ptr = new __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4(); | |
609 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
610 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor), struct_ptr, __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor); | |
611 return thunk; } | |
612 | |
613 ////////////////////// | |
614 //// Module init | |
615 ////////////////////// | |
616 static PyMethodDef MyMethods[] = { | |
617 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
618 {NULL, NULL, 0, NULL} | |
619 }; | |
620 PyMODINIT_FUNC inite89e1fed0e21a65d4b9fbb16fea234f4(void){ | |
621 import_array(); | |
622 (void) Py_InitModule("e89e1fed0e21a65d4b9fbb16fea234f4", MyMethods); | |
623 } | |
624 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmp7caAcY/e89e1fed0e21a65d4b9fbb16fea234f4.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
E1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 #if PY_MAJOR_VERSION >= 3 | |
14 void free_generator(PyObject *_gen) | |
15 { | |
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen); | |
17 #else | |
18 void free_generator(void *_gen) | |
19 { | |
20 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
21 #endif | |
22 | |
23 curandStatus_t err = curandDestroyGenerator(*gen); | |
24 if (err != CURAND_STATUS_SUCCESS) | |
25 { | |
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n", | |
27 (int)err); | |
28 } | |
29 free(gen); | |
30 } | |
31 | |
32 | |
33 struct __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4 { | |
34 PyObject* __ERROR; | |
35 | |
36 PyObject* storage_V3; | |
37 PyObject* storage_V5; | |
38 PyObject* storage_V7; | |
39 PyObject* storage_V1; | |
40 | |
41 | |
42 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4() {} | |
43 ~__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4(void) { | |
44 cleanup(); | |
45 } | |
46 | |
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
48 Py_XINCREF(storage_V3); | |
49 Py_XINCREF(storage_V5); | |
50 Py_XINCREF(storage_V7); | |
51 Py_XINCREF(storage_V1); | |
52 this->storage_V3 = storage_V3; | |
53 this->storage_V5 = storage_V5; | |
54 this->storage_V7 = storage_V7; | |
55 this->storage_V1 = storage_V1; | |
56 int __failure = 0; | |
57 | |
58 { | |
59 | |
60 { | |
61 | |
62 { | |
63 | |
64 { | |
65 | |
66 this->__ERROR = __ERROR; | |
67 return 0; | |
68 __label_7: | |
69 | |
70 double __DUMMY_7; | |
71 | |
72 } | |
73 __label_5: | |
74 | |
75 double __DUMMY_5; | |
76 | |
77 } | |
78 __label_3: | |
79 | |
80 double __DUMMY_3; | |
81 | |
82 } | |
83 __label_1: | |
84 | |
85 double __DUMMY_1; | |
86 | |
87 } | |
88 | |
89 Py_XDECREF(this->storage_V3); | |
90 Py_XDECREF(this->storage_V5); | |
91 Py_XDECREF(this->storage_V7); | |
92 Py_XDECREF(this->storage_V1); | |
93 | |
94 if (__failure) { | |
95 // When there is a failure, this code puts the exception | |
96 // in __ERROR. | |
97 PyObject* err_type = NULL; | |
98 PyObject* err_msg = NULL; | |
99 PyObject* err_traceback = NULL; | |
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
107 PyList_SET_ITEM(__ERROR, 0, err_type); | |
108 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
109 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
110 {Py_XDECREF(old_err_type);} | |
111 {Py_XDECREF(old_err_msg);} | |
112 {Py_XDECREF(old_err_traceback);} | |
113 } | |
114 // The failure code is returned to index what code block failed. | |
115 return __failure; | |
116 | |
117 } | |
118 void cleanup(void) { | |
119 __label_1: | |
120 | |
121 double __DUMMY_1; | |
122 __label_3: | |
123 | |
124 double __DUMMY_3; | |
125 __label_5: | |
126 | |
127 double __DUMMY_5; | |
128 __label_7: | |
129 | |
130 double __DUMMY_7; | |
131 | |
132 Py_XDECREF(this->storage_V3); | |
133 Py_XDECREF(this->storage_V5); | |
134 Py_XDECREF(this->storage_V7); | |
135 Py_XDECREF(this->storage_V1); | |
136 } | |
137 int run(void) { | |
138 int __failure = 0; | |
139 | |
140 PyObject* py_V1; | |
141 CudaNdarray * V1; | |
142 PyObject* py_V3; | |
143 | |
144 PyObject* V3; | |
145 | |
146 PyObject* py_V5; | |
147 | |
148 PyArrayObject* V5; | |
149 int type_num_V5; | |
150 typedef npy_int32 dtype_V5; | |
151 | |
152 PyObject* py_V7; | |
153 | |
154 PyObject* V7; | |
155 | |
156 { | |
157 | |
158 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
159 {Py_XINCREF(py_V1);} | |
160 | |
161 if (py_V1 == Py_None) | |
162 { | |
163 V1 = NULL; | |
164 } | |
165 else | |
166 { | |
167 | |
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
169 // and one ref from the local scope. | |
170 | |
171 if (CudaNdarray_Check(py_V1)) | |
172 { | |
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
174 V1 = (CudaNdarray*)py_V1; | |
175 //std::cerr << "c_extract " << V1 << '\n'; | |
176 if (V1->nd != 2) | |
177 { | |
178 PyErr_Format(PyExc_RuntimeError, | |
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
180 V1->nd); | |
181 V1 = NULL; | |
182 { | |
183 __failure = 2; | |
184 if (!PyErr_Occurred()) { | |
185 PyErr_SetString(PyExc_RuntimeError, | |
186 "Unexpected error in an Op's C code. " | |
187 "No Python exception was set."); | |
188 } | |
189 goto __label_2;}; | |
190 } | |
191 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
192 | |
193 | |
194 assert(V1); | |
195 Py_INCREF(py_V1); | |
196 } | |
197 else if (py_V1 == Py_None) | |
198 { | |
199 PyErr_SetString(PyExc_TypeError, | |
200 "expected a CudaNdarray, not None"); | |
201 V1 = NULL; | |
202 { | |
203 __failure = 2; | |
204 if (!PyErr_Occurred()) { | |
205 PyErr_SetString(PyExc_RuntimeError, | |
206 "Unexpected error in an Op's C code. " | |
207 "No Python exception was set."); | |
208 } | |
209 goto __label_2;}; | |
210 } | |
211 else | |
212 { | |
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
215 V1 = NULL; | |
216 { | |
217 __failure = 2; | |
218 if (!PyErr_Occurred()) { | |
219 PyErr_SetString(PyExc_RuntimeError, | |
220 "Unexpected error in an Op's C code. " | |
221 "No Python exception was set."); | |
222 } | |
223 goto __label_2;}; | |
224 } | |
225 //std::cerr << "c_extract done " << V1 << '\n'; | |
226 | |
227 | |
228 } | |
229 | |
230 { | |
231 | |
232 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
233 {Py_XINCREF(py_V3);} | |
234 | |
235 Py_INCREF(py_V3); | |
236 V3 = py_V3; | |
237 | |
238 { | |
239 | |
240 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
241 {Py_XINCREF(py_V5);} | |
242 | |
243 V5 = NULL; | |
244 if (py_V5 == Py_None) { | |
245 // We can either fail here or set V5 to NULL and rely on Ops | |
246 // using tensors to handle the NULL case, but if they fail to do so | |
247 // they'll end up with nasty segfaults, so this is public service. | |
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
249 { | |
250 __failure = 6; | |
251 if (!PyErr_Occurred()) { | |
252 PyErr_SetString(PyExc_RuntimeError, | |
253 "Unexpected error in an Op's C code. " | |
254 "No Python exception was set."); | |
255 } | |
256 goto __label_6;} | |
257 } | |
258 if (!PyArray_Check(py_V5)) { | |
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
260 { | |
261 __failure = 6; | |
262 if (!PyErr_Occurred()) { | |
263 PyErr_SetString(PyExc_RuntimeError, | |
264 "Unexpected error in an Op's C code. " | |
265 "No Python exception was set."); | |
266 } | |
267 goto __label_6;} | |
268 } | |
269 // We expect NPY_INT32 | |
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
271 if (!PyArray_ISALIGNED(py_V5)) { | |
272 PyErr_Format(PyExc_NotImplementedError, | |
273 "expected an aligned array of type %ld " | |
274 "(NPY_INT32), got non-aligned array of type %ld" | |
275 " with %ld dimensions, with 3 last dims " | |
276 "%ld, %ld, %ld" | |
277 " and 3 last strides %ld %ld, %ld.", | |
278 (long int) NPY_INT32, | |
279 (long int) type_num_V5, | |
280 (long int) PyArray_NDIM(py_V5), | |
281 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
283 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
285 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
287 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
289 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
291 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
293 ); | |
294 { | |
295 __failure = 6; | |
296 if (!PyErr_Occurred()) { | |
297 PyErr_SetString(PyExc_RuntimeError, | |
298 "Unexpected error in an Op's C code. " | |
299 "No Python exception was set."); | |
300 } | |
301 goto __label_6;} | |
302 } | |
303 // This is a TypeError to be consistent with DEBUG_MODE | |
304 // Note: DEBUG_MODE also tells the name of the container | |
305 if (type_num_V5 != NPY_INT32) { | |
306 PyErr_Format(PyExc_TypeError, | |
307 "expected type_num %d (NPY_INT32) got %d", | |
308 NPY_INT32, type_num_V5); | |
309 { | |
310 __failure = 6; | |
311 if (!PyErr_Occurred()) { | |
312 PyErr_SetString(PyExc_RuntimeError, | |
313 "Unexpected error in an Op's C code. " | |
314 "No Python exception was set."); | |
315 } | |
316 goto __label_6;} | |
317 } | |
318 V5 = (PyArrayObject*)(py_V5); | |
319 Py_XINCREF(V5); | |
320 | |
321 { | |
322 | |
323 py_V7 = Py_None; | |
324 {Py_XINCREF(py_V7);} | |
325 | |
326 V7 = NULL; | |
327 | |
328 { | |
329 | |
330 //////// <code generated by CURAND_Base> | |
331 int odims[2]; | |
332 int n_elements = 1; | |
333 int must_alloc_sample = ((NULL == V1) | |
334 || !CudaNdarray_Check(py_V1) | |
335 || (V1->nd != 2)); | |
336 | |
337 if (V5->nd != 1) | |
338 { | |
339 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
340 { | |
341 __failure = 9; | |
342 if (!PyErr_Occurred()) { | |
343 PyErr_SetString(PyExc_RuntimeError, | |
344 "Unexpected error in an Op's C code. " | |
345 "No Python exception was set."); | |
346 } | |
347 goto __label_9;} | |
348 } | |
349 if (V5->dimensions[0] != 2) | |
350 { | |
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
352 2, V5->dimensions[0]); | |
353 { | |
354 __failure = 9; | |
355 if (!PyErr_Occurred()) { | |
356 PyErr_SetString(PyExc_RuntimeError, | |
357 "Unexpected error in an Op's C code. " | |
358 "No Python exception was set."); | |
359 } | |
360 goto __label_9;} | |
361 } | |
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
363 { | |
364 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
365 { | |
366 __failure = 9; | |
367 if (!PyErr_Occurred()) { | |
368 PyErr_SetString(PyExc_RuntimeError, | |
369 "Unexpected error in an Op's C code. " | |
370 "No Python exception was set."); | |
371 } | |
372 goto __label_9;} | |
373 } | |
374 for (int i = 0; i < 2; ++i) | |
375 { | |
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
377 n_elements *= odims[i]; | |
378 must_alloc_sample = (must_alloc_sample | |
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
380 } | |
381 if (must_alloc_sample) | |
382 { | |
383 Py_XDECREF(V1); | |
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
385 if(!V1) | |
386 { | |
387 { | |
388 __failure = 9; | |
389 if (!PyErr_Occurred()) { | |
390 PyErr_SetString(PyExc_RuntimeError, | |
391 "Unexpected error in an Op's C code. " | |
392 "No Python exception was set."); | |
393 } | |
394 goto __label_9;}; | |
395 } | |
396 } | |
397 if (!PyCObject_Check(V3)) | |
398 { | |
399 // allocate a new generator for o_generator | |
400 Py_XDECREF(V7); | |
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
402 assert(gen); | |
403 if (CURAND_STATUS_SUCCESS != | |
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
406 { | |
407 __failure = 9; | |
408 if (!PyErr_Occurred()) { | |
409 PyErr_SetString(PyExc_RuntimeError, | |
410 "Unexpected error in an Op's C code. " | |
411 "No Python exception was set."); | |
412 } | |
413 goto __label_9;}; | |
414 } | |
415 if (CURAND_STATUS_SUCCESS != | |
416 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
417 { | |
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
419 { | |
420 __failure = 9; | |
421 if (!PyErr_Occurred()) { | |
422 PyErr_SetString(PyExc_RuntimeError, | |
423 "Unexpected error in an Op's C code. " | |
424 "No Python exception was set."); | |
425 } | |
426 goto __label_9;}; | |
427 } | |
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
429 assert (V3 == Py_False); | |
430 } | |
431 else if (1) | |
432 { | |
433 // use i_generator for o_generator | |
434 Py_XDECREF(V7); | |
435 Py_INCREF(V3); | |
436 V7 = V3; | |
437 } | |
438 else | |
439 { | |
440 // copy i_generator for o_generator | |
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
442 { | |
443 __failure = 9; | |
444 if (!PyErr_Occurred()) { | |
445 PyErr_SetString(PyExc_RuntimeError, | |
446 "Unexpected error in an Op's C code. " | |
447 "No Python exception was set."); | |
448 } | |
449 goto __label_9;}; | |
450 } | |
451 { | |
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
453 curandStatus_t err = curandGenerateUniform(*gen, | |
454 CudaNdarray_DEV_DATA(V1), | |
455 n_elements); | |
456 | |
457 | |
458 if (err != CURAND_STATUS_SUCCESS) | |
459 { | |
460 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
461 { | |
462 __failure = 9; | |
463 if (!PyErr_Occurred()) { | |
464 PyErr_SetString(PyExc_RuntimeError, | |
465 "Unexpected error in an Op's C code. " | |
466 "No Python exception was set."); | |
467 } | |
468 goto __label_9;}; | |
469 } | |
470 cudaThreadSynchronize(); | |
471 } | |
472 //////// </ code generated by CURAND_Base> | |
473 __label_9: | |
474 | |
475 double __DUMMY_9; | |
476 | |
477 } | |
478 __label_8: | |
479 | |
480 if (!__failure) { | |
481 | |
482 assert(py_V7->ob_refcnt > 1); | |
483 Py_DECREF(py_V7); | |
484 py_V7 = V7 ? V7 : Py_None; | |
485 Py_INCREF(py_V7); | |
486 | |
487 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
488 {Py_XINCREF(py_V7);} | |
489 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
490 {Py_XDECREF(old);} | |
491 } | |
492 | |
493 Py_XDECREF(V7); | |
494 | |
495 {Py_XDECREF(py_V7);} | |
496 | |
497 double __DUMMY_8; | |
498 | |
499 } | |
500 __label_6: | |
501 | |
502 if (V5) { | |
503 Py_XDECREF(V5); | |
504 } | |
505 | |
506 {Py_XDECREF(py_V5);} | |
507 | |
508 double __DUMMY_6; | |
509 | |
510 } | |
511 __label_4: | |
512 | |
513 Py_XDECREF(V3); | |
514 | |
515 {Py_XDECREF(py_V3);} | |
516 | |
517 double __DUMMY_4; | |
518 | |
519 } | |
520 __label_2: | |
521 | |
522 if (!__failure) { | |
523 | |
524 //std::cerr << "sync\n"; | |
525 if (NULL == V1) { | |
526 // failure: sync None to storage | |
527 Py_XDECREF(py_V1); | |
528 py_V1 = Py_None; | |
529 Py_INCREF(py_V1); | |
530 } | |
531 else | |
532 { | |
533 if (py_V1 != (PyObject*)V1) | |
534 { | |
535 Py_XDECREF(py_V1); | |
536 py_V1 = (PyObject*)V1; | |
537 Py_INCREF(py_V1); | |
538 } | |
539 assert(py_V1->ob_refcnt); | |
540 } | |
541 | |
542 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
543 {Py_XINCREF(py_V1);} | |
544 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
545 {Py_XDECREF(old);} | |
546 } | |
547 | |
548 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
549 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
550 if (V1) | |
551 { | |
552 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
553 Py_XDECREF(V1); | |
554 } | |
555 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
556 | |
557 {Py_XDECREF(py_V1);} | |
558 | |
559 double __DUMMY_2; | |
560 | |
561 } | |
562 | |
563 | |
564 if (__failure) { | |
565 // When there is a failure, this code puts the exception | |
566 // in __ERROR. | |
567 PyObject* err_type = NULL; | |
568 PyObject* err_msg = NULL; | |
569 PyObject* err_traceback = NULL; | |
570 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
571 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
572 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
573 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
574 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
575 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
576 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
577 PyList_SET_ITEM(__ERROR, 0, err_type); | |
578 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
579 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
580 {Py_XDECREF(old_err_type);} | |
581 {Py_XDECREF(old_err_msg);} | |
582 {Py_XDECREF(old_err_traceback);} | |
583 } | |
584 // The failure code is returned to index what code block failed. | |
585 return __failure; | |
586 | |
587 } | |
588 }; | |
589 | |
590 | |
591 int __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor(__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* self) { | |
592 return self->run(); | |
593 } | |
594 | |
595 void __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor(void* executor, void* self) { | |
596 delete ((__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4*)self); | |
597 } | |
598 | |
599 ////////////////////// | |
600 //// Functions | |
601 ////////////////////// | |
602 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
603 assert(PyTuple_Check(argtuple)); | |
604 if (5 != PyTuple_Size(argtuple)){ | |
605 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
606 return NULL; | |
607 } | |
608 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* struct_ptr = new __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4(); | |
609 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
610 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor), struct_ptr, __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor); | |
611 return thunk; } | |
612 | |
613 ////////////////////// | |
614 //// Module init | |
615 ////////////////////// | |
616 static PyMethodDef MyMethods[] = { | |
617 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
618 {NULL, NULL, 0, NULL} | |
619 }; | |
620 PyMODINIT_FUNC inite89e1fed0e21a65d4b9fbb16fea234f4(void){ | |
621 import_array(); | |
622 (void) Py_InitModule("e89e1fed0e21a65d4b9fbb16fea234f4", MyMethods); | |
623 } | |
624 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmp2_748k/e89e1fed0e21a65d4b9fbb16fea234f4.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
E1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 #if PY_MAJOR_VERSION >= 3 | |
14 void free_generator(PyObject *_gen) | |
15 { | |
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen); | |
17 #else | |
18 void free_generator(void *_gen) | |
19 { | |
20 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
21 #endif | |
22 | |
23 curandStatus_t err = curandDestroyGenerator(*gen); | |
24 if (err != CURAND_STATUS_SUCCESS) | |
25 { | |
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n", | |
27 (int)err); | |
28 } | |
29 free(gen); | |
30 } | |
31 | |
32 | |
33 struct __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4 { | |
34 PyObject* __ERROR; | |
35 | |
36 PyObject* storage_V3; | |
37 PyObject* storage_V5; | |
38 PyObject* storage_V7; | |
39 PyObject* storage_V1; | |
40 | |
41 | |
42 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4() {} | |
43 ~__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4(void) { | |
44 cleanup(); | |
45 } | |
46 | |
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
48 Py_XINCREF(storage_V3); | |
49 Py_XINCREF(storage_V5); | |
50 Py_XINCREF(storage_V7); | |
51 Py_XINCREF(storage_V1); | |
52 this->storage_V3 = storage_V3; | |
53 this->storage_V5 = storage_V5; | |
54 this->storage_V7 = storage_V7; | |
55 this->storage_V1 = storage_V1; | |
56 int __failure = 0; | |
57 | |
58 { | |
59 | |
60 { | |
61 | |
62 { | |
63 | |
64 { | |
65 | |
66 this->__ERROR = __ERROR; | |
67 return 0; | |
68 __label_7: | |
69 | |
70 double __DUMMY_7; | |
71 | |
72 } | |
73 __label_5: | |
74 | |
75 double __DUMMY_5; | |
76 | |
77 } | |
78 __label_3: | |
79 | |
80 double __DUMMY_3; | |
81 | |
82 } | |
83 __label_1: | |
84 | |
85 double __DUMMY_1; | |
86 | |
87 } | |
88 | |
89 Py_XDECREF(this->storage_V3); | |
90 Py_XDECREF(this->storage_V5); | |
91 Py_XDECREF(this->storage_V7); | |
92 Py_XDECREF(this->storage_V1); | |
93 | |
94 if (__failure) { | |
95 // When there is a failure, this code puts the exception | |
96 // in __ERROR. | |
97 PyObject* err_type = NULL; | |
98 PyObject* err_msg = NULL; | |
99 PyObject* err_traceback = NULL; | |
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
107 PyList_SET_ITEM(__ERROR, 0, err_type); | |
108 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
109 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
110 {Py_XDECREF(old_err_type);} | |
111 {Py_XDECREF(old_err_msg);} | |
112 {Py_XDECREF(old_err_traceback);} | |
113 } | |
114 // The failure code is returned to index what code block failed. | |
115 return __failure; | |
116 | |
117 } | |
118 void cleanup(void) { | |
119 __label_1: | |
120 | |
121 double __DUMMY_1; | |
122 __label_3: | |
123 | |
124 double __DUMMY_3; | |
125 __label_5: | |
126 | |
127 double __DUMMY_5; | |
128 __label_7: | |
129 | |
130 double __DUMMY_7; | |
131 | |
132 Py_XDECREF(this->storage_V3); | |
133 Py_XDECREF(this->storage_V5); | |
134 Py_XDECREF(this->storage_V7); | |
135 Py_XDECREF(this->storage_V1); | |
136 } | |
137 int run(void) { | |
138 int __failure = 0; | |
139 | |
140 PyObject* py_V1; | |
141 CudaNdarray * V1; | |
142 PyObject* py_V3; | |
143 | |
144 PyObject* V3; | |
145 | |
146 PyObject* py_V5; | |
147 | |
148 PyArrayObject* V5; | |
149 int type_num_V5; | |
150 typedef npy_int32 dtype_V5; | |
151 | |
152 PyObject* py_V7; | |
153 | |
154 PyObject* V7; | |
155 | |
156 { | |
157 | |
158 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
159 {Py_XINCREF(py_V1);} | |
160 | |
161 if (py_V1 == Py_None) | |
162 { | |
163 V1 = NULL; | |
164 } | |
165 else | |
166 { | |
167 | |
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
169 // and one ref from the local scope. | |
170 | |
171 if (CudaNdarray_Check(py_V1)) | |
172 { | |
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
174 V1 = (CudaNdarray*)py_V1; | |
175 //std::cerr << "c_extract " << V1 << '\n'; | |
176 if (V1->nd != 2) | |
177 { | |
178 PyErr_Format(PyExc_RuntimeError, | |
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
180 V1->nd); | |
181 V1 = NULL; | |
182 { | |
183 __failure = 2; | |
184 if (!PyErr_Occurred()) { | |
185 PyErr_SetString(PyExc_RuntimeError, | |
186 "Unexpected error in an Op's C code. " | |
187 "No Python exception was set."); | |
188 } | |
189 goto __label_2;}; | |
190 } | |
191 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
192 | |
193 | |
194 assert(V1); | |
195 Py_INCREF(py_V1); | |
196 } | |
197 else if (py_V1 == Py_None) | |
198 { | |
199 PyErr_SetString(PyExc_TypeError, | |
200 "expected a CudaNdarray, not None"); | |
201 V1 = NULL; | |
202 { | |
203 __failure = 2; | |
204 if (!PyErr_Occurred()) { | |
205 PyErr_SetString(PyExc_RuntimeError, | |
206 "Unexpected error in an Op's C code. " | |
207 "No Python exception was set."); | |
208 } | |
209 goto __label_2;}; | |
210 } | |
211 else | |
212 { | |
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
215 V1 = NULL; | |
216 { | |
217 __failure = 2; | |
218 if (!PyErr_Occurred()) { | |
219 PyErr_SetString(PyExc_RuntimeError, | |
220 "Unexpected error in an Op's C code. " | |
221 "No Python exception was set."); | |
222 } | |
223 goto __label_2;}; | |
224 } | |
225 //std::cerr << "c_extract done " << V1 << '\n'; | |
226 | |
227 | |
228 } | |
229 | |
230 { | |
231 | |
232 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
233 {Py_XINCREF(py_V3);} | |
234 | |
235 Py_INCREF(py_V3); | |
236 V3 = py_V3; | |
237 | |
238 { | |
239 | |
240 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
241 {Py_XINCREF(py_V5);} | |
242 | |
243 V5 = NULL; | |
244 if (py_V5 == Py_None) { | |
245 // We can either fail here or set V5 to NULL and rely on Ops | |
246 // using tensors to handle the NULL case, but if they fail to do so | |
247 // they'll end up with nasty segfaults, so this is public service. | |
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
249 { | |
250 __failure = 6; | |
251 if (!PyErr_Occurred()) { | |
252 PyErr_SetString(PyExc_RuntimeError, | |
253 "Unexpected error in an Op's C code. " | |
254 "No Python exception was set."); | |
255 } | |
256 goto __label_6;} | |
257 } | |
258 if (!PyArray_Check(py_V5)) { | |
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
260 { | |
261 __failure = 6; | |
262 if (!PyErr_Occurred()) { | |
263 PyErr_SetString(PyExc_RuntimeError, | |
264 "Unexpected error in an Op's C code. " | |
265 "No Python exception was set."); | |
266 } | |
267 goto __label_6;} | |
268 } | |
269 // We expect NPY_INT32 | |
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
271 if (!PyArray_ISALIGNED(py_V5)) { | |
272 PyErr_Format(PyExc_NotImplementedError, | |
273 "expected an aligned array of type %ld " | |
274 "(NPY_INT32), got non-aligned array of type %ld" | |
275 " with %ld dimensions, with 3 last dims " | |
276 "%ld, %ld, %ld" | |
277 " and 3 last strides %ld %ld, %ld.", | |
278 (long int) NPY_INT32, | |
279 (long int) type_num_V5, | |
280 (long int) PyArray_NDIM(py_V5), | |
281 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
283 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
285 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
287 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
289 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
291 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
293 ); | |
294 { | |
295 __failure = 6; | |
296 if (!PyErr_Occurred()) { | |
297 PyErr_SetString(PyExc_RuntimeError, | |
298 "Unexpected error in an Op's C code. " | |
299 "No Python exception was set."); | |
300 } | |
301 goto __label_6;} | |
302 } | |
303 // This is a TypeError to be consistent with DEBUG_MODE | |
304 // Note: DEBUG_MODE also tells the name of the container | |
305 if (type_num_V5 != NPY_INT32) { | |
306 PyErr_Format(PyExc_TypeError, | |
307 "expected type_num %d (NPY_INT32) got %d", | |
308 NPY_INT32, type_num_V5); | |
309 { | |
310 __failure = 6; | |
311 if (!PyErr_Occurred()) { | |
312 PyErr_SetString(PyExc_RuntimeError, | |
313 "Unexpected error in an Op's C code. " | |
314 "No Python exception was set."); | |
315 } | |
316 goto __label_6;} | |
317 } | |
318 V5 = (PyArrayObject*)(py_V5); | |
319 Py_XINCREF(V5); | |
320 | |
321 { | |
322 | |
323 py_V7 = Py_None; | |
324 {Py_XINCREF(py_V7);} | |
325 | |
326 V7 = NULL; | |
327 | |
328 { | |
329 | |
330 //////// <code generated by CURAND_Base> | |
331 int odims[2]; | |
332 int n_elements = 1; | |
333 int must_alloc_sample = ((NULL == V1) | |
334 || !CudaNdarray_Check(py_V1) | |
335 || (V1->nd != 2)); | |
336 | |
337 if (V5->nd != 1) | |
338 { | |
339 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
340 { | |
341 __failure = 9; | |
342 if (!PyErr_Occurred()) { | |
343 PyErr_SetString(PyExc_RuntimeError, | |
344 "Unexpected error in an Op's C code. " | |
345 "No Python exception was set."); | |
346 } | |
347 goto __label_9;} | |
348 } | |
349 if (V5->dimensions[0] != 2) | |
350 { | |
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
352 2, V5->dimensions[0]); | |
353 { | |
354 __failure = 9; | |
355 if (!PyErr_Occurred()) { | |
356 PyErr_SetString(PyExc_RuntimeError, | |
357 "Unexpected error in an Op's C code. " | |
358 "No Python exception was set."); | |
359 } | |
360 goto __label_9;} | |
361 } | |
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
363 { | |
364 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
365 { | |
366 __failure = 9; | |
367 if (!PyErr_Occurred()) { | |
368 PyErr_SetString(PyExc_RuntimeError, | |
369 "Unexpected error in an Op's C code. " | |
370 "No Python exception was set."); | |
371 } | |
372 goto __label_9;} | |
373 } | |
374 for (int i = 0; i < 2; ++i) | |
375 { | |
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
377 n_elements *= odims[i]; | |
378 must_alloc_sample = (must_alloc_sample | |
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
380 } | |
381 if (must_alloc_sample) | |
382 { | |
383 Py_XDECREF(V1); | |
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
385 if(!V1) | |
386 { | |
387 { | |
388 __failure = 9; | |
389 if (!PyErr_Occurred()) { | |
390 PyErr_SetString(PyExc_RuntimeError, | |
391 "Unexpected error in an Op's C code. " | |
392 "No Python exception was set."); | |
393 } | |
394 goto __label_9;}; | |
395 } | |
396 } | |
397 if (!PyCObject_Check(V3)) | |
398 { | |
399 // allocate a new generator for o_generator | |
400 Py_XDECREF(V7); | |
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
402 assert(gen); | |
403 if (CURAND_STATUS_SUCCESS != | |
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
406 { | |
407 __failure = 9; | |
408 if (!PyErr_Occurred()) { | |
409 PyErr_SetString(PyExc_RuntimeError, | |
410 "Unexpected error in an Op's C code. " | |
411 "No Python exception was set."); | |
412 } | |
413 goto __label_9;}; | |
414 } | |
415 if (CURAND_STATUS_SUCCESS != | |
416 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
417 { | |
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
419 { | |
420 __failure = 9; | |
421 if (!PyErr_Occurred()) { | |
422 PyErr_SetString(PyExc_RuntimeError, | |
423 "Unexpected error in an Op's C code. " | |
424 "No Python exception was set."); | |
425 } | |
426 goto __label_9;}; | |
427 } | |
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
429 assert (V3 == Py_False); | |
430 } | |
431 else if (1) | |
432 { | |
433 // use i_generator for o_generator | |
434 Py_XDECREF(V7); | |
435 Py_INCREF(V3); | |
436 V7 = V3; | |
437 } | |
438 else | |
439 { | |
440 // copy i_generator for o_generator | |
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
442 { | |
443 __failure = 9; | |
444 if (!PyErr_Occurred()) { | |
445 PyErr_SetString(PyExc_RuntimeError, | |
446 "Unexpected error in an Op's C code. " | |
447 "No Python exception was set."); | |
448 } | |
449 goto __label_9;}; | |
450 } | |
451 { | |
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
453 curandStatus_t err = curandGenerateUniform(*gen, | |
454 CudaNdarray_DEV_DATA(V1), | |
455 n_elements); | |
456 | |
457 | |
458 if (err != CURAND_STATUS_SUCCESS) | |
459 { | |
460 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
461 { | |
462 __failure = 9; | |
463 if (!PyErr_Occurred()) { | |
464 PyErr_SetString(PyExc_RuntimeError, | |
465 "Unexpected error in an Op's C code. " | |
466 "No Python exception was set."); | |
467 } | |
468 goto __label_9;}; | |
469 } | |
470 cudaThreadSynchronize(); | |
471 } | |
472 //////// </ code generated by CURAND_Base> | |
473 __label_9: | |
474 | |
475 double __DUMMY_9; | |
476 | |
477 } | |
478 __label_8: | |
479 | |
480 if (!__failure) { | |
481 | |
482 assert(py_V7->ob_refcnt > 1); | |
483 Py_DECREF(py_V7); | |
484 py_V7 = V7 ? V7 : Py_None; | |
485 Py_INCREF(py_V7); | |
486 | |
487 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
488 {Py_XINCREF(py_V7);} | |
489 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
490 {Py_XDECREF(old);} | |
491 } | |
492 | |
493 Py_XDECREF(V7); | |
494 | |
495 {Py_XDECREF(py_V7);} | |
496 | |
497 double __DUMMY_8; | |
498 | |
499 } | |
500 __label_6: | |
501 | |
502 if (V5) { | |
503 Py_XDECREF(V5); | |
504 } | |
505 | |
506 {Py_XDECREF(py_V5);} | |
507 | |
508 double __DUMMY_6; | |
509 | |
510 } | |
511 __label_4: | |
512 | |
513 Py_XDECREF(V3); | |
514 | |
515 {Py_XDECREF(py_V3);} | |
516 | |
517 double __DUMMY_4; | |
518 | |
519 } | |
520 __label_2: | |
521 | |
522 if (!__failure) { | |
523 | |
524 //std::cerr << "sync\n"; | |
525 if (NULL == V1) { | |
526 // failure: sync None to storage | |
527 Py_XDECREF(py_V1); | |
528 py_V1 = Py_None; | |
529 Py_INCREF(py_V1); | |
530 } | |
531 else | |
532 { | |
533 if (py_V1 != (PyObject*)V1) | |
534 { | |
535 Py_XDECREF(py_V1); | |
536 py_V1 = (PyObject*)V1; | |
537 Py_INCREF(py_V1); | |
538 } | |
539 assert(py_V1->ob_refcnt); | |
540 } | |
541 | |
542 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
543 {Py_XINCREF(py_V1);} | |
544 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
545 {Py_XDECREF(old);} | |
546 } | |
547 | |
548 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
549 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
550 if (V1) | |
551 { | |
552 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
553 Py_XDECREF(V1); | |
554 } | |
555 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
556 | |
557 {Py_XDECREF(py_V1);} | |
558 | |
559 double __DUMMY_2; | |
560 | |
561 } | |
562 | |
563 | |
564 if (__failure) { | |
565 // When there is a failure, this code puts the exception | |
566 // in __ERROR. | |
567 PyObject* err_type = NULL; | |
568 PyObject* err_msg = NULL; | |
569 PyObject* err_traceback = NULL; | |
570 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
571 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
572 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
573 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
574 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
575 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
576 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
577 PyList_SET_ITEM(__ERROR, 0, err_type); | |
578 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
579 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
580 {Py_XDECREF(old_err_type);} | |
581 {Py_XDECREF(old_err_msg);} | |
582 {Py_XDECREF(old_err_traceback);} | |
583 } | |
584 // The failure code is returned to index what code block failed. | |
585 return __failure; | |
586 | |
587 } | |
588 }; | |
589 | |
590 | |
591 int __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor(__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* self) { | |
592 return self->run(); | |
593 } | |
594 | |
595 void __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor(void* executor, void* self) { | |
596 delete ((__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4*)self); | |
597 } | |
598 | |
599 ////////////////////// | |
600 //// Functions | |
601 ////////////////////// | |
602 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
603 assert(PyTuple_Check(argtuple)); | |
604 if (5 != PyTuple_Size(argtuple)){ | |
605 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
606 return NULL; | |
607 } | |
608 __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4* struct_ptr = new __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4(); | |
609 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
610 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_executor), struct_ptr, __struct_compiled_op_e89e1fed0e21a65d4b9fbb16fea234f4_destructor); | |
611 return thunk; } | |
612 | |
613 ////////////////////// | |
614 //// Module init | |
615 ////////////////////// | |
616 static PyMethodDef MyMethods[] = { | |
617 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
618 {NULL, NULL, 0, NULL} | |
619 }; | |
620 PyMODINIT_FUNC inite89e1fed0e21a65d4b9fbb16fea234f4(void){ | |
621 import_array(); | |
622 (void) Py_InitModule("e89e1fed0e21a65d4b9fbb16fea234f4", MyMethods); | |
623 } | |
624 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmptY4XQ8/e89e1fed0e21a65d4b9fbb16fea234f4.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
E1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 #if PY_MAJOR_VERSION >= 3 | |
14 void free_generator(PyObject *_gen) | |
15 { | |
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen); | |
17 #else | |
18 void free_generator(void *_gen) | |
19 { | |
20 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
21 #endif | |
22 | |
23 curandStatus_t err = curandDestroyGenerator(*gen); | |
24 if (err != CURAND_STATUS_SUCCESS) | |
25 { | |
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n", | |
27 (int)err); | |
28 } | |
29 free(gen); | |
30 } | |
31 | |
32 | |
33 struct __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6 { | |
34 PyObject* __ERROR; | |
35 | |
36 PyObject* storage_V3; | |
37 PyObject* storage_V5; | |
38 PyObject* storage_V7; | |
39 PyObject* storage_V1; | |
40 | |
41 | |
42 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6() {} | |
43 ~__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6(void) { | |
44 cleanup(); | |
45 } | |
46 | |
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
48 Py_XINCREF(storage_V3); | |
49 Py_XINCREF(storage_V5); | |
50 Py_XINCREF(storage_V7); | |
51 Py_XINCREF(storage_V1); | |
52 this->storage_V3 = storage_V3; | |
53 this->storage_V5 = storage_V5; | |
54 this->storage_V7 = storage_V7; | |
55 this->storage_V1 = storage_V1; | |
56 int __failure = 0; | |
57 | |
58 { | |
59 | |
60 { | |
61 | |
62 { | |
63 | |
64 { | |
65 | |
66 this->__ERROR = __ERROR; | |
67 return 0; | |
68 __label_7: | |
69 | |
70 double __DUMMY_7; | |
71 | |
72 } | |
73 __label_5: | |
74 | |
75 double __DUMMY_5; | |
76 | |
77 } | |
78 __label_3: | |
79 | |
80 double __DUMMY_3; | |
81 | |
82 } | |
83 __label_1: | |
84 | |
85 double __DUMMY_1; | |
86 | |
87 } | |
88 | |
89 Py_XDECREF(this->storage_V3); | |
90 Py_XDECREF(this->storage_V5); | |
91 Py_XDECREF(this->storage_V7); | |
92 Py_XDECREF(this->storage_V1); | |
93 | |
94 if (__failure) { | |
95 // When there is a failure, this code puts the exception | |
96 // in __ERROR. | |
97 PyObject* err_type = NULL; | |
98 PyObject* err_msg = NULL; | |
99 PyObject* err_traceback = NULL; | |
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
107 PyList_SET_ITEM(__ERROR, 0, err_type); | |
108 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
109 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
110 {Py_XDECREF(old_err_type);} | |
111 {Py_XDECREF(old_err_msg);} | |
112 {Py_XDECREF(old_err_traceback);} | |
113 } | |
114 // The failure code is returned to index what code block failed. | |
115 return __failure; | |
116 | |
117 } | |
118 void cleanup(void) { | |
119 __label_1: | |
120 | |
121 double __DUMMY_1; | |
122 __label_3: | |
123 | |
124 double __DUMMY_3; | |
125 __label_5: | |
126 | |
127 double __DUMMY_5; | |
128 __label_7: | |
129 | |
130 double __DUMMY_7; | |
131 | |
132 Py_XDECREF(this->storage_V3); | |
133 Py_XDECREF(this->storage_V5); | |
134 Py_XDECREF(this->storage_V7); | |
135 Py_XDECREF(this->storage_V1); | |
136 } | |
137 int run(void) { | |
138 int __failure = 0; | |
139 | |
140 PyObject* py_V1; | |
141 CudaNdarray * V1; | |
142 PyObject* py_V3; | |
143 | |
144 PyObject* V3; | |
145 | |
146 PyObject* py_V5; | |
147 | |
148 PyArrayObject* V5; | |
149 int type_num_V5; | |
150 typedef npy_int32 dtype_V5; | |
151 | |
152 PyObject* py_V7; | |
153 | |
154 PyObject* V7; | |
155 | |
156 { | |
157 | |
158 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
159 {Py_XINCREF(py_V1);} | |
160 | |
161 if (py_V1 == Py_None) | |
162 { | |
163 V1 = NULL; | |
164 } | |
165 else | |
166 { | |
167 | |
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
169 // and one ref from the local scope. | |
170 | |
171 if (CudaNdarray_Check(py_V1)) | |
172 { | |
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
174 V1 = (CudaNdarray*)py_V1; | |
175 //std::cerr << "c_extract " << V1 << '\n'; | |
176 if (V1->nd != 2) | |
177 { | |
178 PyErr_Format(PyExc_RuntimeError, | |
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
180 V1->nd); | |
181 V1 = NULL; | |
182 { | |
183 __failure = 2; | |
184 if (!PyErr_Occurred()) { | |
185 PyErr_SetString(PyExc_RuntimeError, | |
186 "Unexpected error in an Op's C code. " | |
187 "No Python exception was set."); | |
188 } | |
189 goto __label_2;}; | |
190 } | |
191 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
192 | |
193 | |
194 assert(V1); | |
195 Py_INCREF(py_V1); | |
196 } | |
197 else if (py_V1 == Py_None) | |
198 { | |
199 PyErr_SetString(PyExc_TypeError, | |
200 "expected a CudaNdarray, not None"); | |
201 V1 = NULL; | |
202 { | |
203 __failure = 2; | |
204 if (!PyErr_Occurred()) { | |
205 PyErr_SetString(PyExc_RuntimeError, | |
206 "Unexpected error in an Op's C code. " | |
207 "No Python exception was set."); | |
208 } | |
209 goto __label_2;}; | |
210 } | |
211 else | |
212 { | |
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
215 V1 = NULL; | |
216 { | |
217 __failure = 2; | |
218 if (!PyErr_Occurred()) { | |
219 PyErr_SetString(PyExc_RuntimeError, | |
220 "Unexpected error in an Op's C code. " | |
221 "No Python exception was set."); | |
222 } | |
223 goto __label_2;}; | |
224 } | |
225 //std::cerr << "c_extract done " << V1 << '\n'; | |
226 | |
227 | |
228 } | |
229 | |
230 { | |
231 | |
232 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
233 {Py_XINCREF(py_V3);} | |
234 | |
235 Py_INCREF(py_V3); | |
236 V3 = py_V3; | |
237 | |
238 { | |
239 | |
240 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
241 {Py_XINCREF(py_V5);} | |
242 | |
243 V5 = NULL; | |
244 if (py_V5 == Py_None) { | |
245 // We can either fail here or set V5 to NULL and rely on Ops | |
246 // using tensors to handle the NULL case, but if they fail to do so | |
247 // they'll end up with nasty segfaults, so this is public service. | |
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
249 { | |
250 __failure = 6; | |
251 if (!PyErr_Occurred()) { | |
252 PyErr_SetString(PyExc_RuntimeError, | |
253 "Unexpected error in an Op's C code. " | |
254 "No Python exception was set."); | |
255 } | |
256 goto __label_6;} | |
257 } | |
258 if (!PyArray_Check(py_V5)) { | |
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
260 { | |
261 __failure = 6; | |
262 if (!PyErr_Occurred()) { | |
263 PyErr_SetString(PyExc_RuntimeError, | |
264 "Unexpected error in an Op's C code. " | |
265 "No Python exception was set."); | |
266 } | |
267 goto __label_6;} | |
268 } | |
269 // We expect NPY_INT32 | |
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
271 if (!PyArray_ISALIGNED(py_V5)) { | |
272 PyErr_Format(PyExc_NotImplementedError, | |
273 "expected an aligned array of type %ld " | |
274 "(NPY_INT32), got non-aligned array of type %ld" | |
275 " with %ld dimensions, with 3 last dims " | |
276 "%ld, %ld, %ld" | |
277 " and 3 last strides %ld %ld, %ld.", | |
278 (long int) NPY_INT32, | |
279 (long int) type_num_V5, | |
280 (long int) PyArray_NDIM(py_V5), | |
281 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
283 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
285 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
287 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
289 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
291 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
293 ); | |
294 { | |
295 __failure = 6; | |
296 if (!PyErr_Occurred()) { | |
297 PyErr_SetString(PyExc_RuntimeError, | |
298 "Unexpected error in an Op's C code. " | |
299 "No Python exception was set."); | |
300 } | |
301 goto __label_6;} | |
302 } | |
303 // This is a TypeError to be consistent with DEBUG_MODE | |
304 // Note: DEBUG_MODE also tells the name of the container | |
305 if (type_num_V5 != NPY_INT32) { | |
306 PyErr_Format(PyExc_TypeError, | |
307 "expected type_num %d (NPY_INT32) got %d", | |
308 NPY_INT32, type_num_V5); | |
309 { | |
310 __failure = 6; | |
311 if (!PyErr_Occurred()) { | |
312 PyErr_SetString(PyExc_RuntimeError, | |
313 "Unexpected error in an Op's C code. " | |
314 "No Python exception was set."); | |
315 } | |
316 goto __label_6;} | |
317 } | |
318 V5 = (PyArrayObject*)(py_V5); | |
319 Py_XINCREF(V5); | |
320 | |
321 { | |
322 | |
323 py_V7 = Py_None; | |
324 {Py_XINCREF(py_V7);} | |
325 | |
326 V7 = NULL; | |
327 | |
328 { | |
329 | |
330 //////// <code generated by CURAND_Base> | |
331 int odims[2]; | |
332 int n_elements = 1; | |
333 int must_alloc_sample = ((NULL == V1) | |
334 || !CudaNdarray_Check(py_V1) | |
335 || (V1->nd != 2)); | |
336 | |
337 if (V5->nd != 1) | |
338 { | |
339 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
340 { | |
341 __failure = 9; | |
342 if (!PyErr_Occurred()) { | |
343 PyErr_SetString(PyExc_RuntimeError, | |
344 "Unexpected error in an Op's C code. " | |
345 "No Python exception was set."); | |
346 } | |
347 goto __label_9;} | |
348 } | |
349 if (V5->dimensions[0] != 2) | |
350 { | |
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
352 2, V5->dimensions[0]); | |
353 { | |
354 __failure = 9; | |
355 if (!PyErr_Occurred()) { | |
356 PyErr_SetString(PyExc_RuntimeError, | |
357 "Unexpected error in an Op's C code. " | |
358 "No Python exception was set."); | |
359 } | |
360 goto __label_9;} | |
361 } | |
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
363 { | |
364 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
365 { | |
366 __failure = 9; | |
367 if (!PyErr_Occurred()) { | |
368 PyErr_SetString(PyExc_RuntimeError, | |
369 "Unexpected error in an Op's C code. " | |
370 "No Python exception was set."); | |
371 } | |
372 goto __label_9;} | |
373 } | |
374 for (int i = 0; i < 2; ++i) | |
375 { | |
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
377 n_elements *= odims[i]; | |
378 must_alloc_sample = (must_alloc_sample | |
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
380 } | |
381 if (must_alloc_sample) | |
382 { | |
383 Py_XDECREF(V1); | |
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
385 if(!V1) | |
386 { | |
387 { | |
388 __failure = 9; | |
389 if (!PyErr_Occurred()) { | |
390 PyErr_SetString(PyExc_RuntimeError, | |
391 "Unexpected error in an Op's C code. " | |
392 "No Python exception was set."); | |
393 } | |
394 goto __label_9;}; | |
395 } | |
396 } | |
397 if (!PyCObject_Check(V3)) | |
398 { | |
399 // allocate a new generator for o_generator | |
400 Py_XDECREF(V7); | |
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
402 assert(gen); | |
403 if (CURAND_STATUS_SUCCESS != | |
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
406 { | |
407 __failure = 9; | |
408 if (!PyErr_Occurred()) { | |
409 PyErr_SetString(PyExc_RuntimeError, | |
410 "Unexpected error in an Op's C code. " | |
411 "No Python exception was set."); | |
412 } | |
413 goto __label_9;}; | |
414 } | |
415 if (CURAND_STATUS_SUCCESS != | |
416 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
417 { | |
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
419 { | |
420 __failure = 9; | |
421 if (!PyErr_Occurred()) { | |
422 PyErr_SetString(PyExc_RuntimeError, | |
423 "Unexpected error in an Op's C code. " | |
424 "No Python exception was set."); | |
425 } | |
426 goto __label_9;}; | |
427 } | |
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
429 assert (V3 == Py_False); | |
430 } | |
431 else if (1) | |
432 { | |
433 // use i_generator for o_generator | |
434 Py_XDECREF(V7); | |
435 Py_INCREF(V3); | |
436 V7 = V3; | |
437 } | |
438 else | |
439 { | |
440 // copy i_generator for o_generator | |
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
442 { | |
443 __failure = 9; | |
444 if (!PyErr_Occurred()) { | |
445 PyErr_SetString(PyExc_RuntimeError, | |
446 "Unexpected error in an Op's C code. " | |
447 "No Python exception was set."); | |
448 } | |
449 goto __label_9;}; | |
450 } | |
451 { | |
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
453 curandStatus_t err = curandGenerateNormal(*gen, | |
454 CudaNdarray_DEV_DATA(V1), | |
455 n_elements, | |
456 0.0, 1.0); | |
457 | |
458 | |
459 if (err != CURAND_STATUS_SUCCESS) | |
460 { | |
461 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
462 { | |
463 __failure = 9; | |
464 if (!PyErr_Occurred()) { | |
465 PyErr_SetString(PyExc_RuntimeError, | |
466 "Unexpected error in an Op's C code. " | |
467 "No Python exception was set."); | |
468 } | |
469 goto __label_9;}; | |
470 } | |
471 cudaThreadSynchronize(); | |
472 } | |
473 //////// </ code generated by CURAND_Base> | |
474 __label_9: | |
475 | |
476 double __DUMMY_9; | |
477 | |
478 } | |
479 __label_8: | |
480 | |
481 if (!__failure) { | |
482 | |
483 assert(py_V7->ob_refcnt > 1); | |
484 Py_DECREF(py_V7); | |
485 py_V7 = V7 ? V7 : Py_None; | |
486 Py_INCREF(py_V7); | |
487 | |
488 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
489 {Py_XINCREF(py_V7);} | |
490 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
491 {Py_XDECREF(old);} | |
492 } | |
493 | |
494 Py_XDECREF(V7); | |
495 | |
496 {Py_XDECREF(py_V7);} | |
497 | |
498 double __DUMMY_8; | |
499 | |
500 } | |
501 __label_6: | |
502 | |
503 if (V5) { | |
504 Py_XDECREF(V5); | |
505 } | |
506 | |
507 {Py_XDECREF(py_V5);} | |
508 | |
509 double __DUMMY_6; | |
510 | |
511 } | |
512 __label_4: | |
513 | |
514 Py_XDECREF(V3); | |
515 | |
516 {Py_XDECREF(py_V3);} | |
517 | |
518 double __DUMMY_4; | |
519 | |
520 } | |
521 __label_2: | |
522 | |
523 if (!__failure) { | |
524 | |
525 //std::cerr << "sync\n"; | |
526 if (NULL == V1) { | |
527 // failure: sync None to storage | |
528 Py_XDECREF(py_V1); | |
529 py_V1 = Py_None; | |
530 Py_INCREF(py_V1); | |
531 } | |
532 else | |
533 { | |
534 if (py_V1 != (PyObject*)V1) | |
535 { | |
536 Py_XDECREF(py_V1); | |
537 py_V1 = (PyObject*)V1; | |
538 Py_INCREF(py_V1); | |
539 } | |
540 assert(py_V1->ob_refcnt); | |
541 } | |
542 | |
543 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
544 {Py_XINCREF(py_V1);} | |
545 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
546 {Py_XDECREF(old);} | |
547 } | |
548 | |
549 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
550 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
551 if (V1) | |
552 { | |
553 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
554 Py_XDECREF(V1); | |
555 } | |
556 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
557 | |
558 {Py_XDECREF(py_V1);} | |
559 | |
560 double __DUMMY_2; | |
561 | |
562 } | |
563 | |
564 | |
565 if (__failure) { | |
566 // When there is a failure, this code puts the exception | |
567 // in __ERROR. | |
568 PyObject* err_type = NULL; | |
569 PyObject* err_msg = NULL; | |
570 PyObject* err_traceback = NULL; | |
571 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
572 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
573 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
574 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
575 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
576 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
577 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
578 PyList_SET_ITEM(__ERROR, 0, err_type); | |
579 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
580 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
581 {Py_XDECREF(old_err_type);} | |
582 {Py_XDECREF(old_err_msg);} | |
583 {Py_XDECREF(old_err_traceback);} | |
584 } | |
585 // The failure code is returned to index what code block failed. | |
586 return __failure; | |
587 | |
588 } | |
589 }; | |
590 | |
591 | |
592 int __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor(__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* self) { | |
593 return self->run(); | |
594 } | |
595 | |
596 void __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor(void* executor, void* self) { | |
597 delete ((__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6*)self); | |
598 } | |
599 | |
600 ////////////////////// | |
601 //// Functions | |
602 ////////////////////// | |
603 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
604 assert(PyTuple_Check(argtuple)); | |
605 if (5 != PyTuple_Size(argtuple)){ | |
606 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
607 return NULL; | |
608 } | |
609 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* struct_ptr = new __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6(); | |
610 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
611 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor), struct_ptr, __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor); | |
612 return thunk; } | |
613 | |
614 ////////////////////// | |
615 //// Module init | |
616 ////////////////////// | |
617 static PyMethodDef MyMethods[] = { | |
618 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
619 {NULL, NULL, 0, NULL} | |
620 }; | |
621 PyMODINIT_FUNC init0a2742cf42fdbba4c958f02e9b7af2f6(void){ | |
622 import_array(); | |
623 (void) Py_InitModule("0a2742cf42fdbba4c958f02e9b7af2f6", MyMethods); | |
624 } | |
625 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmpKnseFy/0a2742cf42fdbba4c958f02e9b7af2f6.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
E1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 #if PY_MAJOR_VERSION >= 3 | |
14 void free_generator(PyObject *_gen) | |
15 { | |
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen); | |
17 #else | |
18 void free_generator(void *_gen) | |
19 { | |
20 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
21 #endif | |
22 | |
23 curandStatus_t err = curandDestroyGenerator(*gen); | |
24 if (err != CURAND_STATUS_SUCCESS) | |
25 { | |
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n", | |
27 (int)err); | |
28 } | |
29 free(gen); | |
30 } | |
31 | |
32 | |
33 struct __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6 { | |
34 PyObject* __ERROR; | |
35 | |
36 PyObject* storage_V3; | |
37 PyObject* storage_V5; | |
38 PyObject* storage_V7; | |
39 PyObject* storage_V1; | |
40 | |
41 | |
42 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6() {} | |
43 ~__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6(void) { | |
44 cleanup(); | |
45 } | |
46 | |
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
48 Py_XINCREF(storage_V3); | |
49 Py_XINCREF(storage_V5); | |
50 Py_XINCREF(storage_V7); | |
51 Py_XINCREF(storage_V1); | |
52 this->storage_V3 = storage_V3; | |
53 this->storage_V5 = storage_V5; | |
54 this->storage_V7 = storage_V7; | |
55 this->storage_V1 = storage_V1; | |
56 int __failure = 0; | |
57 | |
58 { | |
59 | |
60 { | |
61 | |
62 { | |
63 | |
64 { | |
65 | |
66 this->__ERROR = __ERROR; | |
67 return 0; | |
68 __label_7: | |
69 | |
70 double __DUMMY_7; | |
71 | |
72 } | |
73 __label_5: | |
74 | |
75 double __DUMMY_5; | |
76 | |
77 } | |
78 __label_3: | |
79 | |
80 double __DUMMY_3; | |
81 | |
82 } | |
83 __label_1: | |
84 | |
85 double __DUMMY_1; | |
86 | |
87 } | |
88 | |
89 Py_XDECREF(this->storage_V3); | |
90 Py_XDECREF(this->storage_V5); | |
91 Py_XDECREF(this->storage_V7); | |
92 Py_XDECREF(this->storage_V1); | |
93 | |
94 if (__failure) { | |
95 // When there is a failure, this code puts the exception | |
96 // in __ERROR. | |
97 PyObject* err_type = NULL; | |
98 PyObject* err_msg = NULL; | |
99 PyObject* err_traceback = NULL; | |
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
107 PyList_SET_ITEM(__ERROR, 0, err_type); | |
108 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
109 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
110 {Py_XDECREF(old_err_type);} | |
111 {Py_XDECREF(old_err_msg);} | |
112 {Py_XDECREF(old_err_traceback);} | |
113 } | |
114 // The failure code is returned to index what code block failed. | |
115 return __failure; | |
116 | |
117 } | |
118 void cleanup(void) { | |
119 __label_1: | |
120 | |
121 double __DUMMY_1; | |
122 __label_3: | |
123 | |
124 double __DUMMY_3; | |
125 __label_5: | |
126 | |
127 double __DUMMY_5; | |
128 __label_7: | |
129 | |
130 double __DUMMY_7; | |
131 | |
132 Py_XDECREF(this->storage_V3); | |
133 Py_XDECREF(this->storage_V5); | |
134 Py_XDECREF(this->storage_V7); | |
135 Py_XDECREF(this->storage_V1); | |
136 } | |
137 int run(void) { | |
138 int __failure = 0; | |
139 | |
140 PyObject* py_V1; | |
141 CudaNdarray * V1; | |
142 PyObject* py_V3; | |
143 | |
144 PyObject* V3; | |
145 | |
146 PyObject* py_V5; | |
147 | |
148 PyArrayObject* V5; | |
149 int type_num_V5; | |
150 typedef npy_int32 dtype_V5; | |
151 | |
152 PyObject* py_V7; | |
153 | |
154 PyObject* V7; | |
155 | |
156 { | |
157 | |
158 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
159 {Py_XINCREF(py_V1);} | |
160 | |
161 if (py_V1 == Py_None) | |
162 { | |
163 V1 = NULL; | |
164 } | |
165 else | |
166 { | |
167 | |
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
169 // and one ref from the local scope. | |
170 | |
171 if (CudaNdarray_Check(py_V1)) | |
172 { | |
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
174 V1 = (CudaNdarray*)py_V1; | |
175 //std::cerr << "c_extract " << V1 << '\n'; | |
176 if (V1->nd != 2) | |
177 { | |
178 PyErr_Format(PyExc_RuntimeError, | |
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
180 V1->nd); | |
181 V1 = NULL; | |
182 { | |
183 __failure = 2; | |
184 if (!PyErr_Occurred()) { | |
185 PyErr_SetString(PyExc_RuntimeError, | |
186 "Unexpected error in an Op's C code. " | |
187 "No Python exception was set."); | |
188 } | |
189 goto __label_2;}; | |
190 } | |
191 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
192 | |
193 | |
194 assert(V1); | |
195 Py_INCREF(py_V1); | |
196 } | |
197 else if (py_V1 == Py_None) | |
198 { | |
199 PyErr_SetString(PyExc_TypeError, | |
200 "expected a CudaNdarray, not None"); | |
201 V1 = NULL; | |
202 { | |
203 __failure = 2; | |
204 if (!PyErr_Occurred()) { | |
205 PyErr_SetString(PyExc_RuntimeError, | |
206 "Unexpected error in an Op's C code. " | |
207 "No Python exception was set."); | |
208 } | |
209 goto __label_2;}; | |
210 } | |
211 else | |
212 { | |
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
215 V1 = NULL; | |
216 { | |
217 __failure = 2; | |
218 if (!PyErr_Occurred()) { | |
219 PyErr_SetString(PyExc_RuntimeError, | |
220 "Unexpected error in an Op's C code. " | |
221 "No Python exception was set."); | |
222 } | |
223 goto __label_2;}; | |
224 } | |
225 //std::cerr << "c_extract done " << V1 << '\n'; | |
226 | |
227 | |
228 } | |
229 | |
230 { | |
231 | |
232 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
233 {Py_XINCREF(py_V3);} | |
234 | |
235 Py_INCREF(py_V3); | |
236 V3 = py_V3; | |
237 | |
238 { | |
239 | |
240 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
241 {Py_XINCREF(py_V5);} | |
242 | |
243 V5 = NULL; | |
244 if (py_V5 == Py_None) { | |
245 // We can either fail here or set V5 to NULL and rely on Ops | |
246 // using tensors to handle the NULL case, but if they fail to do so | |
247 // they'll end up with nasty segfaults, so this is public service. | |
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
249 { | |
250 __failure = 6; | |
251 if (!PyErr_Occurred()) { | |
252 PyErr_SetString(PyExc_RuntimeError, | |
253 "Unexpected error in an Op's C code. " | |
254 "No Python exception was set."); | |
255 } | |
256 goto __label_6;} | |
257 } | |
258 if (!PyArray_Check(py_V5)) { | |
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
260 { | |
261 __failure = 6; | |
262 if (!PyErr_Occurred()) { | |
263 PyErr_SetString(PyExc_RuntimeError, | |
264 "Unexpected error in an Op's C code. " | |
265 "No Python exception was set."); | |
266 } | |
267 goto __label_6;} | |
268 } | |
269 // We expect NPY_INT32 | |
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
271 if (!PyArray_ISALIGNED(py_V5)) { | |
272 PyErr_Format(PyExc_NotImplementedError, | |
273 "expected an aligned array of type %ld " | |
274 "(NPY_INT32), got non-aligned array of type %ld" | |
275 " with %ld dimensions, with 3 last dims " | |
276 "%ld, %ld, %ld" | |
277 " and 3 last strides %ld %ld, %ld.", | |
278 (long int) NPY_INT32, | |
279 (long int) type_num_V5, | |
280 (long int) PyArray_NDIM(py_V5), | |
281 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
283 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
285 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
287 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
289 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
291 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
293 ); | |
294 { | |
295 __failure = 6; | |
296 if (!PyErr_Occurred()) { | |
297 PyErr_SetString(PyExc_RuntimeError, | |
298 "Unexpected error in an Op's C code. " | |
299 "No Python exception was set."); | |
300 } | |
301 goto __label_6;} | |
302 } | |
303 // This is a TypeError to be consistent with DEBUG_MODE | |
304 // Note: DEBUG_MODE also tells the name of the container | |
305 if (type_num_V5 != NPY_INT32) { | |
306 PyErr_Format(PyExc_TypeError, | |
307 "expected type_num %d (NPY_INT32) got %d", | |
308 NPY_INT32, type_num_V5); | |
309 { | |
310 __failure = 6; | |
311 if (!PyErr_Occurred()) { | |
312 PyErr_SetString(PyExc_RuntimeError, | |
313 "Unexpected error in an Op's C code. " | |
314 "No Python exception was set."); | |
315 } | |
316 goto __label_6;} | |
317 } | |
318 V5 = (PyArrayObject*)(py_V5); | |
319 Py_XINCREF(V5); | |
320 | |
321 { | |
322 | |
323 py_V7 = Py_None; | |
324 {Py_XINCREF(py_V7);} | |
325 | |
326 V7 = NULL; | |
327 | |
328 { | |
329 | |
330 //////// <code generated by CURAND_Base> | |
331 int odims[2]; | |
332 int n_elements = 1; | |
333 int must_alloc_sample = ((NULL == V1) | |
334 || !CudaNdarray_Check(py_V1) | |
335 || (V1->nd != 2)); | |
336 | |
337 if (V5->nd != 1) | |
338 { | |
339 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
340 { | |
341 __failure = 9; | |
342 if (!PyErr_Occurred()) { | |
343 PyErr_SetString(PyExc_RuntimeError, | |
344 "Unexpected error in an Op's C code. " | |
345 "No Python exception was set."); | |
346 } | |
347 goto __label_9;} | |
348 } | |
349 if (V5->dimensions[0] != 2) | |
350 { | |
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
352 2, V5->dimensions[0]); | |
353 { | |
354 __failure = 9; | |
355 if (!PyErr_Occurred()) { | |
356 PyErr_SetString(PyExc_RuntimeError, | |
357 "Unexpected error in an Op's C code. " | |
358 "No Python exception was set."); | |
359 } | |
360 goto __label_9;} | |
361 } | |
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
363 { | |
364 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
365 { | |
366 __failure = 9; | |
367 if (!PyErr_Occurred()) { | |
368 PyErr_SetString(PyExc_RuntimeError, | |
369 "Unexpected error in an Op's C code. " | |
370 "No Python exception was set."); | |
371 } | |
372 goto __label_9;} | |
373 } | |
374 for (int i = 0; i < 2; ++i) | |
375 { | |
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
377 n_elements *= odims[i]; | |
378 must_alloc_sample = (must_alloc_sample | |
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
380 } | |
381 if (must_alloc_sample) | |
382 { | |
383 Py_XDECREF(V1); | |
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
385 if(!V1) | |
386 { | |
387 { | |
388 __failure = 9; | |
389 if (!PyErr_Occurred()) { | |
390 PyErr_SetString(PyExc_RuntimeError, | |
391 "Unexpected error in an Op's C code. " | |
392 "No Python exception was set."); | |
393 } | |
394 goto __label_9;}; | |
395 } | |
396 } | |
397 if (!PyCObject_Check(V3)) | |
398 { | |
399 // allocate a new generator for o_generator | |
400 Py_XDECREF(V7); | |
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
402 assert(gen); | |
403 if (CURAND_STATUS_SUCCESS != | |
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
406 { | |
407 __failure = 9; | |
408 if (!PyErr_Occurred()) { | |
409 PyErr_SetString(PyExc_RuntimeError, | |
410 "Unexpected error in an Op's C code. " | |
411 "No Python exception was set."); | |
412 } | |
413 goto __label_9;}; | |
414 } | |
415 if (CURAND_STATUS_SUCCESS != | |
416 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
417 { | |
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
419 { | |
420 __failure = 9; | |
421 if (!PyErr_Occurred()) { | |
422 PyErr_SetString(PyExc_RuntimeError, | |
423 "Unexpected error in an Op's C code. " | |
424 "No Python exception was set."); | |
425 } | |
426 goto __label_9;}; | |
427 } | |
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
429 assert (V3 == Py_False); | |
430 } | |
431 else if (1) | |
432 { | |
433 // use i_generator for o_generator | |
434 Py_XDECREF(V7); | |
435 Py_INCREF(V3); | |
436 V7 = V3; | |
437 } | |
438 else | |
439 { | |
440 // copy i_generator for o_generator | |
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
442 { | |
443 __failure = 9; | |
444 if (!PyErr_Occurred()) { | |
445 PyErr_SetString(PyExc_RuntimeError, | |
446 "Unexpected error in an Op's C code. " | |
447 "No Python exception was set."); | |
448 } | |
449 goto __label_9;}; | |
450 } | |
451 { | |
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
453 curandStatus_t err = curandGenerateNormal(*gen, | |
454 CudaNdarray_DEV_DATA(V1), | |
455 n_elements, | |
456 0.0, 1.0); | |
457 | |
458 | |
459 if (err != CURAND_STATUS_SUCCESS) | |
460 { | |
461 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
462 { | |
463 __failure = 9; | |
464 if (!PyErr_Occurred()) { | |
465 PyErr_SetString(PyExc_RuntimeError, | |
466 "Unexpected error in an Op's C code. " | |
467 "No Python exception was set."); | |
468 } | |
469 goto __label_9;}; | |
470 } | |
471 cudaThreadSynchronize(); | |
472 } | |
473 //////// </ code generated by CURAND_Base> | |
474 __label_9: | |
475 | |
476 double __DUMMY_9; | |
477 | |
478 } | |
479 __label_8: | |
480 | |
481 if (!__failure) { | |
482 | |
483 assert(py_V7->ob_refcnt > 1); | |
484 Py_DECREF(py_V7); | |
485 py_V7 = V7 ? V7 : Py_None; | |
486 Py_INCREF(py_V7); | |
487 | |
488 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
489 {Py_XINCREF(py_V7);} | |
490 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
491 {Py_XDECREF(old);} | |
492 } | |
493 | |
494 Py_XDECREF(V7); | |
495 | |
496 {Py_XDECREF(py_V7);} | |
497 | |
498 double __DUMMY_8; | |
499 | |
500 } | |
501 __label_6: | |
502 | |
503 if (V5) { | |
504 Py_XDECREF(V5); | |
505 } | |
506 | |
507 {Py_XDECREF(py_V5);} | |
508 | |
509 double __DUMMY_6; | |
510 | |
511 } | |
512 __label_4: | |
513 | |
514 Py_XDECREF(V3); | |
515 | |
516 {Py_XDECREF(py_V3);} | |
517 | |
518 double __DUMMY_4; | |
519 | |
520 } | |
521 __label_2: | |
522 | |
523 if (!__failure) { | |
524 | |
525 //std::cerr << "sync\n"; | |
526 if (NULL == V1) { | |
527 // failure: sync None to storage | |
528 Py_XDECREF(py_V1); | |
529 py_V1 = Py_None; | |
530 Py_INCREF(py_V1); | |
531 } | |
532 else | |
533 { | |
534 if (py_V1 != (PyObject*)V1) | |
535 { | |
536 Py_XDECREF(py_V1); | |
537 py_V1 = (PyObject*)V1; | |
538 Py_INCREF(py_V1); | |
539 } | |
540 assert(py_V1->ob_refcnt); | |
541 } | |
542 | |
543 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
544 {Py_XINCREF(py_V1);} | |
545 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
546 {Py_XDECREF(old);} | |
547 } | |
548 | |
549 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
550 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
551 if (V1) | |
552 { | |
553 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
554 Py_XDECREF(V1); | |
555 } | |
556 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
557 | |
558 {Py_XDECREF(py_V1);} | |
559 | |
560 double __DUMMY_2; | |
561 | |
562 } | |
563 | |
564 | |
565 if (__failure) { | |
566 // When there is a failure, this code puts the exception | |
567 // in __ERROR. | |
568 PyObject* err_type = NULL; | |
569 PyObject* err_msg = NULL; | |
570 PyObject* err_traceback = NULL; | |
571 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
572 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
573 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
574 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
575 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
576 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
577 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
578 PyList_SET_ITEM(__ERROR, 0, err_type); | |
579 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
580 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
581 {Py_XDECREF(old_err_type);} | |
582 {Py_XDECREF(old_err_msg);} | |
583 {Py_XDECREF(old_err_traceback);} | |
584 } | |
585 // The failure code is returned to index what code block failed. | |
586 return __failure; | |
587 | |
588 } | |
589 }; | |
590 | |
591 | |
592 int __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor(__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* self) { | |
593 return self->run(); | |
594 } | |
595 | |
596 void __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor(void* executor, void* self) { | |
597 delete ((__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6*)self); | |
598 } | |
599 | |
600 ////////////////////// | |
601 //// Functions | |
602 ////////////////////// | |
603 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
604 assert(PyTuple_Check(argtuple)); | |
605 if (5 != PyTuple_Size(argtuple)){ | |
606 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
607 return NULL; | |
608 } | |
609 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* struct_ptr = new __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6(); | |
610 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
611 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor), struct_ptr, __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor); | |
612 return thunk; } | |
613 | |
614 ////////////////////// | |
615 //// Module init | |
616 ////////////////////// | |
617 static PyMethodDef MyMethods[] = { | |
618 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
619 {NULL, NULL, 0, NULL} | |
620 }; | |
621 PyMODINIT_FUNC init0a2742cf42fdbba4c958f02e9b7af2f6(void){ | |
622 import_array(); | |
623 (void) Py_InitModule("0a2742cf42fdbba4c958f02e9b7af2f6", MyMethods); | |
624 } | |
625 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmp9_KVmB/0a2742cf42fdbba4c958f02e9b7af2f6.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
E1 #include <Python.h> | |
2 #include <iostream> | |
3 #include <numpy/arrayobject.h> | |
4 #include <math.h> | |
5 #include "curand.h" | |
6 #include <numpy/arrayscalars.h> | |
7 #include "cuda_ndarray.cuh" | |
8 ////////////////////// | |
9 //// Support Code | |
10 ////////////////////// | |
11 | |
12 | |
13 #if PY_MAJOR_VERSION >= 3 | |
14 void free_generator(PyObject *_gen) | |
15 { | |
16 curandGenerator_t * gen = (curandGenerator_t*)NpyCapsule_AsVoidPtr(_gen); | |
17 #else | |
18 void free_generator(void *_gen) | |
19 { | |
20 curandGenerator_t * gen = (curandGenerator_t*)_gen; | |
21 #endif | |
22 | |
23 curandStatus_t err = curandDestroyGenerator(*gen); | |
24 if (err != CURAND_STATUS_SUCCESS) | |
25 { | |
26 fprintf(stderr, "Failure (%i) in destroying CURAND generator.\n", | |
27 (int)err); | |
28 } | |
29 free(gen); | |
30 } | |
31 | |
32 | |
33 struct __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6 { | |
34 PyObject* __ERROR; | |
35 | |
36 PyObject* storage_V3; | |
37 PyObject* storage_V5; | |
38 PyObject* storage_V7; | |
39 PyObject* storage_V1; | |
40 | |
41 | |
42 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6() {} | |
43 ~__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6(void) { | |
44 cleanup(); | |
45 } | |
46 | |
47 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V1) { | |
48 Py_XINCREF(storage_V3); | |
49 Py_XINCREF(storage_V5); | |
50 Py_XINCREF(storage_V7); | |
51 Py_XINCREF(storage_V1); | |
52 this->storage_V3 = storage_V3; | |
53 this->storage_V5 = storage_V5; | |
54 this->storage_V7 = storage_V7; | |
55 this->storage_V1 = storage_V1; | |
56 int __failure = 0; | |
57 | |
58 { | |
59 | |
60 { | |
61 | |
62 { | |
63 | |
64 { | |
65 | |
66 this->__ERROR = __ERROR; | |
67 return 0; | |
68 __label_7: | |
69 | |
70 double __DUMMY_7; | |
71 | |
72 } | |
73 __label_5: | |
74 | |
75 double __DUMMY_5; | |
76 | |
77 } | |
78 __label_3: | |
79 | |
80 double __DUMMY_3; | |
81 | |
82 } | |
83 __label_1: | |
84 | |
85 double __DUMMY_1; | |
86 | |
87 } | |
88 | |
89 Py_XDECREF(this->storage_V3); | |
90 Py_XDECREF(this->storage_V5); | |
91 Py_XDECREF(this->storage_V7); | |
92 Py_XDECREF(this->storage_V1); | |
93 | |
94 if (__failure) { | |
95 // When there is a failure, this code puts the exception | |
96 // in __ERROR. | |
97 PyObject* err_type = NULL; | |
98 PyObject* err_msg = NULL; | |
99 PyObject* err_traceback = NULL; | |
100 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
101 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
102 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
103 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
104 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
105 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
106 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
107 PyList_SET_ITEM(__ERROR, 0, err_type); | |
108 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
109 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
110 {Py_XDECREF(old_err_type);} | |
111 {Py_XDECREF(old_err_msg);} | |
112 {Py_XDECREF(old_err_traceback);} | |
113 } | |
114 // The failure code is returned to index what code block failed. | |
115 return __failure; | |
116 | |
117 } | |
118 void cleanup(void) { | |
119 __label_1: | |
120 | |
121 double __DUMMY_1; | |
122 __label_3: | |
123 | |
124 double __DUMMY_3; | |
125 __label_5: | |
126 | |
127 double __DUMMY_5; | |
128 __label_7: | |
129 | |
130 double __DUMMY_7; | |
131 | |
132 Py_XDECREF(this->storage_V3); | |
133 Py_XDECREF(this->storage_V5); | |
134 Py_XDECREF(this->storage_V7); | |
135 Py_XDECREF(this->storage_V1); | |
136 } | |
137 int run(void) { | |
138 int __failure = 0; | |
139 | |
140 PyObject* py_V1; | |
141 CudaNdarray * V1; | |
142 PyObject* py_V3; | |
143 | |
144 PyObject* V3; | |
145 | |
146 PyObject* py_V5; | |
147 | |
148 PyArrayObject* V5; | |
149 int type_num_V5; | |
150 typedef npy_int32 dtype_V5; | |
151 | |
152 PyObject* py_V7; | |
153 | |
154 PyObject* V7; | |
155 | |
156 { | |
157 | |
158 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
159 {Py_XINCREF(py_V1);} | |
160 | |
161 if (py_V1 == Py_None) | |
162 { | |
163 V1 = NULL; | |
164 } | |
165 else | |
166 { | |
167 | |
168 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
169 // and one ref from the local scope. | |
170 | |
171 if (CudaNdarray_Check(py_V1)) | |
172 { | |
173 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
174 V1 = (CudaNdarray*)py_V1; | |
175 //std::cerr << "c_extract " << V1 << '\n'; | |
176 if (V1->nd != 2) | |
177 { | |
178 PyErr_Format(PyExc_RuntimeError, | |
179 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
180 V1->nd); | |
181 V1 = NULL; | |
182 { | |
183 __failure = 2; | |
184 if (!PyErr_Occurred()) { | |
185 PyErr_SetString(PyExc_RuntimeError, | |
186 "Unexpected error in an Op's C code. " | |
187 "No Python exception was set."); | |
188 } | |
189 goto __label_2;}; | |
190 } | |
191 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
192 | |
193 | |
194 assert(V1); | |
195 Py_INCREF(py_V1); | |
196 } | |
197 else if (py_V1 == Py_None) | |
198 { | |
199 PyErr_SetString(PyExc_TypeError, | |
200 "expected a CudaNdarray, not None"); | |
201 V1 = NULL; | |
202 { | |
203 __failure = 2; | |
204 if (!PyErr_Occurred()) { | |
205 PyErr_SetString(PyExc_RuntimeError, | |
206 "Unexpected error in an Op's C code. " | |
207 "No Python exception was set."); | |
208 } | |
209 goto __label_2;}; | |
210 } | |
211 else | |
212 { | |
213 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
214 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
215 V1 = NULL; | |
216 { | |
217 __failure = 2; | |
218 if (!PyErr_Occurred()) { | |
219 PyErr_SetString(PyExc_RuntimeError, | |
220 "Unexpected error in an Op's C code. " | |
221 "No Python exception was set."); | |
222 } | |
223 goto __label_2;}; | |
224 } | |
225 //std::cerr << "c_extract done " << V1 << '\n'; | |
226 | |
227 | |
228 } | |
229 | |
230 { | |
231 | |
232 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
233 {Py_XINCREF(py_V3);} | |
234 | |
235 Py_INCREF(py_V3); | |
236 V3 = py_V3; | |
237 | |
238 { | |
239 | |
240 py_V5 = PyList_GET_ITEM(storage_V5, 0); | |
241 {Py_XINCREF(py_V5);} | |
242 | |
243 V5 = NULL; | |
244 if (py_V5 == Py_None) { | |
245 // We can either fail here or set V5 to NULL and rely on Ops | |
246 // using tensors to handle the NULL case, but if they fail to do so | |
247 // they'll end up with nasty segfaults, so this is public service. | |
248 PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None"); | |
249 { | |
250 __failure = 6; | |
251 if (!PyErr_Occurred()) { | |
252 PyErr_SetString(PyExc_RuntimeError, | |
253 "Unexpected error in an Op's C code. " | |
254 "No Python exception was set."); | |
255 } | |
256 goto __label_6;} | |
257 } | |
258 if (!PyArray_Check(py_V5)) { | |
259 PyErr_SetString(PyExc_ValueError, "expected an ndarray"); | |
260 { | |
261 __failure = 6; | |
262 if (!PyErr_Occurred()) { | |
263 PyErr_SetString(PyExc_RuntimeError, | |
264 "Unexpected error in an Op's C code. " | |
265 "No Python exception was set."); | |
266 } | |
267 goto __label_6;} | |
268 } | |
269 // We expect NPY_INT32 | |
270 type_num_V5 = ((PyArrayObject*)py_V5)->descr->type_num; | |
271 if (!PyArray_ISALIGNED(py_V5)) { | |
272 PyErr_Format(PyExc_NotImplementedError, | |
273 "expected an aligned array of type %ld " | |
274 "(NPY_INT32), got non-aligned array of type %ld" | |
275 " with %ld dimensions, with 3 last dims " | |
276 "%ld, %ld, %ld" | |
277 " and 3 last strides %ld %ld, %ld.", | |
278 (long int) NPY_INT32, | |
279 (long int) type_num_V5, | |
280 (long int) PyArray_NDIM(py_V5), | |
281 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
282 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
283 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
284 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
285 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
286 PyArray_DIMS(py_V5)[PyArray_NDIM(py_V5)-1] : -1, | |
287 (long int) PyArray_NDIM(py_V5) >= 3 ? | |
288 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-3] : -1, | |
289 (long int) PyArray_NDIM(py_V5) >= 2 ? | |
290 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-2] : -1, | |
291 (long int) PyArray_NDIM(py_V5) >= 1 ? | |
292 PyArray_STRIDES(py_V5)[PyArray_NDIM(py_V5)-1] : -1 | |
293 ); | |
294 { | |
295 __failure = 6; | |
296 if (!PyErr_Occurred()) { | |
297 PyErr_SetString(PyExc_RuntimeError, | |
298 "Unexpected error in an Op's C code. " | |
299 "No Python exception was set."); | |
300 } | |
301 goto __label_6;} | |
302 } | |
303 // This is a TypeError to be consistent with DEBUG_MODE | |
304 // Note: DEBUG_MODE also tells the name of the container | |
305 if (type_num_V5 != NPY_INT32) { | |
306 PyErr_Format(PyExc_TypeError, | |
307 "expected type_num %d (NPY_INT32) got %d", | |
308 NPY_INT32, type_num_V5); | |
309 { | |
310 __failure = 6; | |
311 if (!PyErr_Occurred()) { | |
312 PyErr_SetString(PyExc_RuntimeError, | |
313 "Unexpected error in an Op's C code. " | |
314 "No Python exception was set."); | |
315 } | |
316 goto __label_6;} | |
317 } | |
318 V5 = (PyArrayObject*)(py_V5); | |
319 Py_XINCREF(V5); | |
320 | |
321 { | |
322 | |
323 py_V7 = Py_None; | |
324 {Py_XINCREF(py_V7);} | |
325 | |
326 V7 = NULL; | |
327 | |
328 { | |
329 | |
330 //////// <code generated by CURAND_Base> | |
331 int odims[2]; | |
332 int n_elements = 1; | |
333 int must_alloc_sample = ((NULL == V1) | |
334 || !CudaNdarray_Check(py_V1) | |
335 || (V1->nd != 2)); | |
336 | |
337 if (V5->nd != 1) | |
338 { | |
339 PyErr_SetString(PyExc_ValueError, "size must be vector"); | |
340 { | |
341 __failure = 9; | |
342 if (!PyErr_Occurred()) { | |
343 PyErr_SetString(PyExc_RuntimeError, | |
344 "Unexpected error in an Op's C code. " | |
345 "No Python exception was set."); | |
346 } | |
347 goto __label_9;} | |
348 } | |
349 if (V5->dimensions[0] != 2) | |
350 { | |
351 PyErr_Format(PyExc_ValueError, "size must have length %i (not %i)", | |
352 2, V5->dimensions[0]); | |
353 { | |
354 __failure = 9; | |
355 if (!PyErr_Occurred()) { | |
356 PyErr_SetString(PyExc_RuntimeError, | |
357 "Unexpected error in an Op's C code. " | |
358 "No Python exception was set."); | |
359 } | |
360 goto __label_9;} | |
361 } | |
362 if (PyArray_DESCR(V5)->type_num != NPY_INT32) | |
363 { | |
364 PyErr_SetString(PyExc_ValueError, "size must be int32"); | |
365 { | |
366 __failure = 9; | |
367 if (!PyErr_Occurred()) { | |
368 PyErr_SetString(PyExc_RuntimeError, | |
369 "Unexpected error in an Op's C code. " | |
370 "No Python exception was set."); | |
371 } | |
372 goto __label_9;} | |
373 } | |
374 for (int i = 0; i < 2; ++i) | |
375 { | |
376 odims[i] = ((npy_int32*)(V5->data + V5->strides[0] * i))[0]; | |
377 n_elements *= odims[i]; | |
378 must_alloc_sample = (must_alloc_sample | |
379 || CudaNdarray_HOST_DIMS(V1)[i] != odims[i]); | |
380 } | |
381 if (must_alloc_sample) | |
382 { | |
383 Py_XDECREF(V1); | |
384 V1 = (CudaNdarray*)CudaNdarray_NewDims(2, odims); | |
385 if(!V1) | |
386 { | |
387 { | |
388 __failure = 9; | |
389 if (!PyErr_Occurred()) { | |
390 PyErr_SetString(PyExc_RuntimeError, | |
391 "Unexpected error in an Op's C code. " | |
392 "No Python exception was set."); | |
393 } | |
394 goto __label_9;}; | |
395 } | |
396 } | |
397 if (!PyCObject_Check(V3)) | |
398 { | |
399 // allocate a new generator for o_generator | |
400 Py_XDECREF(V7); | |
401 curandGenerator_t * gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); | |
402 assert(gen); | |
403 if (CURAND_STATUS_SUCCESS != | |
404 curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT)) { | |
405 PyErr_Format(PyExc_RuntimeError, "Failed to initialize curand generator"); | |
406 { | |
407 __failure = 9; | |
408 if (!PyErr_Occurred()) { | |
409 PyErr_SetString(PyExc_RuntimeError, | |
410 "Unexpected error in an Op's C code. " | |
411 "No Python exception was set."); | |
412 } | |
413 goto __label_9;}; | |
414 } | |
415 if (CURAND_STATUS_SUCCESS != | |
416 curandSetPseudoRandomGeneratorSeed(*gen,234)) | |
417 { | |
418 PyErr_Format(PyExc_RuntimeError, "Failed to set curand generator seed"); | |
419 { | |
420 __failure = 9; | |
421 if (!PyErr_Occurred()) { | |
422 PyErr_SetString(PyExc_RuntimeError, | |
423 "Unexpected error in an Op's C code. " | |
424 "No Python exception was set."); | |
425 } | |
426 goto __label_9;}; | |
427 } | |
428 V7 = PyCObject_FromVoidPtr(gen, &free_generator); | |
429 assert (V3 == Py_False); | |
430 } | |
431 else if (1) | |
432 { | |
433 // use i_generator for o_generator | |
434 Py_XDECREF(V7); | |
435 Py_INCREF(V3); | |
436 V7 = V3; | |
437 } | |
438 else | |
439 { | |
440 // copy i_generator for o_generator | |
441 PyErr_Format(PyExc_NotImplementedError, "non-destructive CURAND generation"); | |
442 { | |
443 __failure = 9; | |
444 if (!PyErr_Occurred()) { | |
445 PyErr_SetString(PyExc_RuntimeError, | |
446 "Unexpected error in an Op's C code. " | |
447 "No Python exception was set."); | |
448 } | |
449 goto __label_9;}; | |
450 } | |
451 { | |
452 curandGenerator_t * gen = (curandGenerator_t*)PyCObject_AsVoidPtr(V7); | |
453 curandStatus_t err = curandGenerateNormal(*gen, | |
454 CudaNdarray_DEV_DATA(V1), | |
455 n_elements, | |
456 0.0, 1.0); | |
457 | |
458 | |
459 if (err != CURAND_STATUS_SUCCESS) | |
460 { | |
461 PyErr_Format(PyExc_RuntimeError, "curand error generating random normals %i", (int)err); | |
462 { | |
463 __failure = 9; | |
464 if (!PyErr_Occurred()) { | |
465 PyErr_SetString(PyExc_RuntimeError, | |
466 "Unexpected error in an Op's C code. " | |
467 "No Python exception was set."); | |
468 } | |
469 goto __label_9;}; | |
470 } | |
471 cudaThreadSynchronize(); | |
472 } | |
473 //////// </ code generated by CURAND_Base> | |
474 __label_9: | |
475 | |
476 double __DUMMY_9; | |
477 | |
478 } | |
479 __label_8: | |
480 | |
481 if (!__failure) { | |
482 | |
483 assert(py_V7->ob_refcnt > 1); | |
484 Py_DECREF(py_V7); | |
485 py_V7 = V7 ? V7 : Py_None; | |
486 Py_INCREF(py_V7); | |
487 | |
488 PyObject* old = PyList_GET_ITEM(storage_V7, 0); | |
489 {Py_XINCREF(py_V7);} | |
490 PyList_SET_ITEM(storage_V7, 0, py_V7); | |
491 {Py_XDECREF(old);} | |
492 } | |
493 | |
494 Py_XDECREF(V7); | |
495 | |
496 {Py_XDECREF(py_V7);} | |
497 | |
498 double __DUMMY_8; | |
499 | |
500 } | |
501 __label_6: | |
502 | |
503 if (V5) { | |
504 Py_XDECREF(V5); | |
505 } | |
506 | |
507 {Py_XDECREF(py_V5);} | |
508 | |
509 double __DUMMY_6; | |
510 | |
511 } | |
512 __label_4: | |
513 | |
514 Py_XDECREF(V3); | |
515 | |
516 {Py_XDECREF(py_V3);} | |
517 | |
518 double __DUMMY_4; | |
519 | |
520 } | |
521 __label_2: | |
522 | |
523 if (!__failure) { | |
524 | |
525 //std::cerr << "sync\n"; | |
526 if (NULL == V1) { | |
527 // failure: sync None to storage | |
528 Py_XDECREF(py_V1); | |
529 py_V1 = Py_None; | |
530 Py_INCREF(py_V1); | |
531 } | |
532 else | |
533 { | |
534 if (py_V1 != (PyObject*)V1) | |
535 { | |
536 Py_XDECREF(py_V1); | |
537 py_V1 = (PyObject*)V1; | |
538 Py_INCREF(py_V1); | |
539 } | |
540 assert(py_V1->ob_refcnt); | |
541 } | |
542 | |
543 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
544 {Py_XINCREF(py_V1);} | |
545 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
546 {Py_XDECREF(old);} | |
547 } | |
548 | |
549 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
550 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
551 if (V1) | |
552 { | |
553 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
554 Py_XDECREF(V1); | |
555 } | |
556 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
557 | |
558 {Py_XDECREF(py_V1);} | |
559 | |
560 double __DUMMY_2; | |
561 | |
562 } | |
563 | |
564 | |
565 if (__failure) { | |
566 // When there is a failure, this code puts the exception | |
567 // in __ERROR. | |
568 PyObject* err_type = NULL; | |
569 PyObject* err_msg = NULL; | |
570 PyObject* err_traceback = NULL; | |
571 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
572 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
573 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
574 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
575 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
576 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
577 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
578 PyList_SET_ITEM(__ERROR, 0, err_type); | |
579 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
580 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
581 {Py_XDECREF(old_err_type);} | |
582 {Py_XDECREF(old_err_msg);} | |
583 {Py_XDECREF(old_err_traceback);} | |
584 } | |
585 // The failure code is returned to index what code block failed. | |
586 return __failure; | |
587 | |
588 } | |
589 }; | |
590 | |
591 | |
592 int __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor(__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* self) { | |
593 return self->run(); | |
594 } | |
595 | |
596 void __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor(void* executor, void* self) { | |
597 delete ((__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6*)self); | |
598 } | |
599 | |
600 ////////////////////// | |
601 //// Functions | |
602 ////////////////////// | |
603 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
604 assert(PyTuple_Check(argtuple)); | |
605 if (5 != PyTuple_Size(argtuple)){ | |
606 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 5, got %i", (int)PyTuple_Size(argtuple)); | |
607 return NULL; | |
608 } | |
609 __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6* struct_ptr = new __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6(); | |
610 struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4) ); | |
611 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_executor), struct_ptr, __struct_compiled_op_0a2742cf42fdbba4c958f02e9b7af2f6_destructor); | |
612 return thunk; } | |
613 | |
614 ////////////////////// | |
615 //// Module init | |
616 ////////////////////// | |
617 static PyMethodDef MyMethods[] = { | |
618 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
619 {NULL, NULL, 0, NULL} | |
620 }; | |
621 PyMODINIT_FUNC init0a2742cf42fdbba4c958f02e9b7af2f6(void){ | |
622 import_array(); | |
623 (void) Py_InitModule("0a2742cf42fdbba4c958f02e9b7af2f6", MyMethods); | |
624 } | |
625 | |
=============================== | |
In file included from /usr/include/python2.7/Python.h:8:0, | |
from mod.cu:1: | |
/usr/include/python2.7/pyconfig.h:1161:0: warning: "_POSIX_C_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:164:0: note: this is the location of the previous definition | |
/usr/include/python2.7/pyconfig.h:1183:0: warning: "_XOPEN_SOURCE" redefined [enabled by default] | |
/usr/include/features.h:166:0: note: this is the location of the previous definition | |
mod.cu:5:20: fatal error: /usr/local/cuda-5.5/include/curand.h: Permission denied | |
compilation terminated. | |
['nvcc', '-shared', '-g', '-O3', '-arch=sm_30', '-m64', '-Xcompiler', '-Wno-write-strings,-Wno-unused-label,-Wno-unused-variable,-fno-math-errno,-DCUDA_NDARRAY_CUH=cdfd37325f98c49dfd27419bb10b2bac,-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY,-D NPY_ARRAY_ALIGNED=NPY_ALIGNED,-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE,-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL,-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS,-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS,-fPIC', '-Xlinker', '-rpath,/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/local/cuda-5.5/include', '-I/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-I/usr/lib/python2.7/dist-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/ludwig/Theano/theano/sandbox/cuda', '-o', '/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/tmp8OAkSL/0a2742cf42fdbba4c958f02e9b7af2f6.so', 'mod.cu', '-L/home/ludwig/.theano/compiledir_Linux-3.2.0-48-generic-x86_64-with-Ubuntu-12.04-precise-x86_64-2.7.3-64/cuda_ndarray', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib', '-L/usr/local/cuda-5.5/lib64', '-L/usr/lib', '-lpython2.7', '-lcudart', '-lcublas', '-lcurand', '-lcuda_ndarray'] | |
E....DeepCopyOp [@A] '' 0 | |
|<CudaNdarrayType(float32, matrix)> [@B] | |
DeepCopyOp [@A] '' 0 | |
|<CudaNdarrayType(float32, matrix)> [@B] | |
.................................................................EE........................................Segmentation fault (core dumped) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment