Commit 103674f0 authored by Christopher Werner's avatar Christopher Werner
Browse files

Merge branch 'master' of git.ichec.ie:training/sohpc-training-2021 into cw-python

parents c26e6902 858d578a
......@@ -9,4 +9,4 @@
#SBATCH --output=sohpc_test.log
python setup.py build_ext --inplace
python run.py
\ No newline at end of file
python run.py
\ No newline at end of file
import numpy as np
def kernel(zr, zi, cr, ci, radius, num_iters):
    """Count iterations of z -> z**2 + c before z leaves the escape radius.

    The complex numbers are passed as separate real/imaginary parts.

    Args:
        zr: Real part of the starting point z.
        zi: Imaginary part of the starting point z.
        cr: Real part of the constant c.
        ci: Imaginary part of the constant c.
        radius: Escape radius; iteration stops once |z| is no longer
            strictly below it.
        num_iters: Maximum number of iterations to perform.

    Returns:
        The number of iterations performed, between 0 and ``num_iters``.
    """
    # The squared radius never changes inside the loop, so compute it once
    # and compare against |z|**2 to avoid a square root.
    radius_sq = radius * radius
    count = 0
    while count < num_iters and (zr * zr + zi * zi) < radius_sq:
        # Both components must be updated from the *old* zr/zi, hence the
        # simultaneous tuple assignment.
        zr, zi = zr * zr - zi * zi + cr, 2 * zr * zi + ci
        count += 1
    return count
def compute_mandel_py(cr, ci, N, bound, radius=1000.):
    """Build an N x N array of escape counts using the pure-Python kernel.

    The same ``N`` evenly spaced samples in ``[-bound, bound]`` are used for
    both axes.  Entry ``[i, j]`` holds the value returned by ``kernel`` for
    the starting point ``(samples[i], samples[j])`` with constant
    ``(cr, ci)``; ``N`` is also passed as the iteration cap.

    Args:
        cr: Real part of the constant c.
        ci: Imaginary part of the constant c.
        N: Number of samples per axis (and maximum iterations per point).
        bound: Half-width of the sampled square.
        radius: Escape radius handed to ``kernel``.

    Returns:
        An ``(N, N)`` integer NumPy array of iteration counts.
    """
    counts = np.empty((N, N), dtype=int)
    samples = np.linspace(-bound, bound, N)
    for row, start_r in enumerate(samples):
        # Fill one full row at a time; the column order matches `samples`.
        counts[row, :] = [
            kernel(start_r, start_i, cr, ci, radius, N)
            for start_i in samples
        ]
    return counts
\ No newline at end of file
# cython: boundscheck=False
# cython: wraparound=False
import numpy as np
cimport numpy as cnp
cdef int kernel(double zr, double zi, double cr, double ci, double radius, int num_iters):
    # Iterate z -> z**2 + c on C doubles, with z = (zr, zi) and c = (cr, ci).
    # Returns how many iterations ran before |z|**2 stopped being strictly
    # below radius**2, capped at num_iters.
    cdef int steps = 0
    cdef double escape_sq = radius * radius
    cdef double zr_next
    while (zr * zr + zi * zi) < escape_sq and steps < num_iters:
        # Compute the new real part into a temporary so the imaginary
        # update still sees the old zr.
        zr_next = zr * zr - zi * zi + cr
        zi = 2 * zr * zi + ci
        zr = zr_next
        steps += 1
    return steps
def compute_mandel_cyt(double cr, double ci, int N, double bound, double radius=1000.):
    """Build an N x N array of escape counts using the compiled kernel.

    The same ``N`` evenly spaced samples in ``[-bound, bound]`` are used for
    both axes.  Entry ``[i, j]`` holds the value returned by ``kernel`` for
    the starting point ``(grid_x[i], grid_x[j])`` with constant
    ``(cr, ci)``; ``N`` is also passed as the iteration cap.

    Args:
        cr: Real part of the constant c.
        ci: Imaginary part of the constant c.
        N: Number of samples per axis (and maximum iterations per point).
        bound: Half-width of the sampled square.
        radius: Escape radius handed to ``kernel``.

    Returns:
        An ``(N, N)`` integer NumPy array of iteration counts.
    """
    cdef cnp.ndarray[cnp.int_t, ndim=2] mandel = np.empty((N, N), dtype=int)
    # A typed, contiguous memoryview lets the loops below read C doubles by
    # index instead of pulling Python float objects out of an iterator.
    cdef double[::1] grid_x = np.linspace(-bound, bound, N)
    cdef int i, j
    cdef double x
    for i in range(N):
        x = grid_x[i]
        for j in range(N):
            mandel[i, j] = kernel(x, grid_x[j], cr, ci, radius, N)
    return mandel
\ No newline at end of file
import time
from mandlebrot import compute_mandel_py
from mandlebrot_cython import compute_mandel_cyt
# Arguments shared by both implementations so the timings are comparable.
kwargs = dict(cr=0.3852, ci=-0.2026,
              N=400,
              bound=1.2)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations; time.time() can jump with system clock
# adjustments and may be too coarse to time the fast compiled run.
t0 = time.perf_counter()
mandel = compute_mandel_py(**kwargs)
runtime_p = time.perf_counter() - t0
print("Mandelbrot set generated in {} seconds\n".format(runtime_p))

t0 = time.perf_counter()
mandel = compute_mandel_cyt(**kwargs)
runtime_c = time.perf_counter() - t0
print("Mandelbrot set generated in {} seconds\n".format(runtime_c))

# Ratio of pure-Python time to Cython time.
print("Speed Up is: %f" % (runtime_p/runtime_c))
\ No newline at end of file
from distutils.core import setup
from Cython.Build import cythonize
import numpy
# Translate the .pyx source into extension modules, then build them with the
# NumPy headers on the include path (needed for `cimport numpy`).
# NOTE(review): distutils is deprecated (PEP 632) and no longer ships with
# Python 3.12+; consider importing `setup` from setuptools instead.
extensions = cythonize("mandlebrot_cython.pyx")
numpy_headers = [numpy.get_include()]
setup(
    ext_modules=extensions,
    include_dirs=numpy_headers,
)
......@@ -4,5 +4,7 @@ Answer the following questions
- Which version of the evolve function responds better to optimisation by Numba, the Python or the NumPy one?
- Try eager compilation on the better function - record time
- Try things like `parallel=True`, `prange`, `cache=True`, `fastmath=True`. What combination gave the best optimisation? Which loop is better to have the `prange` on?
- `njit` the iterate function as well, notice anything wrong when you parallelise the iterate function?
\ No newline at end of file
- Try things like `parallel=True` with `prange`, `cache=True`, `fastmath=True`. What combination gave the best optimisation?
- Which loop is better to have the `prange` on?
- `njit` the iterate function as well, notice anything wrong when you parallelise the iterate function?
[Note: you will have to remove or comment out some Python-specific parts]
......@@ -4,5 +4,46 @@ Answer the following questions
- Which version of the evolve function responds better to optimisation by Numba, the Python or the NumPy one?
- Try eager compilation on the better function - record time
- Try things like `parallel=True`, `prange`, `cache=True`, `fastmath=True`. What combination gave the best optimisation? Which loop is better to have the `prange` on?
- `njit` the iterate function as well, notice anything wrong when you parallelise the iterate function?
\ No newline at end of file
- Try things like `parallel=True` with `prange`, `cache=True`, `fastmath=True`. What combination gave the best optimisation?
- Which loop is better to have the `prange` on?
- `njit` the iterate function as well, notice anything wrong with heat_200.png when you parallelise the iterate function?
[Note: you will have to remove or comment out some Python-specific parts]
Solutions:
- Numba performed better on the Python version.
- Eager compilation gives the best performance.
- The best result was with `prange` on the first loop.
Time for heat_equation python_njit- 2.952451
Time for heat_equation numpy_njit- 5.289980
Time for heat_equation eag- 1.372720
Time for heat_equation cache- 2.527283
Time for heat_equation cachefm- 2.196652
Time for heat_equation par- 2.049756
Time for heat_equation parfm- 2.130447
Time for heat_equation eager & njit iterate - 1.606561
Time for heat_equation eager & njit parallel iterate - 2.002234
To look at the heat_200.png image, you need to copy it to your own machine with something like:
scp course01@kay.ichec.ie:/ichec/home/users/course01/sohpc-training-2021/D2-Python/04- Numba/solutions/big_exercises/heat_200.png
The following needed to be removed in order to `njit` the iterate function:
```
if i % image_interval == 0:
write_field(field, i)
```
Parallelising the iterate function gives the wrong output! This makes sense: the system evolves based on the previous iteration, so the iterations cannot be run in parallel.
#!/bin/bash
# Slurm batch script: runs ./prog from the submission directory with
# OMP_NUM_THREADS set to 20.
#SBATCH --partition=CourseDevQ
#SBATCH --reservation=SOHPCCourse
#SBATCH --nodes=1
#SBATCH --time=00:10:00
#SBATCH --account=course

module load intel gcc

# Quote the path so directories containing spaces work, and stop the job if
# the directory change fails rather than running ./prog from the wrong place.
cd "$SLURM_SUBMIT_DIR" || exit 1

export OMP_NUM_THREADS=20

# Run OMP code
./prog
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment