Patrikios · September 20, 2022 15:42
diff --git a/multithreading.jl b/multithreading.jl
 # Multithreaded programming in Julia

 #= 
    Julia supports multiple types of parallelism
    - SIMD (handled by the compiler) -> for instance, in the package StaticArrays.jl short static vectors tend to encourage SIMD operations, a further reason why the package is so efficient
    - Threads with shared memory (this document touches upon this point)
    - Distributed (multi-node, not sharing memory)
    - GPUs
 =#

 #=
    Getting started with Threads
    - use `julia -t 4` to start julia with 4 Threads
    - In VSCode use the settings' julia.numThreads variable
 =#

 # Number of threads available to this Julia session (4 when started with `julia -t 4`).
 Threads.nthreads()
 # 4
 # ID of the thread that is running the current task.
 Threads.threadid()
 # 1

 #=
    Julia uses a task-based model, where it has a fixed number of threads
    and schedules defined pieces of work (Tasks) onto them.
    The Julia team likes this model because, for them, it is 'composable'.
 =#

 using Images, Statistics
 using Base.Threads: @spawn, @threads, threadid

 # Spawn a task that reports the ID of the thread it runs on; `fetch` waits for the task and returns its result.
 x = @spawn threadid()
 fetch(x)

 # Plain serial loop: every iteration runs in the current task.
 for i = 1:10
    println( "Hello from thread ", threadid() )
 end

 # Same loop under `@threads`: the iterations are divided among the available threads.
 @threads for i in 1:10
    println( "Hello from thread ", threadid() )
 end

 # A single spawned task; nothing waits for it here.
 @spawn println( "Hello from thread ", threadid() )

 # `@sync` returns only after all four tasks spawned inside the block have finished.
 @sync begin
    @spawn println( "Hello from thread ", threadid() )
    @spawn println( "Hello from thread ", threadid() )
    @spawn println( "Hello from thread ", threadid() )
    @spawn println( "Hello from thread ", threadid() )
 end

 # Long-running computation in the background; `fetch` blocks until the mean is available.
 # NOTE(review): randn(1_000_000_000) allocates 10^9 Float64 values (about 8 GB) — confirm the machine has enough memory.
 t1 = @spawn mean(randn(1_000_000_000))
 fetch(t1)

 ##--- prove there are multiple threads ---------------------------------------------------

 # Meant to be run interactively, statement by statement: the spawned task
 # busy-waits on the global `x` until it is changed from the main session.
 x = 1
 @spawn begin
    # Spin for as long as the global `x` equals 1.
    while x == 1
    end
    println("done!")
 end

 1 + 1 # you can run this while the task above is still spinning: the loop runs in the background and does not block the main session
 x = 0 # ends the background loop, because x is no longer equal to 1
 # NOTE(review): `x` is a plain global that is read and written from different threads
 # without synchronization — consider an atomic flag; confirm.

 #--- memory is shared, hence the spawned thread has access to the main thread's memory ---

 using DataFrames
 using Statistics

 # Build a large DataFrame on a background task while the main task keeps running.
 t = @spawn begin
    nrows = 100_000_000
    frame = DataFrame(x = 1:nrows)
    total = sum(frame.x)
    # Constant column: every row holds the sum of column `x`.
    frame.y = fill(total, nrows)
    frame
 end
 # Printed without waiting for the spawned work to finish.
 print("Hello world!")
 # Block until the task is done and take the DataFrame it returns.
 df = fetch(t)

 #-----------------------------------------------------------------------------------------

 #=
    Use @threads for loops with uniform iterations
    use @spawn for unbalanced and nested parallelism
 =#

 ## example for unbalanced parallelism

 """
     escapetime(z; maxiter = 80)

 Iterate `z -> z^2 + c`, where `c` is the initial value of `z`, and return how many
 updates were completed before `abs(z)` exceeded 2. Return `maxiter` if the bound is
 never exceeded within `maxiter` iterations.
 """
 function escapetime(z; maxiter = 80)
    seed = z
    for step in 1:maxiter
        # The check comes before the update, so a start point already outside radius 2 yields 0.
        abs(z) > 2 && return step - 1
        z = z^2 + seed
    end
    return maxiter
 end

 """
     mandel(; width = 80, height = 20, maxiter = 80)

 Return a `height × width` `Matrix{Int}` of escape times. Column `col` corresponds to
 the real part `range(-2.0, 0.5, length = width)[col]` and row `row` to the imaginary
 part `range(-1.0, 1.0, length = height)[row]`; each entry is
 `escapetime(z, maxiter = maxiter)` for that grid point `z`.
 """
 function mandel(; width = 80, height = 20, maxiter = 80)
    out = zeros(Int, height, width)
    # Named `re_axis`/`im_axis` so that Base's `real` and `imag` functions are not shadowed.
    re_axis = range(-2.0, 0.5, length = width)
    im_axis = range(-1.0, 1.0, length = height)
    for col in 1:width
        # Rows vary fastest: Julia arrays are column-major.
        for row in 1:height
            z = re_axis[col] + im_axis[row] * im
            # `out` is height × width, so the row index comes first.
            out[row, col] = escapetime(z, maxiter = maxiter)
        end
    end
    return out
 end

 mandel(width = 80, height = 20, maxiter = 80)

 # NOTE: this code, a one-to-one copy from https://www.youtube.com/watch?v=FzhipiZO4Jk, originally refused to run:
 # `out` is height × width but was indexed as `out[x, y]` with `x` in 1:width, which throws a
 # BoundsError whenever width != height. It is indexed as `out[row, col]` now.

 using ThreadsX #parallel versions of common algos like sum, sort, max, min etc

 # Basel problem: the sum of 1/n^2 converges to pi^2/6, so both expressions approximate pi.
 # `@time` on a first call also includes compilation time; run each line twice for a fair comparison.
 # Baseline using Base's `sum`:
 @time sqrt(6 * sum(1/n^2 for n in 1:1_000_000_000))
 # Same reduction using the parallel counterpart from ThreadsX:
 @time sqrt(6 * ThreadsX.sum(1/n^2 for n in 1:1_000_000_000))

 #=
 I/O 
    - I/O is integrated with the task system
    - file, socket and pipe I/O is thread-safe and can overlap
    - compute tasks can overlap with I/O latency
    - file I/O can be sped up a bit using 'open(filename, lock = false)' when you do NOT need multi-threaded access to the file (the lock exists to make multi-threaded access safe)
 =#

 #=
 Caveats
    - data races are possible but fairly easy to avoid with these higher-level constructs
    - Base julia data structures are usually not thread-safe by default
    - for more advanced cases:
        - channels (inter-task communication)
        - locks
        - atomic operations
 =#

 # race condition
 x = [0]
 @threads for i = 1:1000
    # `+=` on an array element is a separate read and write, so concurrent
    # iterations can overwrite each other's updates.
    x[1] += 1
 end
 x # may hold less than 1000 because of lost updates

 # VS

 # no race condition; however, the atomic is an expensive operation
 x = Threads.Atomic{Int}(0)
 @threads for i = 1:1000
    # The increment is performed as one indivisible operation.
    Threads.atomic_add!(x, 1)
 end
 x[] # read the current value stored in the atomic


 # @async is an older macro, not really necessary anymore; one can use @spawn instead

 # NOTE(review): `f` and `g` are not defined in the visible part of this file, so this line
 # cannot run as-is — presumably a syntax illustration of `$` interpolation into a task; confirm.
 @async f($(g(x)), 1)
	# Multithreaded programming in Julia

	#=
	Julia supports multiple types of parallelism
	- SIMD (handled by the compiler) -> for instance, in the package StaticArrays.jl short static vectors tend to encourage SIMD operations, a further reason why the package is so efficient
	- Threads with shared memory (this document touches upon this point)
	- Distributed (multi-node, not sharing memory)
	- GPUs
	=#

	#=
	Getting started with Threads
	- use `julia -t 4` to start julia with 4 Threads
	- In VSCode use the settings' julia.numThreads variable
	=#

	# Number of threads available to this Julia session (4 when started with `julia -t 4`).
	Threads.nthreads()
	# 4
	# ID of the thread that is running the current task.
	Threads.threadid()
	# 1

	#=
	Julia uses a task-based model, where it has a fixed number of threads
	and schedules defined pieces of work (Tasks) onto them.
	The Julia team likes this model because, for them, it is 'composable'.
	=#

	using Images, Statistics
	using Base.Threads: @spawn, @threads, threadid

	# Spawn a task that reports the ID of the thread it runs on; `fetch` waits for the task and returns its result.
	x = @spawn threadid()
	fetch(x)

	# Plain serial loop: every iteration runs in the current task.
	for i = 1:10
	println( "Hello from thread ", threadid() )
	end

	# Same loop under `@threads`: the iterations are divided among the available threads.
	@threads for i in 1:10
	println( "Hello from thread ", threadid() )
	end

	# A single spawned task; nothing waits for it here.
	@spawn println( "Hello from thread ", threadid() )

	# `@sync` returns only after all four tasks spawned inside the block have finished.
	@sync begin
	@spawn println( "Hello from thread ", threadid() )
	@spawn println( "Hello from thread ", threadid() )
	@spawn println( "Hello from thread ", threadid() )
	@spawn println( "Hello from thread ", threadid() )
	end

	# Long-running computation in the background; `fetch` blocks until the mean is available.
	# NOTE(review): randn(1_000_000_000) allocates 10^9 Float64 values (about 8 GB) — confirm the machine has enough memory.
	t1 = @spawn mean(randn(1_000_000_000))
	fetch(t1)

	##--- prove there are multiple threads ---------------------------------------------------

	# Meant to be run interactively, statement by statement: the spawned task
	# busy-waits on the global `x` until it is changed from the main session.
	x = 1
	@spawn begin
	# Spin for as long as the global `x` equals 1.
	while x == 1
	end
	println("done!")
	end

	1 + 1 # you can run this while the task above is still spinning: the loop runs in the background and does not block the main session
	x = 0 # ends the background loop, because x is no longer equal to 1
	# NOTE(review): `x` is a plain global that is read and written from different threads
	# without synchronization — consider an atomic flag; confirm.

	#--- memory is shared, hence the spawned thread has access to the main thread's memory ---

	using DataFrames
	using Statistics

	# Build a large DataFrame on a background task while the main task keeps running.
	t = @spawn begin
	    n = 100_000_000
	    d1 = DataFrame(x = 1:n)
	    # Plain call instead of the markdown-escaped `\|>` that stood here, which is not valid Julia.
	    temp = sum(d1.x)
	    # Constant column: every row holds the sum of column `x`.
	    d1.y = fill(temp, n)
	    d1
	end
	# Printed without waiting for the spawned work to finish.
	print("Hello world!")
	# Block until the task is done and take the DataFrame it returns.
	df = fetch(t)

	#-----------------------------------------------------------------------------------------

	#=
	Use @threads for loops with uniform iterations
	use @spawn for unbalanced and nested parallelism
	=#

	## example for unbalanced parallelism

	"""
	    escapetime(z; maxiter = 80)

	Iterate `z -> z^2 + c`, where `c` is the initial value of `z`, and return how many
	updates were completed before `abs(z)` exceeded 2. Return `maxiter` if the bound is
	never exceeded within `maxiter` iterations.
	"""
	function escapetime(z; maxiter = 80)
	    start = z
	    for iter in 1:maxiter
	        # The check comes before the update, so a start point already outside radius 2 yields 0.
	        if abs(z) > 2
	            return iter - 1
	        end
	        z = z^2 + start
	    end
	    return maxiter
	end

	"""
	    mandel(; width = 80, height = 20, maxiter = 80)

	Return a `height × width` `Matrix{Int}` of escape times. Column `col` corresponds to
	the real part `range(-2.0, 0.5, length = width)[col]` and row `row` to the imaginary
	part `range(-1.0, 1.0, length = height)[row]`; each entry is
	`escapetime(z, maxiter = maxiter)` for that grid point `z`.
	"""
	function mandel(; width = 80, height = 20, maxiter = 80)
	    out = zeros(Int, height, width)
	    # Named `re_axis`/`im_axis` so that Base's `real` and `imag` functions are not shadowed.
	    re_axis = range(-2.0, 0.5, length = width)
	    im_axis = range(-1.0, 1.0, length = height)
	    for col in 1:width
	        # Rows vary fastest: Julia arrays are column-major.
	        for row in 1:height
	            z = re_axis[col] + im_axis[row] * im
	            # `out` is height × width, so the row index comes first.
	            out[row, col] = escapetime(z, maxiter = maxiter)
	        end
	    end
	    return out
	end

	mandel(width = 80, height = 20, maxiter = 80)

	# NOTE: this code, a one-to-one copy from https://www.youtube.com/watch?v=FzhipiZO4Jk, originally refused to run:
	# `out` is height × width but was indexed as `out[x, y]` with `x` in 1:width, which throws a
	# BoundsError whenever width != height. It is indexed as `out[row, col]` now.

	using ThreadsX #parallel versions of common algos like sum, sort, max, min etc

	# Basel problem: the sum of 1/n^2 converges to pi^2/6, so both expressions approximate pi.
	# `@time` on a first call also includes compilation time; run each line twice for a fair comparison.
	# Baseline using Base's `sum`:
	@time sqrt(6 * sum(1/n^2 for n in 1:1_000_000_000))
	# Same reduction using the parallel counterpart from ThreadsX:
	@time sqrt(6 * ThreadsX.sum(1/n^2 for n in 1:1_000_000_000))

	#=
	I/O
	- I/O is integrated with the task system
	- file, socket and pipe I/O is thread-safe and can overlap
	- compute tasks can overlap with I/O latency
	- file I/O can be sped up a bit using 'open(filename, lock = false)' when you do NOT need multi-threaded access to the file (the lock exists to make multi-threaded access safe)
	=#

	#=
	Caveats
	- data races are possible but fairly easy to avoid with these higher-level constructs
	- Base julia data structures are usually not thread-safe by default
	- for more advanced cases:
	    - channels (inter-task communication)
	    - locks
	    - atomic operations
	=#

	# race condition
	x = [0]
	@threads for i = 1:1000
	# `+=` on an array element is a separate read and write, so concurrent
	# iterations can overwrite each other's updates.
	x[1] += 1
	end
	x # may hold less than 1000 because of lost updates

	# VS

	# no race condition; however, the atomic is an expensive operation
	x = Threads.Atomic{Int}(0)
	@threads for i = 1:1000
	# The increment is performed as one indivisible operation.
	Threads.atomic_add!(x, 1)
	end
	x[] # read the current value stored in the atomic


	# @async is an older macro, not really necessary anymore; one can use @spawn instead

	# NOTE(review): `f` and `g` are not defined in the visible part of this file, so this line
	# cannot run as-is — presumably a syntax illustration of `$` interpolation into a task; confirm.
	@async f($(g(x)), 1)