#*************************************************************************** #* * #* Copyright (c) 2011, 2012 * #* Jose Luis Cercos Pita * #* * #* This program is free software; you can redistribute it and/or modify * #* it under the terms of the GNU Lesser General Public License (LGPL) * #* as published by the Free Software Foundation; either version 2 of * #* the License, or (at your option) any later version. * #* for detail see the LICENCE text file. * #* * #* This program is distributed in the hope that it will be useful, * #* but WITHOUT ANY WARRANTY; without even the implied warranty of * #* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * #* GNU Library General Public License for more details. * #* * #* You should have received a copy of the GNU Library General Public * #* License along with this program; if not, write to the Free Software * #* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * #* USA * #* * #*************************************************************************** # numpy import numpy as np # pyOpenCL import pyopencl as cl from pyopencl.reduction import ReductionKernel import pyopencl.array as cl_array import clUtils import FreeCAD grav=9.81 class minres: def __init__(self, context, queue): """ Constructor. @param context OpenCL context where apply. @param queue OpenCL command queue. """ self.context = context self.queue = queue self.program = clUtils.loadProgram(context, clUtils.path() + "/minres.cl") # Create OpenCL objects as null objects, that we will generate # at the first iteration self.A = None self.B = None self.X0 = None self.X = None self.R = None # Create dot operator self.dot = ReductionKernel(context, np.float32, neutral="0", reduce_expr="a+b", map_expr="x[i]*y[i]", arguments="__global float *x, __global float *y") def solve(self, A, B, x0=None, tol=10e-6, iters=300): r""" Solve linear system of equations by a Jacobi iterative method. @param A Linear system matrix. @param B Linear system independent term. @param x0 Initial aproximation of the solution. @param tol Relative error tolerance: \n \$ \vert\vert B - A \, x \vert \vert_\infty / \vert\vert B \vert \vert_\infty \$ @param iters Maximum number of iterations. """ # Create/set OpenCL buffers self.setBuffers(A,B,x0) # Get dimensions for OpenCL execution n = np.uint32(len(B)) gSize = (clUtils.globalSize(n),) # Get a norm to can compare later for valid result B_cl = cl_array.to_device(self.context,self.queue,B) bnorm2 = self.dot(B_cl,B_cl).get() FreeCAD.Console.PrintMessage(bnorm2) FreeCAD.Console.PrintMessage("\n") # Iterate while the result converges or maximum number # of iterations is reached. for i in range(0,iters): # Compute residues kernelargs = (self.A, self.B, self.X0, self.R.data, n) # Test if the final result has been reached self.program.r(self.queue, gSize, None, *(kernelargs)) rnorm2 = self.dot(self.R,self.R).get() FreeCAD.Console.PrintMessage("\t") FreeCAD.Console.PrintMessage(rnorm2) FreeCAD.Console.PrintMessage("\n") if np.sqrt(rnorm2 / bnorm2) <= tol: break # Iterate kernelargs = (self.A, self.R.data, self.AR.data, n) self.program.dot_mat_vec(self.queue, gSize, None, *(kernelargs)) AR_R = self.dot(self.AR,self.R).get() AR_AR = self.dot(self.AR,self.AR).get() kernelargs = (self.A, self.R.data, self.X, self.X0, AR_R, AR_AR, n) self.program.minres(self.queue, gSize, None, *(kernelargs)) # Swap variables swap = self.X self.X = self.X0 self.X0 = swap # Return result computed x = np.zeros((n), dtype=np.float32) cl.enqueue_read_buffer(self.queue, self.X0, x).wait() return (x, np.sqrt(rnorm2 / bnorm2), i) def setBuffers(self, A,B,x0): """ Create/set OpenCL required buffers. @param A Linear system matrix. @param B Independent linear term. @param x0 Initial solution estimator. """ # Get dimensions shape = np.shape(A) if len(shape) != 2: raise ValueError, 'Matrix A must be 2 dimensional array' if shape[0] != shape[1]: raise ValueError, 'Square linear system matrix expected' if len(B) != shape[0]: raise ValueError, 'Matrix and independet term dimensions does not match' n = len(B) # Set x0 if not provided if x0 != None: if len(x0) != n: raise ValueError, 'Initial solution estimator length does not match with linear system dimensions' if x0 == None: x0 = B # Create OpenCL objects if not already generated if not self.A: mf = cl.mem_flags self.A = cl.Buffer( self.context, mf.READ_ONLY, size = n*n * np.dtype('float32').itemsize ) self.B = cl.Buffer( self.context, mf.READ_ONLY, size = n * np.dtype('float32').itemsize ) self.X0 = cl.Buffer( self.context, mf.READ_WRITE, size = n * np.dtype('float32').itemsize ) self.X = cl.Buffer( self.context, mf.READ_WRITE, size = n * np.dtype('float32').itemsize ) self.R = cl_array.zeros(self.context,self.queue, (n), np.float32) self.AR = cl_array.zeros(self.context,self.queue, (n), np.float32) # Transfer data to buffers events = [] events.append(cl.enqueue_write_buffer(self.queue, self.A, A.reshape((n*n)) )) events.append(cl.enqueue_write_buffer(self.queue, self.B, B)) events.append(cl.enqueue_write_buffer(self.queue, self.X0, x0)) for e in events: e.wait()