; $Id: sqrtv32.asm,v 1.15 2012/02/02 12:07:04 netdev Exp $ ;*********************************************************** ; Function : sqrt_32 ; Processor: C55x ; Description: Square root of a 32-bit number ; C-callable ; History: ; - Li Yuan 01/30/2002 (sqrt_16) ; fixed the bug where NX should be stored in register T0 instead of AR2. ; fixed code to work in large block memory by correct XAR loading. ; - Brian Willoughby 03/01/2011 ; fixed the bug where "rounding" the result resulted in off-by-one (+1). ; optimized code to use ROUND modes plus other specialized opcodes. ; added variation to support 32-bit input. ; optimized with additional parallelization of code. ; ;**************************************************************** ; bool sqrt_32(LDATA *in, DATA *out, int nx) .CPL_on .mmregs .c54cm_off .def _sqrt_32 ; assign registers and set status bits ;------------------------------------- .asg AR4, AR_TBL .asg XAR4, XAR_TBL .asg AR0, AR_X ;input vector .asg AR1, AR_R ;output vector _sqrt_32: PSH mmap(ST0_55) ; bclr ACOV0 PSH mmap(ST1_55) ; bset SATD PSH mmap(ST2_55) ; bclr RDM PSH mmap(ST3_55) ; bset SMUL PSHBOTH XAR5 || BCLR ACOV0 PSHBOTH XAR6 || BSET SATD ; Set Number Of Iterations of Sqrt Algorithm (N=5) ;------------------------------------------------ MOV #4, BRC1 ;inner loop counter set to no: of iterations-1 || BCLR RDM AMOV #30, T1 || SUB #1, T0 ; N -= 1 MOV T0, BRC0 ;Block repeat counter set to no: of inputs-1 ; Initialize constants ;--------------------- AMOV #SqrtTable32, XAR6 ;initialize square root lookup table BSET SMUL || RPTB loop1-1 ; Get next input value in array, x ;---------------------------------------------------------------- MOV dbl(*AR_X+), AC0 ;load the input to AC0 ; Normalize input value ;---------------------- MANT AC0, AC0 ; normalize the input :: NEXP AC0, T0 ; gives the negative of the exponent NEG T0 ; negate the exponent || MOV #5E00h << #16, AC1 ; Initial value for Ynorm SFTS AC0, #-1 ; /2 || CMPU T0 > T1, TC1 ; check for out of bounds values (negative inputs) MOV AC0, AC2 || XCC TC1 MOV T1, T0 ; Load normalized estimate of square root ; Ynorm(new) = Ynorm(old) - (Ynorm(old)^2 - Xnorm)/2 ;--------------------------------------------------- RPTBLOCAL loop2-1 ; do 5 iterations MOV AC2, AC0 || NOP_16 ; CPU_116 SQS AC1, AC0 ADD AC0, AC1 ROUND AC1 loop2: ;inner loop ends here ; Use lookup table to find SQRT of exponent ; lookup table index == exponent ;------------------------------------------ AMAR *AR6(T0), XAR_TBL ; &SqrtTable[exp] ; Multiply sqrt(Ynorm) * sqrt(normalized_exponent) ; And round the result ;------------------------------------------------- MPYMR *AR_TBL, AC1, AC0 MOV HI(AC0<<#1), *AR_R+ ; Store result in AR_R loop1: ;outer loop ends here ;Return overflow flag ;--------------------- MOV #0, T0 || XCCPART overflow(AC0) MOV #1, T0 ; Pop off registers and restore c environment ;-------------------------------------------- POPBOTH XAR6 POPBOTH XAR5 POP mmap(ST3_55) POP mmap(ST2_55) POP mmap(ST1_55) POP mmap(ST0_55) ;Return to calling function ;--------------------------- RET ; Square root lookup table ;------------------------------------------------- ; Ytable = 1/sqrt(2^n) values: ; .data .def SqrtTable32 SqrtTable32: .word 7FFFh ; 1/sqrt(2^0) = 0.99997 .word 5A82h ; 1/sqrt(2^1) = 0.70711 .word 4000h ; 1/sqrt(2^2) = 0.50000 .word 2D41h ; 1/sqrt(2^3) = 0.35355 .word 2000h ; 1/sqrt(2^4) = 0.25000 .word 16A1h ; 1/sqrt(2^5) = 0.17678 .word 1000h ; 1/sqrt(2^6) = 0.12500 .word 0B50h ; 1/sqrt(2^7) = 0.08839 .word 0800h ; 1/sqrt(2^8) = 0.06250 .word 05A8h ; 1/sqrt(2^9) = 0.04419 .word 0400h ; 1/sqrt(2^10) = 0.03125 .word 02D4h ; 1/sqrt(2^11) = 0.02210 .word 0200h ; 1/sqrt(2^12) = 0.01563 .word 016Ah ; 1/sqrt(2^13) = 0.01105 .word 0100h ; 1/sqrt(2^14) = 0.00781 .word 00B5h ; 1/sqrt(2^15) = 0.00552 .word 0080h ; 1/sqrt(2^16) = 0.00391 .word 0058h ; 1/sqrt(2^17) = 0.00276 .word 0040h ; 1/sqrt(2^18) = 0.00195 .word 002Dh ; 1/sqrt(2^19) = 0.00138 .word 0020h ; 1/sqrt(2^20) = 0.00098 .word 0017h ; 1/sqrt(2^21) = 0.00069 .word 0010h ; 1/sqrt(2^22) = 0.00049 .word 000Bh ; 1/sqrt(2^23) = 0.00035 .word 0008h ; 1/sqrt(2^24) = 0.00024 .word 0006h ; 1/sqrt(2^25) = 0.00017 .word 0004h ; 1/sqrt(2^26) = 0.00012 .word 0003h ; 1/sqrt(2^27) = 0.00009 .word 0002h ; 1/sqrt(2^28) = 0.00006 .word 0001h ; 1/sqrt(2^29) = 0.00004 .word 0000h