ARM application development SDK for MNT ZZ9000 graphics and coprocessor card for classic Amiga computers.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ldivmod.S 5.0KB


  1. /* Runtime ABI for the ARM Cortex-M0
  2. * ldivmod.S: 64 bit division (quotient and remainder)
  3. *
  4. * Copyright (c) 2012-2017 Jörg Mische <bobbl@gmx.de>
  5. *
  6. * Permission to use, copy, modify, and/or distribute this software for any
  7. * purpose with or without fee is hereby granted, provided that the above
  8. * copyright notice and this permission notice appear in all copies.
  9. *
  10. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
  16. * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17. */
  18. .syntax unified @ use the unified ARM/Thumb assembler syntax for the whole file
  19. .text @ everything below is assembled into the code section
  20. .thumb @ emit Thumb instruction encodings
  21. .cpu cortex-a9 @ target core; NOTE(review): the file header still says Cortex-M0 - confirm intended target
  @ ----------------------------------------------------------------------
  @ {long long quotient, long long remainder}
  @ __aeabi_ldivmod(long long numerator, long long denominator)
  @
  @ Signed 64-bit division implemented on top of the unsigned routine.
  @   In:   r1:r0 = numerator, r3:r2 = denominator   (signed)
  @   Out:  r1:r0 = quotient,  r3:r2 = remainder     (signed)
  @
  @ Negative operands are negated before the unsigned division and the
  @ results are negated afterwards as follows:
  @   num <  0, den <  0 : remainder is negated
  @   num <  0, den >= 0 : quotient and remainder are negated
  @   num >= 0, den <  0 : quotient is negated
  @   num >= 0, den >= 0 : no fix-up, control goes straight to .Luldivmod
  @
  @ r4 is used as scratch for the negations; it is pushed together with lr
  @ (two words, so an 8-byte aligned sp stays 8-byte aligned across the bl).
  @ ----------------------------------------------------------------------

  @ Two's-complement negation of the 64-bit value hi:lo, r4 is scratch.
        .macro  ldivmod_neg64 lo, hi
        movs    r4, #0
        rsbs    \lo, \lo, #0
        sbcs    r4, r4, \hi
        mov     \hi, r4
        .endm

        .thumb_func
        .global __aeabi_ldivmod
  __aeabi_ldivmod:
        cmp     r1, #0
        bge     .Lldivmod_num_nonneg

        @ ---- numerator < 0 ----
        push    {r4, lr}
        @ num = -num
        ldivmod_neg64 r0, r1
        cmp     r3, #0
        bge     .Lldivmod_only_num_neg

        @ ---- numerator < 0, denominator < 0 ----
        @ den = -den
        ldivmod_neg64 r2, r3
        bl      __aeabi_uldivmod
        @ rem = -rem (quotient stays positive)
        ldivmod_neg64 r2, r3
        pop     {r4, pc}

        @ ---- numerator < 0, denominator >= 0 ----
  .Lldivmod_only_num_neg:
        bl      __aeabi_uldivmod
        @ quot = -quot
        ldivmod_neg64 r0, r1
        @ rem = -rem
        ldivmod_neg64 r2, r3
        pop     {r4, pc}

        @ ---- numerator >= 0 ----
  .Lldivmod_num_nonneg:
        cmp     r3, #0
        @ both operands non-negative: nothing to fix up, run the unsigned code
        bge     .Luldivmod

        @ ---- numerator >= 0, denominator < 0 ----
        push    {r4, lr}
        @ den = -den
        ldivmod_neg64 r2, r3
        bl      __aeabi_uldivmod
        @ quot = -quot (remainder stays positive)
        ldivmod_neg64 r0, r1
        pop     {r4, pc}
  @ ----------------------------------------------------------------------
  @ unsigned long long __udivdi3(unsigned long long num, unsigned long long denom)
  @
  @ libgcc wrapper: the label is an alias that falls straight through into
  @ __aeabi_uldivmod(); the remainder left in r3:r2 is simply ignored.
  @ ----------------------------------------------------------------------
        .thumb_func
        .global __udivdi3
  __udivdi3:

  @ ----------------------------------------------------------------------
  @ {unsigned long long quotient, unsigned long long remainder}
  @ __aeabi_uldivmod(unsigned long long numerator, unsigned long long denominator)
  @
  @ Unsigned 64-bit shift-and-subtract division.
  @   In:   r1:r0 = numerator, r3:r2 = denominator   (unsigned)
  @   Out:  r1:r0 = quotient,  r3:r2 = remainder     (unsigned)
  @
  @ Four cases are distinguished:
  @   1: num >= 2^32, denom <  2^32  -> upper 32 result bits, then lower 32
  @   2: denom == 0                  -> tail call to __aeabi_ldiv0
  @   3: num <  2^32, denom <  2^32  -> delegate to __aeabi_uidivmod
  @   4: denom >= 2^32               -> result fits in 32 bits
  @
  @ Register roles inside the loops:
  @   r3:r2 = running numerator / remainder    r7:r6 = shifted denominator
  @   r1:r0 = quotient being assembled         r5    = current result bit
  @   r4    = scratch for the 64-bit compare
  @
  @ .Luldivmod is a file-local entry used by __aeabi_ldivmod.
  @ ----------------------------------------------------------------------
        .thumb_func
        .global __aeabi_uldivmod
  __aeabi_uldivmod:
  .Luldivmod:
        cmp     r3, #0
        bne     .L_large_denom
        cmp     r2, #0
        beq     .L_division_by_0
        cmp     r1, #0
        beq     .L_fallback_32bits

        @ case 1: num >= 2^32 and denom < 2^32
        @ Result might be >= 2^32, therefore we first calculate the upper 32
        @ bits of the result. It is done similar to the calculation of the
        @ lower 32 bits, but with a denominator that is shifted by 32.
        @ Hence the lower 32 bits of the denominator are always 0 and the
        @ costly 64 bit shift and sub operations can be replaced by cheap 32
        @ bit operations.
        @ No call is made on this path, so the 5-word push is not an
        @ alignment concern.
        push    {r4, r5, r6, r7, lr}

        @ shift left the denominator until it is greater than the numerator
        @ denom(r7:r6) = r3:r2 << 32  (only the high word r7 is tracked here)
        movs    r5, #1                  @ bitmask
        adds    r7, r2, #0              @ r7 = denom, sets N: dont shift if denominator would overflow
        bmi     .L_upper_result
        cmp     r1, r7
        blo     .L_upper_result

  .L_denom_shift_loop1:
        lsls    r5, #1
        lsls    r7, #1
        bmi     .L_upper_result         @ dont shift if overflow
        cmp     r1, r7
        bhs     .L_denom_shift_loop1

  .L_upper_result:
        mov     r3, r1                  @ running numerator = r3:r2
        mov     r2, r0
        movs    r1, #0                  @ upper result = 0
        b       .L_sub_entry1

  .L_sub_loop1:
        lsrs    r7, #1                  @ denom high word >>= 1

  .L_sub_entry1:
        cmp     r3, r7
        bcc     .L_dont_sub1            @ skip unless num >= denom
        subs    r3, r7                  @ num -= denom
        orrs    r1, r5                  @ upper result (r1) |= bitmask(r5)

  .L_dont_sub1:
        lsrs    r5, #1                  @ bitmask(r5) >>= 1
        bne     .L_sub_loop1

        @ continue with the lower 32 result bits: bitmask = 1 << 31
        movs    r5, #1
        lsls    r5, #31
        lsls    r6, r7, #31             @ denom(r7:r6) = (r7:0) >> 1
        lsrs    r7, #1                  @ dont forget least significant bit!
        b       .L_lower_result

        @ case 2: division by 0
        @ tail call __aeabi_ldiv0 (nothing has been pushed yet)
  .L_division_by_0:
        b       __aeabi_ldiv0

        @ case 3: num < 2^32 and denom < 2^32
        @ fallback to 32 bit division: r0 = num, r1 = denom
  .L_fallback_32bits:
        mov     r1, r2
        @ r4 is pushed only as padding: two words keep sp 8-byte aligned at
        @ the call, as the procedure call standard requires at a public
        @ interface (a lone "push {lr}" would leave sp at 4 mod 8).
        push    {r4, lr}
        bl      __aeabi_uidivmod
        mov     r2, r1                  @ remainder low word
        movs    r1, #0                  @ quotient high word = 0
        movs    r3, #0                  @ remainder high word = 0
        pop     {r4, pc}

        @ case 4: denom >= 2^32
        @ result is smaller than 2^32
        @ No call is made on this path either.
  .L_large_denom:
        push    {r4, r5, r6, r7, lr}
        mov     r7, r3                  @ denom = r7:r6
        mov     r6, r2
        mov     r3, r1                  @ running numerator = r3:r2
        mov     r2, r0

        @ Shift left the denominator until it is greater than the numerator
        movs    r1, #0                  @ high word of result is 0
        movs    r5, #1                  @ bitmask
        adds    r7, #0                  @ sets N: dont shift if denominator would overflow
        bmi     .L_lower_result
        cmp     r3, r7
        blo     .L_lower_result

  .L_denom_shift_loop4:
        lsls    r5, #1
        lsls    r7, #1
        lsls    r6, #1                  @ carry = bit shifted out of the low word
        adcs    r7, r1                  @ r1=0, so this only adds the carry into r7
        bmi     .L_lower_result         @ dont shift if overflow
        cmp     r3, r7
        bhs     .L_denom_shift_loop4

        @ common tail of case 1 and case 4: compute the low result word r0
  .L_lower_result:
        eors    r0, r0                  @ lower result = 0

  .L_sub_loop4:
        mov     r4, r3                  @ 64-bit compare num(r3:r2) with denom(r7:r6)
        cmp     r2, r6
        sbcs    r4, r7
        bcc     .L_dont_sub4            @ skip unless num >= denom
        subs    r2, r6                  @ numerator -= denom
        sbcs    r3, r7
        orrs    r0, r5                  @ result(r1:r0) |= bitmask(r5)

  .L_dont_sub4:
        lsls    r4, r7, #31             @ denom(r7:r6) >>= 1
        lsrs    r6, #1
        lsrs    r7, #1
        orrs    r6, r4
        lsrs    r5, #1                  @ bitmask(r5) >>= 1
        bne     .L_sub_loop4

        @ quotient in r1:r0, remainder in r3:r2
        pop     {r4, r5, r6, r7, pc}