@@ -36,8 +36,7 @@
  */

 /*
- * unsigned int csum_partial(const unsigned char *buf, int len,
- *                           unsigned int sum);
+ * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
  */

 	.text
@@ -49,11 +48,31 @@ ENTRY(csum_partial)
 	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 	 * alignment for the unrolled loop.
 	 */
-	mov	r5, r1
 	mov	r4, r0
-	tst	#2, r0		! Check alignment.
-	bt	2f		! Jump if alignment is ok.
+	tst	#3, r0		! Check alignment.
+	bt/s	2f		! Jump if alignment is ok.
+	 mov	r4, r7		! Keep a copy to check for alignment
 	!
+	tst	#1, r0		! Check alignment.
+	bt	21f		! Jump if alignment is boundary of 2bytes.
+
+	! buf is odd
+	tst	r5, r5
+	add	#-1, r5
+	bt	9f
+	mov.b	@r4+, r0
+	extu.b	r0, r0
+	addc	r0, r6		! t=0 from previous tst
+	mov	r6, r0
+	shll8	r6
+	shlr16	r0
+	shlr8	r0
+	or	r0, r6
+	mov	r4, r0
+	tst	#2, r0
+	bt	2f
+21:
+	! buf is 2 byte aligned (len could be 0)
 	add	#-2, r5		! Alignment uses up two bytes.
 	cmp/pz	r5		!
 	bt/s	1f		! Jump if we had at least two bytes.
@@ -61,16 +80,17 @@ ENTRY(csum_partial)
 	bra	6f
 	 add	#2, r5		! r5 was < 2. Deal with it.
 1:
-	mov	r5, r1		! Save new len for later use.
 	mov.w	@r4+, r0
 	extu.w	r0, r0
 	addc	r0, r6
 	bf	2f
 	add	#1, r6
 2:
+	! buf is 4 byte aligned (len could be 0)
+	mov	r5, r1
 	mov	#-5, r0
-	shld	r0, r5
-	tst	r5, r5
+	shld	r0, r1
+	tst	r1, r1
 	bt/s	4f		! if it's =0, go to 4f
 	 clrt
 	.align	2
@@ -92,30 +112,31 @@ ENTRY(csum_partial)
 	addc	r0, r6
 	addc	r2, r6
 	movt	r0
-	dt	r5
+	dt	r1
 	bf/s	3b
 	 cmp/eq	#1, r0
-	! here, we know r5==0
-	addc	r5, r6		! add carry to r6
+	! here, we know r1==0
+	addc	r1, r6		! add carry to r6
 4:
-	mov	r1, r0
+	mov	r5, r0
 	and	#0x1c, r0
 	tst	r0, r0
-	bt/s	6f
-	 mov	r0, r5
-	shlr2	r5
+	bt	6f
+	! 4 bytes or more remaining
+	mov	r0, r1
+	shlr2	r1
 	mov	#0, r2
 5:
 	addc	r2, r6
 	mov.l	@r4+, r2
 	movt	r0
-	dt	r5
+	dt	r1
 	bf/s	5b
 	 cmp/eq	#1, r0
 	addc	r2, r6
-	addc	r5, r6		! r5==0 here, so it means add carry-bit
+	addc	r1, r6		! r1==0 here, so it means add carry-bit
 6:
-	mov	r1, r5
+	! 3 bytes or less remaining
 	mov	#3, r0
 	and	r0, r5
 	tst	r5, r5
@@ -139,8 +160,18 @@ ENTRY(csum_partial)
 8:
 	addc	r0, r6
 	mov	#0, r0
-	addc	r0, r6
+	addc	r0, r6
 9:
+	! Check if the buffer was misaligned, if so realign sum
+	mov	r7, r0
+	tst	#1, r0
+	bt	10f
+	mov	r6, r0
+	shll8	r6
+	shlr16	r0
+	shlr8	r0
+	or	r0, r6
+10:
 	rts
 	 mov	r6, r0

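For readers who don't speak SH assembly, the alignment handling the patch adds is easier to follow in C. When buf starts on an odd address, the code folds in the leading byte, rotates the 32-bit accumulator left by 8 bits so the remaining, now even-aligned, loads add into the correct byte lanes, and rotates again at label 9 so the final sum matches what an aligned walk would have produced; each shll8/shlr16/shlr8/or sequence is exactly that rotate-left-by-8. The sketch below mirrors the scheme but is not the kernel's code: sketch_csum(), add_carry() and rol32_sketch() are made-up names, a little-endian target is assumed, and a plain 16-bit loop stands in for the unrolled 32-bit loop of the real routine.

#include <stddef.h>
#include <stdint.h>

/* Rotate left by n bits, 0 < n < 32; the asm sequences use n == 8. */
static uint32_t rol32_sketch(uint32_t v, unsigned int n)
{
	return (v << n) | (v >> (32 - n));
}

/* 32-bit add with end-around carry, as the addc/movt pairs implement. */
static uint32_t add_carry(uint32_t sum, uint32_t v)
{
	sum += v;
	return sum + (sum < v);
}

/* Ones'-complement sum of len bytes at buf; odd start addresses allowed. */
uint32_t sketch_csum(const uint8_t *buf, size_t len, uint32_t sum)
{
	int swapped = 0;

	if (((uintptr_t)buf & 1) && len) {	/* buf is odd */
		sum = add_carry(sum, *buf++);	/* mov.b/extu.b/addc */
		sum = rol32_sketch(sum, 8);	/* shift byte lanes up */
		swapped = 1;
		len--;
	}
	while (len >= 2) {			/* now 2-byte aligned */
		sum = add_carry(sum, (uint32_t)buf[0] | ((uint32_t)buf[1] << 8));
		buf += 2;
		len -= 2;
	}
	if (len)				/* trailing odd byte */
		sum = add_carry(sum, *buf);
	if (swapped)				/* realign sum, as at label 9 */
		sum = rol32_sketch(sum, 8);
	return sum;
}

Two rotates by 8 make a net rotate by 16, which merely swaps the halves of the 32-bit accumulator; a ones'-complement sum is invariant under that swap once folded to 16 bits, so the incoming sum argument comes through the detour intact.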