@@ -14,6 +14,7 @@
 #include <linux/fs.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/prefetch.h>
 
 #include "gfs2.h"
 #include "incore.h"
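The new include pulls in the kernel's prefetch() helper, a hint that asks the CPU to start loading a cache line ahead of use; it is used in the rewritten scan loop further down. For experimenting with the algorithm in userspace, GCC's __builtin_prefetch is a rough stand-in; a minimal sketch, with the wrapper name chosen to match the kernel macro:

/* Userspace stand-in for the kernel's prefetch() hint; sketch only. */
static inline void prefetch(const void *p)
{
	__builtin_prefetch(p, 0, 3);	/* read access, high temporal locality */
}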
@@ -33,6 +34,16 @@
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
 
+#if BITS_PER_LONG == 32
+#define LBITMASK   (0x55555555UL)
+#define LBITSKIP55 (0x55555555UL)
+#define LBITSKIP00 (0x00000000UL)
+#else
+#define LBITMASK   (0x5555555555555555UL)
+#define LBITSKIP55 (0x5555555555555555UL)
+#define LBITSKIP00 (0x0000000000000000UL)
+#endif
+
 /*
  * These routines are used by the resource group routines (rgrp.c)
  * to keep track of block allocation. Each block is represented by two
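A note on the new constants: each bitmap byte packs four two-bit block states (GFS2_NBBY entries of GFS2_BIT_SIZE bits), so one unsigned long covers 16 or 32 blocks. LBITMASK keeps only the low bit of every two-bit entry. When the search target is a free block, a word whose masked value equals LBITSKIP55 holds nothing but GFS2_BLKST_USED (01) and GFS2_BLKST_DINODE (11) entries and can be skipped whole; when the target is a used block, a masked value of LBITSKIP00 means every entry is GFS2_BLKST_FREE (00) or GFS2_BLKST_UNLINKED (10), and the word is equally uninteresting. A minimal standalone sketch of the skip test (state encodings mirror gfs2_ondisk.h; the demo word is made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t lbitmask = 0x55555555;	/* low bit of each 2-bit entry */
	uint32_t word = 0x55575555;	/* one entry is 11 (DINODE), rest 01 (USED) */

	/* Searching for FREE: skip the word iff every entry has its low bit set. */
	if ((word & lbitmask) == lbitmask)
		printf("word holds no FREE/UNLINKED entries - skip it\n");
	return 0;
}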
@@ -138,45 +149,63 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
 static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
 		       u8 old_state)
 {
-	const u8 *byte;
-	u32 blk = goal;
-	unsigned int bit, bitlong;
-	const unsigned long *plong;
-#if BITS_PER_LONG == 32
-	const unsigned long plong55 = 0x55555555;
-#else
-	const unsigned long plong55 = 0x5555555555555555;
-#endif
-
-	byte = buffer + (goal / GFS2_NBBY);
-	plong = (const unsigned long *)(buffer + (goal / GFS2_NBBY));
-	bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
-	bitlong = bit;
-
-	while (byte < buffer + buflen) {
-
-		if (bitlong == 0 && old_state == 0 && *plong == plong55) {
-			plong++;
-			byte += sizeof(unsigned long);
-			blk += sizeof(unsigned long) * GFS2_NBBY;
-			continue;
+	const u8 *byte, *start, *end;
+	int bit, startbit;
+	u32 g1, g2, misaligned;
+	unsigned long *plong;
+	unsigned long lskipval;
+
+	lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
+	g1 = (goal / GFS2_NBBY);
+	start = buffer + g1;
+	byte = start;
+	end = buffer + buflen;
+	g2 = ALIGN(g1, sizeof(unsigned long));
+	plong = (unsigned long *)(buffer + g2);
+	startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
+	misaligned = g2 - g1;
+	if (!misaligned)
+		goto ulong_aligned;
+/* parse the bitmap a byte at a time */
+misaligned:
+	while (byte < end) {
+		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
+			return goal +
+				(((byte - start) * GFS2_NBBY) +
+				 ((bit - startbit) >> 1));
 		}
-		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
-			return blk;
 		bit += GFS2_BIT_SIZE;
-		if (bit >= 8) {
+		if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
 			bit = 0;
 			byte++;
+			misaligned--;
+			if (!misaligned) {
+				plong = (unsigned long *)byte;
+				goto ulong_aligned;
+			}
 		}
-		bitlong += GFS2_BIT_SIZE;
-		if (bitlong >= sizeof(unsigned long) * 8) {
-			bitlong = 0;
-			plong++;
-		}
-
-		blk++;
 	}
+	return BFITNOENT;
 
+/* parse the bitmap an unsigned long at a time */
+ulong_aligned:
+	/* Stop at "end - 1" or else prefetch can go past the end and segfault.
+	   We could "if" it but we'd lose some of the performance gained.
+	   This way will only slow down searching the very last 4/8 bytes
+	   depending on architecture.  I've experimented with several ways
+	   of writing this section such as using an else before the goto
+	   but this one seems to be the fastest. */
+	while ((unsigned char *)plong < end - 1) {
+		prefetch(plong + 1);
+		if (((*plong) & LBITMASK) != lskipval)
+			break;
+		plong++;
+	}
+	if ((unsigned char *)plong < end) {
+		byte = (const u8 *)plong;
+		misaligned += sizeof(unsigned long) - 1;
+		goto misaligned;
+	}
 	return BFITNOENT;
 }
 
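Two details in the new search are easy to miss. First, bit and startbit are signed ints on purpose: when the word-at-a-time loop hands control back to the byte loop, bit may be smaller than startbit, and (bit - startbit) >> 1 then relies on an arithmetic right shift to subtract the goal's in-byte offset correctly. Second, the byte loop and word loop hand off through misaligned, which counts the bytes remaining before the next long boundary. For readers who want to try the algorithm outside the kernel, here is a standalone userspace sketch; ALIGN and the GFS2_* constants are re-created locally under assumption, and the prefetch() call is omitted since it does not affect the result:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define GFS2_NBBY	4	/* 2-bit block states per byte */
#define GFS2_BIT_SIZE	2
#define GFS2_BIT_MASK	3
#define GFS2_BLKST_FREE	0
#define GFS2_BLKST_USED	1
#define BFITNOENT	((uint32_t)~0)
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

static uint32_t bitfit(const uint8_t *buffer, unsigned int buflen,
		       uint32_t goal, uint8_t old_state)
{
	const uint8_t *byte, *start, *end;
	int bit, startbit;
	uint32_t g1, g2, misaligned;
	const unsigned long *plong;
	const unsigned long lbitmask = (unsigned long)0x5555555555555555ULL;
	unsigned long lskipval;

	lskipval = (old_state & GFS2_BLKST_USED) ? 0UL : lbitmask;
	g1 = goal / GFS2_NBBY;
	start = buffer + g1;
	byte = start;
	end = buffer + buflen;
	g2 = ALIGN(g1, (uint32_t)sizeof(unsigned long));
	plong = (const unsigned long *)(buffer + g2);
	startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
	misaligned = g2 - g1;
	if (!misaligned)
		goto ulong_aligned;
misaligned:				/* byte-at-a-time scan */
	while (byte < end) {
		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
			return goal + ((byte - start) * GFS2_NBBY) +
			       ((bit - startbit) >> 1);
		bit += GFS2_BIT_SIZE;
		if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
			bit = 0;
			byte++;
			misaligned--;
			if (!misaligned) {
				plong = (const unsigned long *)byte;
				goto ulong_aligned;
			}
		}
	}
	return BFITNOENT;
ulong_aligned:				/* word-at-a-time skip, prefetch omitted */
	while ((const uint8_t *)plong < end - 1) {
		if ((*plong & lbitmask) != lskipval)
			break;
		plong++;
	}
	if ((const uint8_t *)plong < end) {
		byte = (const uint8_t *)plong;
		misaligned += sizeof(unsigned long) - 1;
		goto misaligned;
	}
	return BFITNOENT;
}

int main(void)
{
	/* Word-aligned backing so the (unsigned long *) cast is safe;
	   in the kernel the bitmap sits inside an aligned buffer head. */
	static unsigned long backing[32 / sizeof(unsigned long)];
	uint8_t *map = (uint8_t *)backing;

	memset(map, 0x55, 32);		/* every 2-bit entry = GFS2_BLKST_USED */
	map[20] &= (uint8_t)~(3 << 4);	/* free entry 2 of byte 20 = block 82 */
	printf("first free block from goal 0: %u\n",
	       bitfit(map, 32, 0, GFS2_BLKST_FREE));	/* prints 82 */
	return 0;
}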