fixed-point in C
original material: artist-embedded.org/EmbeddedControl Slides
reference 1: fixedpt.html
reference 2: Q_format
1. Fixed-point Representation
- x: real number
- X: fixed-point number
- N: wordlength
- m: number of integer bits (excluding the sign bit)
- f: number of fraction bits
- “Q-format”: Qm.f (m integer bits, f fraction bits)
0/1101/011
sign bit/4bit integer/3bit fraction
2. Conversion to and from fixed-point
real to fixed
- Multiply the floating point number by 2^f
- Round to the nearest integer
$X = \mathrm{round}(x \cdot 2^{f})$
fixed to real
$x = X \cdot 2^{-f}$
Example: converting 13.4 to Q4.3 format
$X = \mathrm{round}(13.4 \cdot 2^{3}) = \mathrm{round}(107.2) = 107 \; (= 01101011_2)$
3. Range of fixed-point representation
4. Arithmetic operations of fixed-point
/*
 * Saturate a 32-bit intermediate result to the 16-bit signed range.
 *
 * x: value to clamp.
 *
 * Returns 0x7FFF (32767) when x is above the range, -0x8000 (-32768) when x
 * is below it, and x unchanged otherwise.
 */
int16_t sat16(int32_t x)
{
    const int32_t hi = 0x7FFF;
    /* The lower bound has to be negative: comparing against +0x8000 would
     * match every value that is not above the upper bound. */
    const int32_t lo = -(int32_t)0x8000;

    if (x > hi) {
        return (int16_t)hi;
    }
    if (x < lo) {
        return (int16_t)lo;
    }
    return (int16_t)x;
}
/*
 * Saturating addition of two 16-bit fixed-point values.
 *
 * a, b: operands (they must share the same Q-format for the sum to be
 *       meaningful).
 *
 * Returns a + b, clamped to 0x7FFF on positive overflow and to -0x8000 on
 * negative overflow.
 */
int16_t q_add_sat(int16_t a, int16_t b)
{
    /* Widen both operands first so the sum itself cannot overflow. */
    const int32_t sum = (int32_t)a + (int32_t)b;

    if (sum > 0x7FFF) {
        return (int16_t)0x7FFF;
    }
    if (sum < -1 * 0x8000) {
        return (int16_t)(-1 * 0x8000);
    }
    return (int16_t)sum;
}
/*
 * Subtraction of two 16-bit fixed-point values, without saturation.
 *
 * a, b: operands (they must share the same Q-format for the difference to be
 *       meaningful).
 *
 * Returns a - b converted back to int16_t. No range check is performed, so a
 * difference outside the int16_t range is not clamped.
 */
int16_t q_sub(int16_t a, int16_t b)
{
    return (int16_t)(a - b);
}
/*
 * Rounding constant for q_mul: 2^(Q-1), i.e. half of the weight that is
 * discarded when the product is shifted right by Q bits. It must be built
 * from the same fraction-bit count that q_mul shifts by.
 * NOTE(review): Q is not defined in this part of the file; it is assumed to
 * be a macro/constant >= 1 holding the number of fraction bits - confirm.
 */
#define K (1 << (Q - 1))

/*
 * Fixed-point multiplication with rounding and saturation.
 *
 * a, b: operands, each with Q fraction bits.
 *
 * Returns the product rescaled back to Q fraction bits, rounded by adding K
 * before the shift, and clamped to 16 bits by sat16().
 */
int16_t q_mul(int16_t a, int16_t b)
{
    /* The raw product carries 2*Q fraction bits and needs 32 bits. */
    int32_t product = (int32_t)a * (int32_t)b;

    /* Add half an output LSB so the shift below rounds instead of truncating. */
    product += K;

    /* Drop the extra Q fraction bits, then saturate to int16_t.
     * NOTE(review): >> on a negative value is implementation-defined in C;
     * this relies on an arithmetic shift - confirm for the target compiler. */
    return sat16(product >> Q);
}
/*
 * Fixed-point division with rounding.
 *
 * a: dividend, with Q fraction bits.
 * b: divisor, with Q fraction bits. Must be non-zero; division by zero is
 *    not guarded here.
 *
 * Returns a / b with Q fraction bits, rounded to nearest by biasing the
 * scaled dividend with half the divisor. The quotient is narrowed to int16_t
 * without saturation.
 * NOTE(review): Q is not defined in this part of the file; it is assumed to
 * be the number of fraction bits - confirm.
 */
int16_t q_div(int16_t a, int16_t b)
{
    /* Pre-scale the dividend by 2^Q. A multiplication is used instead of
     * `a << Q` because left-shifting a negative value is undefined in C. */
    int32_t scaled = (int32_t)a * ((int32_t)1 << Q);
    const int32_t half = b / 2;

    /* Bias away from zero: add half the divisor when dividend and divisor
     * have the same sign, subtract it otherwise. */
    if ((scaled >= 0) == (b >= 0)) {
        scaled += half;
    } else {
        scaled -= half;
    }

    return (int16_t)(scaled / b);
}