C: Difference between revisions
(→P99) |
|||
(15 intermediate revisions by the same user not shown) | |||
Line 8: | Line 8: | ||
</source> |
</source> |
||
* [http://c-faq.com/decl/spiral.anderson.html The "Clockwise/Spiral Rule"] (simple rule for how to read C types). |
* [http://c-faq.com/decl/spiral.anderson.html The "Clockwise/Spiral Rule"] (simple rule for how to read C types). |
||
* [https://jorenar.com/blog/less-known-c Few lesser known tricks, quirks and features of C] |
|||
;On this wiki |
;On this wiki |
||
Line 14: | Line 15: | ||
* [[C examples]] |
* [[C examples]] |
||
* [[C / C++]]. |
* [[C / C++]]. |
||
* [[Autotools]] (<code>libtoolize</code>, <code>autogen</code>, <code>configure</code>) |
|||
== Examples == |
== Examples == |
||
Line 213: | Line 215: | ||
typedef uint8_t A_t[16]; |
typedef uint8_t A_t[16]; |
||
A_t A1 = { 19, 23 }; // an array - sizeof(A1) == 16 |
A_t A1 = { 19, 23 }; // an array - sizeof(A1) == 16 |
||
A_t pA1 = &A1; // a pointer to array - sizeof(pA1) |
A_t *pA1 = &A1; // a pointer to array - sizeof(*pA1) == 16 |
||
#define pX1 (A_t *)0x123456 // ... same, but temporary - sizeof(*pX1) == 16 |
#define pX1 (A_t *)0x123456 // ... same, but temporary - sizeof(*pX1) == 16 |
||
</source> |
</source> |
||
Line 452: | Line 454: | ||
const char *proName = basename(argv[0]); |
const char *proName = basename(argv[0]); |
||
</source> |
|||
=== Statement-as-expression for safer macros === |
|||
Statement-as-expressions are a gnu extension that allows turning a sequence of statement into a valid expression [https://gcc.gnu.org/onlinedocs/gcc-4.7.4/gcc/Statement-Exprs.html]. |
|||
Example: |
|||
<source lang="c"> |
|||
// Avoid evaluating a and b twice: |
|||
#define maxint(a,b) \ |
|||
({int _a = (a), _b = (b); _a > _b ? _a : _b; }) |
|||
// Add static assert to some construction |
|||
#define SafeRandom(target) ({ _Static_assert( sizeof(target) != sizeof(void*), “accidental pointer!” ); Random( &(target), sizeof(target) ); }) |
|||
</source> |
|||
=== Safe cyclic shifts (rotate) without undefined behaviour === |
|||
Reference: https://stackoverflow.com/questions/776508/best-practices-for-circular-shift-rotate-operations-in-c |
|||
<source lang="c"> |
|||
// Clang |
|||
#include <stdint.h> // for uint32_t |
|||
#include <limits.h> // for CHAR_BIT |
|||
// #define NDEBUG |
|||
#include <assert.h> |
|||
__attribute__((unused)) |
|||
static inline uint32_t rotl32 (uint32_t n, unsigned int c) |
|||
{ |
|||
const unsigned int mask = (CHAR_BIT*sizeof(n) - 1); // assumes width is a power of 2. |
|||
// assert ( (c<=mask) &&"rotate by type width or more"); |
|||
c &= mask; |
|||
return (n<<c) | (n>>( (-c)&mask )); |
|||
} |
|||
__attribute__((unused)) |
|||
static inline uint32_t rotr32 (uint32_t n, unsigned int c) |
|||
{ |
|||
const unsigned int mask = (CHAR_BIT*sizeof(n) - 1); |
|||
// assert ( (c<=mask) &&"rotate by type width or more"); |
|||
c &= mask; |
|||
return (n>>c) | (n<<( (-c)&mask )); |
|||
} |
|||
// MSVC |
|||
_rotl(n,c); |
|||
_rotr(n,c); |
|||
</source> |
|||
=== Use of <code>_Generic</code> === |
|||
<code>_Generic</code> is a new construction introduced in C11, but it has some limitation |
|||
* See https://www.chiark.greenend.org.uk/~sgtatham/quasiblog/c11-generic/ |
|||
* See [https://news.ycombinator.com/item?id=36933345 HN comments] for more workarounds: |
|||
<source lang="c"> |
|||
define string_length(x) _Generic(x, \ |
|||
const char * : strlen((const char*)(const void*)x), \ |
|||
struct MyStringBuffer * : ((const MyStringBuffer*)(const void*)x)->length) |
|||
</source> |
</source> |
||
Line 585: | Line 649: | ||
__attribute__((packed)) uint32_t a; // OK |
__attribute__((packed)) uint32_t a; // OK |
||
uint32_t __attribute__((packed)) b; // OK |
uint32_t __attribute__((packed)) b; // OK |
||
uint32_t |
uint32_t c __attribute__((packed)); // OK |
||
} |
} |
||
</source> |
</source> |
||
Line 600: | Line 664: | ||
* Add option <code>-v</code> to see linker command when using gcc/clang. |
* Add option <code>-v</code> to see linker command when using gcc/clang. |
||
* See also [https://sourceforge.net/p/msys2/mailman/msys2-users/thread/CAFgFV9NR37cEeD%2BsFxxkALsto0JbFjAmw9VsLW4O9i7RWAxUTw%40mail.gmail.com/#msg35019824 this post] for more information on libraries automatically linked in (<code>gcc -dumpspecs</code>). |
* See also [https://sourceforge.net/p/msys2/mailman/msys2-users/thread/CAFgFV9NR37cEeD%2BsFxxkALsto0JbFjAmw9VsLW4O9i7RWAxUTw%40mail.gmail.com/#msg35019824 this post] for more information on libraries automatically linked in (<code>gcc -dumpspecs</code>). |
||
=== Undefined behaviour === |
|||
Typical construction that leads to undefined behaviour: |
|||
<source lang="c"> |
|||
// Rotations - Undefined behaviour for n=0 or n>31 |
|||
// See tips above for safe implementation |
|||
#define rotr(v,n) (((v) << (32-(n))) | ((v) >> (n))) |
|||
</source> |
|||
== How-To == |
== How-To == |
||
Line 895: | Line 967: | ||
<source lang="c"> |
<source lang="c"> |
||
g++ -O2 -march=native -flto deadcode.cpp -o deadcode |
g++ -O2 -march=native -flto deadcode.cpp -o deadcode |
||
</source> |
|||
=== Build smaller binaries (C/C++) === |
|||
See http://ptspts.blogspot.com/2013/12/how-to-make-smaller-c-and-c-binaries.html |
|||
== Examples == |
|||
=== Simple trace / logging === |
|||
Here a simple trace header file {{file|trace.h}}. Simply include it in any C file to get trace / logging capability. |
|||
<source lang="c"> |
|||
#ifndef __TRACE_H__ |
|||
#define __TRACE_H__ |
|||
#include <stdio.h> |
|||
/** |
|||
* Convert an array of char into a list of hexadecimal values. |
|||
* For instance, "\xca\xfe" is converted into "cafe". |
|||
* ############################################################################################################ |
|||
*/ |
|||
static uint32_t _char2hex(char *s, const void *data, size_t data_n, size_t n) |
|||
{ |
|||
uint32_t o = 0; |
|||
const uint8_t *data8 = data; |
|||
#define out(f, a ...) { \ |
|||
o += (uint32_t)snprintf(s + o, n - (uint32_t)o, f, ## a); \ |
|||
o = o <= n ? o : n; \ |
|||
} |
|||
if (n > 0) { |
|||
s[0] = 0; |
|||
} |
|||
for (size_t i = 0; i < data_n; i++) { |
|||
out("%.2x", data8[i]); |
|||
} |
|||
return o; |
|||
#undef out |
|||
} |
|||
#define trace(x, n) xtrace(__FILE__, __LINE__, __FUNCTION__, # x, &(x), n) |
|||
#define safetrace(x) xtrace(__FILE__, __LINE__, __FUNCTION__, # x, &(x), sizeof(x)) |
|||
static void xtrace(const char *file, uint32_t line, const char *fct, const char *s, const void *data, uint32_t n) |
|||
{ |
|||
char buf[2 * n + 1]; |
|||
uint32_t o = _char2hex(buf, data, n, sizeof(buf)); |
|||
assert(o < sizeof(buf)); // Check if given buffer was big enough |
|||
printf("%s:%s:%d: %s = %s\n", fct, file, line, s, buf); |
|||
fflush(stdout); |
|||
} |
|||
#endif |
|||
</source> |
|||
Example of use: |
|||
<source lang="c"> |
|||
#include "trace.h" |
|||
void foo(int a, int * A, int n) |
|||
{ |
|||
safetrace(a); |
|||
trace(A,n); |
|||
} |
|||
</source> |
</source> |
||
Line 926: | Line 1,061: | ||
* [http://www.netlib.org/blas/ BLAS (Basic Linear Algebra Subprograms)]. |
* [http://www.netlib.org/blas/ BLAS (Basic Linear Algebra Subprograms)]. |
||
* [http://math-atlas.sourceforge.net/ ATLAS (Automatically Tuned Linear Algebra Software)] |
* [http://math-atlas.sourceforge.net/ ATLAS (Automatically Tuned Linear Algebra Software)] |
||
=== libcpucycles === |
|||
* libcpucycles is a public-domain microlibrary for counting CPU cycles. |
|||
* See https://cpucycles.cr.yp.to/ |
|||
== Debugging == |
== Debugging == |
||
See general page on [[Debugging]]. |
See general page on [[Debugging]]. |
||
== Testing / validation == |
|||
;C Bounded Model Checker |
|||
* https://www.philipzucker.com/cbmc_tut/ |
|||
;UBSAN / ASAN |
|||
* See clang. |
|||
== Embedded C == |
== Embedded C == |
Latest revision as of 06:23, 27 September 2024
References
- The C Library Reference Guide
- C Preprocessor reference
- manpage manpages-dev
- manpage manpages-posix-dev
sudo apt-get install manpages-dev manpages-posix-dev
- The "Clockwise/Spiral Rule" (simple rule for how to read C types).
- Few lesser known tricks, quirks and features of C
- On this wiki
- Clang
- GCC
- C examples
- C / C++.
- Autotools (
libtoolize
,autogen
,configure
)
Examples
See C examples.
Hacker tips
See Hacker tips (programming tips, bit manipulation...).
Tips
Designated Struct Initializer
struct {
int a;
int b;
} s = { .a = 1, .b = 2 };
Same applies for union.
See GCC or StackOverflow.
Array initializer
To build a local temporary array:
memcpy(dst,((uint8_t[4]){1,2,3,4}),4) // Copy array {1,2,3,4} to dst
Variadic macros
References:
#define eprintf(...) fprintf (stderr, __VA_ARGS__) // C99 Standard
#define eprintf(fmt, ...) fprintf (stderr, fmt, __VA_ARGS__) // ???, comma not suppressed (maybe VisualC does remove it)
#define eprintf(fmt, ...) fprintf (stderr, fmt, ## __VA_ARGS__) // GNU extension, comma suppressed if no args
#define eprintf(args...) fprintf (stderr, args) // GNU extension
#define eprintf(args...) fprintf (stderr, ## args) // GNU extension, comma suppressed if args empty
Some caveats:
#define eprintf(format,...) fprintf (stderr, format, __VA_ARGS__)
Looks more descriptive, but now at least one argument must be provided, except if compiler supports the construction (gcc, vc).
A handy macro hack that counts the number of parameters passed before expansion (see [1] and [2]):
/* The PP_NARG macro returns the number of arguments that have been
* passed to it. This compensates for lack of __VA_NARGS__.
* Macros written by Laurent Deniau See http://en.wikipedia.org/wiki/Variadic_macro.
*/
#define PP_NARG(...) \
PP_NARG_(__VA_ARGS__,PP_RSEQ_N())
#define PP_NARG_(...) \
PP_ARG_N(__VA_ARGS__)
#define PP_ARG_N( \
_1, _2, _3, _4, _5, _6, _7, _8, _9,_10, \
_11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \
_21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \
_31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \
_41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \
_51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \
_61,_62,_63,N,...) N
#define PP_RSEQ_N() \
63,62,61,60, \
59,58,57,56,55,54,53,52,51,50, \
49,48,47,46,45,44,43,42,41,40, \
39,38,37,36,35,34,33,32,31,30, \
29,28,27,26,25,24,23,22,21,20, \
19,18,17,16,15,14,13,12,11,10, \
9,8,7,6,5,4,3,2,1,0
/* Some test cases */
PP_NARG(A) -> 1
PP_NARG(A,B) -> 2
PP_NARG(A,B,C) -> 3
PP_NARG(A,B,C,D) -> 4
PP_NARG(A,B,C,D,E) -> 5
PP_NARG(1,2,3,4,5,6,7,8,9,0,
1,2,3,4,5,6,7,8,9,0,
1,2,3,4,5,6,7,8,9,0,
1,2,3,4,5,6,7,8,9,0,
1,2,3,4,5,6,7,8,9,0,
1,2,3,4,5,6,7,8,9,0,
1,2,3) -> 63
Variadic I/O Functions
See [3] for some reference information on variable functions.
The standard variadic I/O functions are:
#include <stdio.h>
#include <stdarg.h>
int vprintf(const char *format, va_list ap);
int vfprintf(FILE *stream, const char *format, va_list ap);
int vsprintf(char *str, const char *format, va_list ap);
int vsnprintf(char *str, size_t size, const char *format, va_list ap);
Example of use:
//This works both on win32 / linux
#include <stdio.h>
#include <stdarg.h>
/* printf-like wrapper: hands the format string and the variable arguments
 * that follow it over to vprintf(). */
void vario(const char* formatstr,...)
{
    va_list ap;

    va_start(ap, formatstr);        /* the variable part begins after formatstr */
    (void)vprintf(formatstr, ap);
    va_end(ap);                     /* every va_start needs its matching va_end */
}
Note on win32:
- There exist alternative functions ([4]) prefixed with a '_', such as
_vprintf_p
,_vfprintf_l
, ...
Temporary variable names for Macro
/* UNIQ(x) builds a variable name that is unique per source line by pasting the
current value of __LINE__ onto x. Two uses on the same source line therefore
yield the same name.
Several intermediate macros are needed because a macro argument is not expanded
when it is an operand of # or ## in a macro definition; the extra level lets
__LINE__ be replaced by its number before the paste happens.
Example: #define SCAN_MY(var,n) {int UNIQ(x); for(UNIQ(x)=0; UNIQ(x)<n; ++UNIQ(x)) printf(var[UNIQ(x)]);}
*/
#define UNIQ__(x,y) x ## y
#define UNIQ_(x,y) UNIQ__(x,y)
#define UNIQ(x) UNIQ_(x,__LINE__)
Static assertions
Static assertions are assertions that can be verified at compile time.
From Static_assertions (WP):
#define SASSERT(pred) switch(0){case 0:case pred:;}
SASSERT( BOOLEAN CONDITION );
static char const static_assertion[ (BOOLEAN CONDITION)
? 1 : -1
] = {'!'};
Static assert
Static assert that supports sizeof()
operator: http://www.pixelbeat.org/programming/gcc/static_assert.html.
#define ASSERT_CONCAT_(a, b) a##b
#define ASSERT_CONCAT(a, b) ASSERT_CONCAT_(a, b)
/* These can't be used after statements in c89. */
#ifdef __COUNTER__
#define STATIC_ASSERT(e,m) \
;enum { ASSERT_CONCAT(static_assert_, __COUNTER__) = 1/(int)(!!(e)) }
#else
/* This can't be used twice on the same line so ensure if using in headers
* that the headers are not included twice (by wrapping in #ifndef...#endif)
* Note it doesn't cause an issue when used on same line of separate modules
* compiled with gcc -combine -fwhole-program. */
#define STATIC_ASSERT(e,m) \
;enum { ASSERT_CONCAT(assert_line_, __LINE__) = 1/(int)(!!(e)) }
#endif
Call a function at a given absolute address
Using a nice typedef
(SO):
typedef int func(void);
func* f = (func*)0xdeadbeef;
int i = f();
Avoiding the assignment:
typedef int func(void);
int i = ((func*)0xdeadbeef)();
All in one line:
int i = ((int (*)(void))0xdeadbeef)();
Complex or unusual type-casting
- Arrays
uint8_t A[16]; // an array - type is uint8_t[16], and &A has type uint8_t (*)[16] - sizeof(A) == 16
uint8_t (*pA)[16] = &A; // a pointer to an array - sizeof(*pA) == 16
#define pX (uint8_t (*)[16])0x123456 // ... same, but temporary - sizeof(*pX) == 16
typedef uint8_t A_t[16];
A_t A1 = { 19, 23 }; // an array - sizeof(A1) == 16
A_t *pA1 = &A1; // a pointer to array - sizeof(*pA1) == 16
#define pX1 (A_t *)0x123456 // ... same, but temporary - sizeof(*pX1) == 16
- Multi-dimensional arrays
// Multi-dimensional arrays
uint8_t array[20][8]; // array is uint8_t[20][8]; in an expression it decays to uint8_t (*)[8], a pointer to uint8_t[8]
uint8_t (*p)[8];
p = array; // CORRECT! (array decays to a pointer to its first row)
Deal with misaligned data in a portable way
The recommended solution is to use memcpy
[5]:
/**
 * Verify the one's-complement checksum of an IP header that may be stored
 * at a misaligned address.
 *
 * Every 32-bit word is fetched with memcpy() rather than through a casted
 * pointer, so the access is valid whatever the alignment of p.
 *
 * @param p     start of the header
 * @param size  header length in bytes; must be at least 20 and a multiple of 4
 * @return      1 if the folded sum equals 0xFFFF, 0 otherwise (including
 *              when p is NULL or size is not a valid header length)
 */
_Bool check_ip_header_sum (const char * p, size_t size)
{
    /* Each step below reads 4 bytes: refuse lengths that would make a read
     * run past the end of the caller's buffer. */
    if (p == NULL || size < 20 || size % 4 != 0) {
        return 0;
    }

    uint64_t sum = 0;
    for (size_t i = 0; i < size; i += 4) {
        uint32_t temp;
        memcpy (&temp, p + i, sizeof temp);
        sum += temp;
    }

    /* Fold the carries back in until the sum fits in 16 bits. */
    do {
        sum = (sum & 0xFFFF) + (sum >> 16);
    } while (sum > 0xFFFF);

    return sum == 0xFFFF;
}
Using C99 compound literals
C99 compound literals are useful to build local temporary objects without the need to declare a variable for them. For instance, from SO:
if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &(int){ 1 }, sizeof(int)) < 0)
error("setsockopt(SO_REUSEADDR) failed");
We use the compound literal (int){ 1 }
to create an unnamed object and then take its address. The alternative would be to define a variable:
int enable = 1;
if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable)) < 0)
error("setsockopt(SO_REUSEADDR) failed");
Parse integer
Use strtol
:
char * endptr;
port = strtol(optarg, &endptr, 10); // *endptr == '\x00' if all bytes parsed
if (errno || *endptr || port > 65535) {
fprintf("Invalid port number %s.\n", optarg);
}
Handy macros
#define NUM(A) ( sizeof(A) / sizeof((A)[0]) ) // Number of elements in an array
String stream
Use fmemopen
or open_memstream
from stdio.h [6], [7]
#include <stdio.h>
#include <string.h>     /* strlen */
static char buffer[] = "foobar";
/* Read back the content of buffer one character at a time through a
 * memory-backed stream. */
int
main (void)
{
    int ch;
    FILE *stream;

    stream = fmemopen (buffer, strlen (buffer), "r");
    if (stream == NULL) {       /* fmemopen returns NULL on failure */
        perror ("fmemopen");
        return 1;
    }
    while ((ch = fgetc (stream)) != EOF)
        printf ("Got %c\n", ch);
    fclose (stream);
    return 0;
}
Concatenate static arrays
- Reference
- I published this tip on SO.
The macros:
#include <string.h>
#define cat(z, a) *((uint8_t *)memcpy(&(z), &(a), sizeof(a)) + sizeof(a))
#define cat1(z, a) cat((z),(a))
#define cat2(z, a, b) cat1(cat((z),(a)),b)
#define cat3(z, a, b...) cat2(cat((z),(a)),b)
#define cat4(z, a, b...) cat3(cat((z),(a)),b)
#define cat5(z, a, b...) cat4(cat((z),(a)),b)
// ... add more as necessary
#define catn(n, z, a ...) (&cat ## n((z), a) - (uint8_t *)&(z)) // Returns total length
Example of use:
char One[1] = { 0x11 };
char Two[2] = { 0x22, 0x22 };
char Three[3] = { 0x33, 0x33, 0x33 };
char Four[4] = { 0x44, 0x44, 0x44, 0x44 };
char All[10];
unsigned nAll = catn(4, All, One, Two, Three, Four);
The macros can concatenate any type of objects:
char One = 0x11; // A byte
char Two[2] = { 0x22, 0x22 }; // An array of two byte
char Three[] = "33"; // A string ! 3rd byte = '\x00'
struct {
char a[2];
short b;
} Four = { .a = { 0x44, 0x44}, .b = 0x4444 }; // A structure
void * Eight = &One; // A 64-bit pointer
char All[18];
unsigned nAll = catn(5, All, One, Two, Three, Four, Eight);
Using C99 compound literals, one can also concatenate constants, function results, or constant arrays:
cat3(All,(char){0x11},(unsigned){some_fct()},((uint8_t[4]){1,2,3,4})); // three objects are appended, hence cat3
Generate random
Simpler: use rand()
and srand()
[8]
#include <time.h>
#include <stdlib.h>
srand(time(NULL)); // Initialization, should only be called once.
int r = rand(); // Returns a pseudo-random integer between 0 and RAND_MAX.
More powerful: use random()
and srandom()
from stdlib.h [9].
#include <time.h>
#include <stdlib.h>
srandom(time(NULL)); // Initialization, should only be called once.
int r = random(); // Returns a pseudo-random integer between 0 and RAND_MAX.
Nuke non-ascii characters in source file
I've seen gdb refuse to locate a source line because the source file contained non-ASCII characters. So here is a script to nuke them all.
find "$@" -name "*.[chsS]" -print0 | LANG=C LC_ALL=C xargs -0 sed -ri $'
s/(\xef\xbf\xbd)/(c)/g; # F*cking copyright char
s/(\xef\xbb\xbf)//g; # UTF-8 header
s/\xc3\xab/e/g; # For me especially
s/\xe2\x89\xa0/!=/g; # A different !=
s/(\xe2\x80\x99|(\xc2)?\xb4)/\x27/g; # Different ways to apostrophe things
s/(\xc2)?\xb5/u/g; # A micro with huge effect on gdb
s_\xc2\xb1_+/-_g; # More or less +/-
s/(\xc2)?\xa9/(c)/g; # Another m*therf*cking copynotsoright char
s/\xb2/2/g; # A square, does it look like a square?
s/\xe9/e/g; # Letter e with funny thing above
s/\x92/\x27/g; # catapostrophe
'
Get high-precision timestamps
Note: using CPU performance counters may achieve higher precision:
- Linux
#include <time.h>
struct timespec res, tp;
clock_getres(CLOCK_MONOTONIC, &res); // On Linux, 1ns resolution
clock_gettime(CLOCK_MONOTONIC, &tp); // On Linux, ~50ns call duration
clock_gettime(CLOCK_MONOTONIC_COARSE, &tp); // On Linux, ~10ns call duration, ~ms resolution
clock_gettime(CLOCK_REALTIME, &tp); // On Linux, ~25ns call duration
clock_gettime(CLOCK_REALTIME_COARSE, &tp); // On Linux, ~10ns call duration, ~ms resolution
- Windows
#if defined(WIN32)
// https://stackoverflow.com/questions/5404277/porting-clock-gettime-to-windows
// Works with clang, both in -m32 or -m64
// Resolution: 100ns, one call ~ 30ns
#include <windows.h>
struct timespec {
long tv_sec;
long tv_nsec;
}; // header part
#define CLOCK_REALTIME 0 // unused
#define CLOCK_REALTIME_COARSE 0
#define CLOCK_MONOTONIC 0
#define CLOCK_MONOTONIC_COARSE 0
// Minimal replacement for clock_gettime on Windows: stores the current system
// time in *spec as seconds and nanoseconds since 1 Jan 1970.
// The clock id t is ignored; the function always returns 0.
int clock_gettime(int t, struct timespec *spec) // C-file part
{
(void)t;
__int64 wintime;
// NOTE(review): the FILETIME is written through a cast of &wintime; this assumes
// both have the same size and layout - confirm against the Windows headers.
GetSystemTimeAsFileTime((FILETIME *)&wintime);
wintime -= (__int64)116444736000000000; // 1jan1601 to 1jan1970
spec->tv_sec = wintime / (__int64)10000000; // seconds
// the remainder is scaled by 100 to express it in nanoseconds
spec->tv_nsec = wintime % (__int64)10000000 * (__int64)100; // nano-seconds
return 0;
}
#else
#include <time.h>
#endif
❗ | Note that clock_gettime is also defined in libwinpthread-1.dll on MSYS2. If using pthread library, make sure to add -lpthread after object files in the link command
|
Get program name from argv[0] / basename in C
From SO:
#include <libgen.h> // Available on Windows/MSYS2 and Linux
const char *proName = basename(argv[0]);
Statement-as-expression for safer macros
Statement-as-expressions are a GNU extension that allows turning a sequence of statements into a valid expression [10].
Example:
// Avoid evaluating a and b twice:
#define maxint(a,b) \
({int _a = (a), _b = (b); _a > _b ? _a : _b; })
// Add a static assert to some construction: refuse, at compile time, a target
// whose size equals the size of a pointer (probably a pointer passed by mistake
// instead of the object itself). The message must be a plain "..." string literal.
#define SafeRandom(target) ({ \
    _Static_assert( sizeof(target) != sizeof(void*), "accidental pointer!" ); \
    Random( &(target), sizeof(target) ); })
Safe cyclic shifts (rotate) without undefined behaviour
Reference: https://stackoverflow.com/questions/776508/best-practices-for-circular-shift-rotate-operations-in-c
// Clang
#include <stdint.h> // for uint32_t
#include <limits.h> // for CHAR_BIT
// #define NDEBUG
#include <assert.h>
/* Rotate n left by c bit positions. The count is reduced modulo the width of
 * n, and neither shift count can ever reach that width, so no shift here is
 * undefined. Assumes the width is a power of 2. */
__attribute__((unused))
static inline uint32_t rotl32 (uint32_t n, unsigned int c)
{
    const unsigned int top_bit = CHAR_BIT * sizeof(n) - 1;
    const unsigned int up = c & top_bit;              /* effective left shift       */
    const unsigned int down = (0u - up) & top_bit;    /* complementary right shift  */

    return (n << up) | (n >> down);
}
/* Rotate n right by c bit positions. The count is reduced modulo the width of
 * n, and neither shift count can ever reach that width, so no shift here is
 * undefined. Assumes the width is a power of 2. */
__attribute__((unused))
static inline uint32_t rotr32 (uint32_t n, unsigned int c)
{
    const unsigned int top_bit = CHAR_BIT * sizeof(n) - 1;
    const unsigned int down = c & top_bit;            /* effective right shift     */
    const unsigned int up = (0u - down) & top_bit;    /* complementary left shift  */

    return (n >> down) | (n << up);
}
// MSVC
_rotl(n,c);
_rotr(n,c);
Use of _Generic
_Generic
is a new construct introduced in C11, but it has some limitations:
- See https://www.chiark.greenend.org.uk/~sgtatham/quasiblog/c11-generic/
- See HN comments for more workarounds:
// Length of either a C string or a struct MyStringBuffer.
// Every association of _Generic must compile for any x, not only the selected
// one; casting through (const void *) keeps the unselected branch valid.
#define string_length(x) _Generic((x), \
    const char * : strlen((const char*)(const void*)(x)), \
    struct MyStringBuffer * : ((const struct MyStringBuffer*)(const void*)(x))->length)
Pits
Multiple definition of uninitialized variables
By default, gcc (before GCC 10, where -fno-common became the default) allows multiple definitions of uninitialized variables! This is really EVIL. Solutions:
References:
|
Let's consider foo.c int foo;
bar.c int bar=0;
and main.c int foo;
int bar=0;
int main(int argc, char** argv)
{
return foo+bar;
}
We have the following: $ gcc main.c bar.c
# /tmp/ccjIc7Is.o:(.bss+0x0): multiple definition of `bar'
# /tmp/ccFys0wm.o:(.bss+0x0): first defined here
# collect2: ld returned 1 exit status
gcc main.c foo.c
# No error !!!
gcc does not report any error for duplicate definitions of uninitialized variables. We can force it as follows: gcc -fno-common main.c foo.c
# /tmp/ccnMOLlf.o:(.bss+0x0): multiple definition of `foo'
# /tmp/cc5YeNxE.o:(.bss+0x0): first defined here
# collect2: ld returned 1 exit status
gcc -Wl,--warn-common main.c foo.c
# /tmp/ccm9Z06a.o: warning: multiple common of `foo'
# /tmp/ccJIfrYq.o: warning: previous common is here
|
Undefined reference
Undefined reference with gcc although library is given gcc -lm mymath.c -o mymath /tmp/ccI4NbJJ.o: In function `V': mymath.c:(.text+0x235): undefined reference to `pow' |
The solution is to have the -lm at the end. Order does make a difference (at least since gcc 4.6). gcc mymath.c -o mymath -lm |
No sequence point in assignment operator
The following code is wrong [11]:
/* WRONG - kept only as a counter-example: a is modified by both >>= and <<=
 * and then assigned again, with no sequence point in between. */
uint32_t
swaphalves(uint32_t a)
{
a = (a >>= 16) | (a <<= 16);
return a;
}
There is no sequence point here, and so we don't know anything about the order of operations here. Of course the code can be easily fixed:
/* Exchange the upper and lower 16-bit halves of a 32-bit word. */
uint32_t
swaphalves(uint32_t a)
{
    const uint32_t upper = a >> 16;   /* old high half, moved down */
    const uint32_t lower = a << 16;   /* old low half, moved up    */

    return lower | upper;
}
More on sequence points [12], [13], [14] (! links for C++, there might be difference with C).
Strict Aliasing
See [15]
Addr-of array, &a
int a[4];
int *p;
p=&a+2; // WRONG - ACCESS (void *)a + 2*sizeof(a)
p=a+2; // CORRECT - ACCESS (void *)a + 2*sizeof(int)
p=&a[2]; // CORRECT
*p=1;
Overflows and standard libraries
Make sure to avoid overflow when using standard libraries.
- sscanf — "%ns" writes n+1 bytes
- The maximum field width n does not include the terminating NUL character. So
sscanf
will write n+1 bytes in buffer.
- fgets — fgets(..., size, ...) writes at most size bytes
fgets(line,sizeof(line),...)
is always safe.- fgets writes at most size bytes, the last one being always the terminating NUL character.
Use __attribute__
at wrong location
__attribute__((packed))
must be used BEFORE the struct keyword (at least in Clang 11.x):
struct {
__attribute__((packed)) struct { // OK
uint8_t a;
uint16_t b;
};
struct {
uint8_t c;
uint16_t d;
} __attribute__((packed)); // BAD and NO WARNING
}
Strangely it can occur anywhere for a mere field:
struct {
__attribute__((packed)) uint32_t a; // OK
uint32_t __attribute__((packed)) b; // OK
uint32_t c __attribute__((packed)); // OK
}
Linking libraries before object files
When linking, it is strongly recommended to list libraries after the built objects.
myapp: $(OBJECTS) $(LIBS)
$(LD) $(filter-out -Xlinker -l%,$(LDFLAGS)) -o $@ $(OBJECTS) $(LIBS) $(filter -Xlinker -l%,$(LDFLAGS))
Notes:
- Add option
-v
to see linker command when using gcc/clang. - See also this post for more information on libraries automatically linked in (
gcc -dumpspecs
).
Undefined behaviour
Typical construction that leads to undefined behaviour:
// Rotations - Undefined behaviour for n=0 or n>31
// See tips above for safe implementation
#define rotr(v,n) (((v) << (32-(n))) | ((v) >> (n)))
How-To
First we must compile with Position-Independent Code (PIC):
gcc -c -Wall -Werror -fpic foo.c
We use -shared
to generate a dynamically loaded library:
gcc -shared -o libfoo.so foo.o
To link with the library, we must usually give the location of the library:
$ gcc -Wall -o test main.c -lfoo
# /usr/bin/ld: cannot find -lfoo
# collect2: ld returned 1 exit status
gcc -L/home/username/foo -Wall -o test main.c -lfoo
Likewise, we must tell the location of the library at runtime using LD_LIBRARY_PATH
:
./test
# ./test: error while loading shared libraries: libfoo.so: cannot open shared object file: No such file or directory
LD_LIBRARY_PATH=/home/username/foo:$LD_LIBRARY_PATH ./test
We use rpath
at compilation-time to hardcode the library path and avoid the use of LD_LIBRARY_PATH
(but also the cost of system configuration flexibility):
gcc -L/home/username/foo -Wl,-rpath=/home/username/foo -Wall -o test main.c -lfoo
./test
The best option is to make the library available system-wide using ldconfig
:
cp /home/username/foo/libfoo.so /usr/lib
chmod 0755 /usr/lib/libfoo.so
ldconfig
ldconfig -p | grep foo
gcc -Wall -o test main.c -lfoo # We don't need -L ... anymore
ldd test | grep foo
# libfoo.so => /usr/lib/libfoo.so (0x00a42000)
./test # We don't need LD_LIBRARY_PATH
# This is a shared library test...
# Hello, I'm a shared library
We can build Windows DLL with:
- MinGW gcc on Windows (MSYS/MinGW).
- MinGW gcc from package mingw-w64 on Debian / Ubuntu.
The recipe is similar to building a library on Linux. We only need to export or import explicitly each DLL entries using __declspec
statements [16].
Content of example_dll.cpp (see http://www.mingw.org/wiki/sampledll for more export examples):
<
#define EXPORT_DLL
#include "example_dll.h"
int Double(int x)
{
return 2 * x;
}
Content of example_dll.h:
#ifndef EXAMPLE_DLL_H
#define EXAMPLE_DLL_H
#if defined(EXPORT_DLL)
#define DLL_DECL __declspec(dllexport)
#else
#define DLL_DECL __declspec(dllimport)
#endif
extern "C" int DLL_DECL Double(int x);
#endif // EXAMPLE_DLL_H
To build the DLL:
g++ -c example_dll.cpp
g++ -shared -o example_dll.dll example_dll.o # We build for the MINGW toolchain only
Note: If we need to use the DLL with a toolchain other than MinGW, we must add -Wl,--out-implib,libexample_dll.a
to the linker call [17].
To use our DLL, content of example_exe.cpp:
#include <stdio.h>
#include "example_dll.h"
int main(void)
{
printf("%d\n", Double(333));
return 0;
}
We build our example:
g++ -c example_exe.cpp
g++ -o example_exe.exe example_exe.o -L. -lexample_dll
- Building / linking without the __declspec declarations
- Use
-no-undefined
at build time to export all functions into the DLL. - Use
--enable-runtime-pseudo-reloc
to import all functions automatically.
Execute in data region / Write in code region on Linux
Say we have some encrypted code that we only want to decrypt when we execute it (eg. when presenting a valid password).
Doing this requires that either we write in a code region (typ. .text
), and then execute the new code, or that we write in data region (eg. .stack
or .bss
), and then execute that code.
Without further preparation, the first will segfault because writing in code region is forbidden in Linux, and the latter will fail because executing in the data region is forbidden.
Things we tried:
- Works — edit the ELF file.
- Fails — Use
objcopy --writable-text
(use tip above instead.). - Works — Change page protection at runtime using
mprotect
. - Works — Make stack executable (several methods).
- Fails — Use flag
--omagic
(or-Wl,--omagic
) on clang. - Fails — Create a custom ld script (we can inject a section in .text, but it is not writable or executable).
- References
Use mprotect
- https://stackoverflow.com/questions/38113551/can-i-execute-code-that-resides-in-data-segment-elf-binary
- https://stackoverflow.com/questions/8794555/how-can-i-make-gcc-compile-the-text-section-as-writable-in-an-elf-binary
- Probably the best method.
- We only need to make sure that the given address is aligned to a page (or calls fail.)
mprotect
interferes with GDB (at least Mozilla rr). Better call it outside any region where debugging might happen.
#if defined(__amd64) || defined(i386)
extern int my_array[];
#define MY_ARRAY_LEN 16384
/* Make sure the decryption buffer is set as read-write-exec */
if (mprotect((void *)((uintptr_t)my_array & ~4095),MY_ARRAY_LEN+4096,PROT_READ|PROT_WRITE|PROT_EXEC) != 0) {
exit(1);
};
#endif
Mark .stack executable
On Linux, marking the stack executable also has the side effect of marking the whole heap as executable. So if the object to execute is in the heap, we will be able to write and execute it.
- Using execstack
- Use package execstack:
execstack -s my_elf
objdump -x my_elf # Check that .stack is rwx
- Link in at least one assembly file (.S)
- Using clang, assembling a .S file (even an empty one), and linking that file in the application makes the
.stack
section rwx.
clang ... myfile.S
# ...
ld ... myfile.o -o my_elf
objdump -x my_elf # Check that .stack is rwx
Mark all .text section writable by editing ELF file
This is same as previous tip, but on the .text
section.
In principle this would be done with something like:
objcopy --writable-text my_elf # Does not work for some (all?) ELF format
But this fails on recent Linux. Instead, we use the small program listed on StackOverflow answer [18]:
// Source: https://stackoverflow.com/questions/21638871/objcopy-writable-text-not-making-elf-binary-text-section-writable/44993123
//
// Make .text section writable in ELF file.
//
// Usage: ./objcopy-writable-text <ELF_FILE>
#include <stdlib.h>
#include <stdio.h>
#include <elf.h>
/* Read `size` bytes found at absolute file offset `off` into `dst`.
 * Returns 0 on success, -1 if the seek fails or the file is too short. */
static int read_at(FILE *fp, long off, void *dst, size_t size)
{
    if (fseek(fp, off, SEEK_SET) != 0) {
        return -1;
    }
    return fread(dst, size, 1, fp) == 1 ? 0 : -1;
}

/* Overwrite `size` bytes at absolute file offset `off` with `src`.
 * Returns 0 on success, -1 on seek or write failure. */
static int write_at(FILE *fp, long off, const void *src, size_t size)
{
    if (fseek(fp, off, SEEK_SET) != 0) {
        return -1;
    }
    return fwrite(src, size, 1, fp) == 1 ? 0 : -1;
}

/* Set PF_W on every segment and SHF_WRITE on every section of a 64-bit ELF
 * file that contains the entry point. Returns 0 on success, -1 on error. */
static int patch_elf64(FILE *fp)
{
    Elf64_Ehdr eh;
    if (read_at(fp, 0, &eh, sizeof eh) != 0) {
        return -1;
    }
    /* Entries are read into fixed-size structs, so on-disk entries smaller
     * than those structs cannot be handled. Checked before any write. */
    if ((eh.e_phnum > 0 && eh.e_phentsize < sizeof(Elf64_Phdr)) ||
        (eh.e_shnum > 0 && eh.e_shentsize < sizeof(Elf64_Shdr))) {
        return -1;
    }

    for (unsigned i = 0; i < eh.e_phnum; i++) {
        long pos = (long)(eh.e_phoff + (Elf64_Off)i * eh.e_phentsize);
        Elf64_Phdr ph;
        if (read_at(fp, pos, &ph, sizeof ph) != 0) {
            return -1;
        }
        /* Subtraction form avoids wrap-around of p_vaddr + p_memsz. */
        if (ph.p_vaddr <= eh.e_entry && eh.e_entry - ph.p_vaddr < ph.p_memsz) {
            ph.p_flags |= PF_W;
            if (write_at(fp, pos + (long)offsetof(Elf64_Phdr, p_flags),
                         &ph.p_flags, sizeof ph.p_flags) != 0) {
                return -1;
            }
        }
    }

    for (unsigned i = 0; i < eh.e_shnum; i++) {
        long pos = (long)(eh.e_shoff + (Elf64_Off)i * eh.e_shentsize);
        Elf64_Shdr sh;
        if (read_at(fp, pos, &sh, sizeof sh) != 0) {
            return -1;
        }
        if (sh.sh_addr <= eh.e_entry && eh.e_entry - sh.sh_addr < sh.sh_size) {
            sh.sh_flags |= SHF_WRITE;
            if (write_at(fp, pos + (long)offsetof(Elf64_Shdr, sh_flags),
                         &sh.sh_flags, sizeof sh.sh_flags) != 0) {
                return -1;
            }
        }
    }
    return 0;
}

/* 32-bit counterpart of patch_elf64(). Returns 0 on success, -1 on error. */
static int patch_elf32(FILE *fp)
{
    Elf32_Ehdr eh;
    if (read_at(fp, 0, &eh, sizeof eh) != 0) {
        return -1;
    }
    if ((eh.e_phnum > 0 && eh.e_phentsize < sizeof(Elf32_Phdr)) ||
        (eh.e_shnum > 0 && eh.e_shentsize < sizeof(Elf32_Shdr))) {
        return -1;
    }

    for (unsigned i = 0; i < eh.e_phnum; i++) {
        long pos = (long)(eh.e_phoff + (Elf32_Off)i * eh.e_phentsize);
        Elf32_Phdr ph;
        if (read_at(fp, pos, &ph, sizeof ph) != 0) {
            return -1;
        }
        if (ph.p_vaddr <= eh.e_entry && eh.e_entry - ph.p_vaddr < ph.p_memsz) {
            ph.p_flags |= PF_W;
            if (write_at(fp, pos + (long)offsetof(Elf32_Phdr, p_flags),
                         &ph.p_flags, sizeof ph.p_flags) != 0) {
                return -1;
            }
        }
    }

    for (unsigned i = 0; i < eh.e_shnum; i++) {
        long pos = (long)(eh.e_shoff + (Elf32_Off)i * eh.e_shentsize);
        Elf32_Shdr sh;
        if (read_at(fp, pos, &sh, sizeof sh) != 0) {
            return -1;
        }
        if (sh.sh_addr <= eh.e_entry && eh.e_entry - sh.sh_addr < sh.sh_size) {
            sh.sh_flags |= SHF_WRITE;
            if (write_at(fp, pos + (long)offsetof(Elf32_Shdr, sh_flags),
                         &sh.sh_flags, sizeof sh.sh_flags) != 0) {
                return -1;
            }
        }
    }
    return 0;
}

/*
 * Make the segment and section holding the entry point writable, in place,
 * in the ELF file named by argv[1].
 *
 * Header fields are used as read from disk, so the file is assumed to have
 * the same byte order as the host.
 *
 * Returns -1 when no file is given, 0 on success, EXIT_FAILURE when the file
 * cannot be opened, is not a 32/64-bit ELF file, or an I/O error occurs.
 */
int main(int argc, char** argv)
{
    if (argc <= 1) {
        fprintf(stderr, "Usage: %s <ELF_FILE>\n",
                argc > 0 ? argv[0] : "objcopy-writable-text");
        return -1;
    }

    FILE *fp = fopen(argv[1], "r+b");
    if (fp == NULL) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    /* Only e_ident is common to both classes; read it alone to pick one. */
    unsigned char ident[EI_NIDENT];
    int rc = -1;
    if (read_at(fp, 0, ident, sizeof ident) == 0 &&
        ident[EI_MAG0] == ELFMAG0 && ident[EI_MAG1] == ELFMAG1 &&
        ident[EI_MAG2] == ELFMAG2 && ident[EI_MAG3] == ELFMAG3) {
        if (ident[EI_CLASS] == ELFCLASS64) {
            rc = patch_elf64(fp);
        } else if (ident[EI_CLASS] == ELFCLASS32) {
            rc = patch_elf32(fp);
        }
    }
    if (rc != 0) {
        fprintf(stderr, "%s: not an ELF file, or I/O error while patching\n", argv[1]);
    }

    /* fclose flushes pending writes; a failure here means the patch is lost. */
    if (fclose(fp) != 0) {
        perror(argv[1]);
        rc = -1;
    }
    return rc == 0 ? 0 : EXIT_FAILURE;
}
Then, we also need to move our data array into .text section. One way to do it is to inject our own section into the default linker script:
/* References:
* - https://stackoverflow.com/questions/6877922/injecting-sections-into-gnu-ld-script-script-compatibility-between-versions-of
* - https://stackoverflow.com/questions/38113551/can-i-execute-code-that-resides-in-data-segment-elf-binary
*/
/* Declare an output section .text.execdata that gathers every input section
 * whose name matches .execdata*, ordered by name. */
SECTIONS
{
.text.execdata :
{
*(SORT_BY_NAME(.execdata*))
}
}
/* Splice the section above into the default linker script, right after .text. */
INSERT AFTER .text;
Then we can allocate our array in that section like:
/* 8192-byte buffer (1024 x uint64_t) placed in the .text.execdata section
 * rather than in the default data section. */
__attribute__((section(".text.execdata")))
uint64_t my_array[8192/8] __attribute__((aligned(16))); // functions are aligned 16 in ELF32/64
Make deterministic / reproducible builds
- Reference: https://blog.conan.io/2019/09/02/Deterministic-builds-with-C-C++.html
- With Clang, see Clang.
Remove (strip) unused or dead code from binary (gcc/clang)
See [19].
- Compile all sources at once and pass flag
-fwhole-program
.
g++ -O2 -march=native -fwhole-program deadcode.cpp -o deadcode
- Use linker garbage collection, with flags
-ffunction-sections
(-fdata-sections
may also be needed) and -Wl,--gc-sections
. Warning for C++: this may affect static initializers, etc.
g++ -O2 -march=native -ffunction-sections -Wl,--gc-sections deadcode.cpp -o deadcode
- Use Link-Time Optimization with flag
-flto
. LTO has seen a lot of improvements, but some say it slows down the build a lot or significantly changes the final binary.
g++ -O2 -march=native -flto deadcode.cpp -o deadcode
Build smaller binaries (C/C++)
See http://ptspts.blogspot.com/2013/12/how-to-make-smaller-c-and-c-binaries.html
Examples
Simple trace / logging
Here is a simple trace header file, trace.h. Simply include it in any C file to get trace / logging capability.
#ifndef __TRACE_H__
#define __TRACE_H__
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/**
 * Convert an array of bytes into its lowercase hexadecimal text form.
 * For instance, "\xca\xfe" is converted into "cafe".
 *
 * @param s       Destination buffer; NUL-terminated on return whenever n > 0.
 * @param data    Bytes to convert.
 * @param data_n  Number of bytes in data.
 * @param n       Size of s in bytes, terminator included.
 * @return Number of hex digits produced, capped at n. A return value equal
 *         to n means s was too small and the text was truncated.
 */
static inline uint32_t _char2hex(char *s, const void *data, size_t data_n, size_t n)
{
    static const char digits[] = "0123456789abcdef";
    const uint8_t *data8 = data;
    size_t o = 0;          /* digits stored so far */
    int truncated = 0;

    if (n == 0) {
        return 0;
    }
    for (size_t i = 0; i < data_n && !truncated; i++) {
        /* "o + 1 < n" keeps one byte free for the terminator. */
        if (o + 1 < n) {
            s[o++] = digits[data8[i] >> 4];
        } else {
            truncated = 1;
        }
        if (!truncated && o + 1 < n) {
            s[o++] = digits[data8[i] & 0x0f];
        } else {
            truncated = 1;
        }
    }
    s[o] = '\0';
    return (uint32_t)(truncated ? n : o);
}

/*
 * trace(x, n)  - dump the first n bytes of object x.
 * safetrace(x) - dump all sizeof(x) bytes of object x.
 *
 * Both take the ADDRESS of x, so x must be the object itself (a variable or
 * an array), not a pointer to the data: tracing a pointer dumps the pointer.
 */
#define trace(x, n) xtrace(__FILE__, __LINE__, __func__, # x, &(x), n)
#define safetrace(x) xtrace(__FILE__, __LINE__, __func__, # x, &(x), sizeof(x))

/**
 * Print "<fct>:<file>:<line>: <s> = <hex dump of data>" on stdout and flush.
 *
 * @param file  Source file name.
 * @param line  Source line number.
 * @param fct   Name of the calling function.
 * @param s     Label printed before the dump (the traced expression).
 * @param data  Bytes to dump.
 * @param n     Number of bytes to dump.
 */
static inline void xtrace(const char *file, uint32_t line, const char *fct, const char *s, const void *data, uint32_t n)
{
    /* Convert in fixed-size chunks so stack usage does not grow with n. */
    enum { TRACE_CHUNK = 32 };
    const uint8_t *bytes = data;
    char buf[2 * TRACE_CHUNK + 1];
    uint32_t done = 0;

    printf("%s:%s:%u: %s = ", fct, file, (unsigned)line, s);
    while (done < n) {
        uint32_t len = (n - done < TRACE_CHUNK) ? (n - done) : TRACE_CHUNK;
        uint32_t o = _char2hex(buf, bytes + done, len, sizeof(buf));
        assert(o < sizeof(buf)); // Check if given buffer was big enough
        (void)o;                 /* silence unused warning under NDEBUG */
        fputs(buf, stdout);
        done += len;
    }
    putchar('\n');
    fflush(stdout);
}
#endif
Example of use:
#include "trace.h"
/**
 * Example: trace a scalar and the contents of an array of n ints.
 */
void foo(int a, int * A, int n)
{
    safetrace(a);
    /* trace() takes the address of its first argument and a size in bytes,
     * so pass the pointed-to object (not the pointer variable) and scale the
     * element count by the element size. */
    if (A != NULL && n > 0) {
        trace(*A, (uint32_t)n * sizeof(*A));
    }
}
Libraries
Standard libraries
- ctype.h
- Provides character classification functions like
isdigit
,islower
,isupper
,isalpha
...
- err.h
- Provides standard error reporting functions like
err
,errx
... These replace the need for custom die(int errcode, const char * fmt, ...)
functions.
if ( bind(sockfd, (struct sockaddr *)&server, sizeof(server)) < 0 ) {
err(1, "bind failed");
}
- unistd.h
- Provides
getopt
for parsing of command-line parameters (see C examples).
P99
P99 is not a regular library but instead a set of include files that offer powerful preprocessor macros (like P99_DUPL(...)
, P99_NARG(...)). From the website:
P99 is not a C library in the classical sense but merely a collection of include files:
- There is no binary library to be linked to your executable. The few functions that are provided are small wrappers that are compiled directly into your code.
- There is nothing to configure, P99 include files should work out of the box with any conforming C99 compiler.
Math
libcpucycles
- libcpucycles is a public-domain microlibrary for counting CPU cycles.
- See https://cpucycles.cr.yp.to/
Debugging
See general page on Debugging.
Testing / validation
- C Bounded Model Checker
- UBSAN / ASAN
- See clang.
Embedded C
Some tips for embedded development in C.
Enum are not always int
Some compilers do not necessarily map enum types to int. The standard says it is platform-dependent. Some compilers may use (or be configured to use) the smallest data type that can hold the values of all enumerators.
To prevent strange behaviour, always enforce the enum size as follows:
/* Enumeration padded with a sentinel so that its storage cannot shrink:
 * 0x7FFFFFFF does not fit in any signed type narrower than 32 bits. */
typedef enum MyEnum_t {
first = 1,
second = 2,
/* Sentinel only -- presumably never meant to be used as a real value. */
_forceint = 0x7FFFFFFF // To force 32-bit (signed) enum
} MyEnum_t;
Using packed enum
Some compilers support storing enum using the smallest data type (signed or unsigned):
- Command-line options
- On gcc, use option
--short-enums
or -fshort-enums
:
gcc --short-enums -c file.c
gcc -fshort-enums -c file.c
- On clang, use option
-fshort-enums
:
clang -fshort-enums -c file.c
- ARM compilers have similar options
- Source code
- gcc and alike: Use attribute
__attribute__((packed))
/* Enum stored in the smallest integer type that can hold its values
 * (GCC/Clang `packed` type attribute). */
typedef enum {a,b} __attribute__((packed)) letters;
- VC: Supports only packed structures
/* Wrap the enum in a structure packed to 1-byte alignment. The typedef needs
 * a name and a terminating semicolon to be a valid declaration. */
#pragma pack(push,1)
typedef struct {
    letters x;
} packed_letters;
#pragma pack(pop)
Set section types in C file
This does NOT work with Clang at least
Section types can easily be set in assembly files (*.S). We can do the same in C files using a little hack [20]:
/* The section name string smuggles in its own flags and type; the trailing
 * '#' turns whatever the compiler appends after it into an assembler comment. */
unsigned int __attribute__((section(".myVarSection,\"aw\",@nobits#"))) myVar;
This says that the variable myVar
must be allocated in a custom section, with a nobits
attribute (like .bss
section).
Thanks to the terminating #
, the attribute type added by the compiler is commented:
.section .myVarSection,"aw",@nobits#,"aw",@progbits
Make reproducible builds
Reproducible builds allow for always generating the same binaries as the result of compilation. The binaries include object files, libraries, ELF files.
Some tips:
- Use flag
D
with ar
:
[D] - use zero for timestamps and uids/gids (default)
Portability
Detect platform and compiler
To get the full list of all the predefined macros that the compiler uses:
# CLANG - list of all the predefined macros that the compiler uses
clang -dM -E -x c /dev/null
Win32 vs Linux
Linux | Win32 | Comment |
---|---|---|
snprintf |
_snprintf |
deprecated, replaced by _snprintf_s but not compatible
|
Cryptography
Compute AES-GCM using libgcrypt
// Inspired from: https://stackoverflow.com/questions/21882115/aes-gcm-using-libgcrypt-api-in-c
// Encrypt a 4-byte buffer with the "aes128" cipher in GCM mode and fetch the 16-byte tag.
// NOTE(review): every gcry_* return value is ignored below; check them in real code.
// Demo inputs: 16-byte all-zero key, 12-byte IV.
uint8_t key[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, };
uint8_t iv [12] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, };
/* uint8_t assoc[2] = { 0x11, 0x22, }; */
uint8_t plain[4] = { 0xaa, 0xbb, 0xcc, 0xdd, };
// Outputs: ciphertext has the same length as the plaintext; tag is 16 bytes.
uint8_t cipher[4];
uint8_t tag[16];
gcry_cipher_hd_t hd;
// Look up the algorithm id by name, then open a GCM-mode handle for it.
int algo = gcry_cipher_map_name("aes128");
gcry_cipher_open(&hd, algo, GCRY_CIPHER_MODE_GCM, 0);
gcry_cipher_setkey(hd, key, sizeof(key));
gcry_cipher_setiv(hd, iv, sizeof(iv));
// Associated data is disabled in this example (see the commented-out assoc array above).
// gcry_cipher_authenticate (hd, assoc , sizeof(assoc) )
gcry_cipher_encrypt(hd, cipher, sizeof(cipher), plain, sizeof(plain));
// The tag is fetched after encryption, then the handle is released.
gcry_cipher_gettag (hd, tag, sizeof(tag) );
gcry_cipher_close(hd);