Self-reproducing program

From miki
Jump to navigation Jump to search

This is my attempt at writing the smallest self-reproducible code in the C language (as inspired by Ken Thompson's paper).

Unless otherwise stated the code can be tested with the following command (assuming the code is in file called a.c):

$ cat a.c; echo "====="; gcc -o a a.c; ./a>b.c; echo "====="; cat b.c; echo "====="; gcc -o b b.c; ./b>b.c; echo "====="; cat b.c

1st - 437 bytes - Using a Quoting Function

The idea is to use a quoting function so that the content of the string is quoted back when printed.

char *PGM="char *PGM=%c%s%c;\n#include <stdio.h>\n\nchar qd[10000];\nchar* quote(char* p) { char *q=qd; while(*p) if(*p==10) {*(q++)=92; *(q++)='n'; p++;} else *(q++)=*(p++); *q=0; return qd; }\nmain() {printf(PGM,34,quote(PGM),34);}\n";
#include <stdio.h>
char qd[10000];
char* quote(char* p) { char *q=qd; while(*p) if(*p==10) {*(q++)=92; *(q++)='n'; p++;} else *(q++)=*(p++); *q=0; return qd; }
main() {printf(PGM,34,quote(PGM),34);}

2nd - 288 bytes - Getting rid of the quoting function

We get rid of the the quoting function by using separate printf to generate the preprocessor lines, and we use the stringize macro operator

#include <stdio.h>
#define z(x) #x
char *S="; main() {printf(z(#include <stdio.h>%c),10);printf(z(#define z(x) #x%c),10);printf(z(char *S=%c%s%c%s%c),34,S,34,S,10);}"; main() {printf(z(#include <stdio.h>%c),10);printf(z(#define z(x) #x%c),10);printf(z(char *S=%c%s%c%s%c),34,S,34,S,10);}

3rd - 256 bytes - Merging in one printf

#include <stdio.h>
#define z(x) #x
char*S=";main(){printf(z(%s%c%s%cchar*S=%c%s%c%s%c),z(#include <stdio.h>),10,z(#define z(x) x),10,34,S,34,S,10);}";main(){printf(z(%s%c%s%cchar*S=%c%s%c%s%c),z(#include <stdio.h>),10,z(#define z(x) #x),10,34,S,34,S,10);}

4th - 227 bytes - Merging further

#include<stdio.h>
#define z(x)#x
char*S=";main(){printf(z(#include<stdio.h>%c#define z(x)#x%cchar*S=%c%s%c%s%c),10,10,34,S,34,S,10);}";main(){printf(z(#include<stdio.h>%c#define z(x)#x%cchar*S=%c%s%c%s%c),10,10,34,S,34,S,10);}

5th - 220 byte - More of stringize

#include <stdio.h>
#define z(x) #x
char*S=z(;main(){printf(z(#include <stdio.h>%c#define z(x) #x%cchar*S=z(%s)%s%c),10,10,S,S,10);});main(){printf(z(#include <stdio.h>%c#define z(x) #x%cchar*S=z(%s)%s%c),10,10,S,S,10);}

6th - 193 byte - Factoring string in a macro

The idea is to factorize the duplicated string as a macro that can be either used along with the stringizer (but we need an intermedia macro y(x) so that macro is replaced first - because x in z(x) is not expanded since x is prefix-ed with the stringizer operator #), or that can be directly executed.

#include<stdio.h>
#define z(x)#x
#define y(x)z(x)
#define w main(){printf("#include<stdio.h>%c#define z(x)#x%c#define y(x)z(x)%c#define w %s%cchar*S=y(w);w%c",10,10,10,S,10,10);}
char*S=y(w);w

7th - 178 byte - Inline \n character

Actually the stringizer operator is automatically replaced by their quoted equivalent so we don't need our printf("...%c...",10) trick

#include<stdio.h>
#define z(x)#x
#define y(x)z(x)
#define w main(){printf("#include<stdio.h>\n#define z(x)#x\n#define y(x)z(x)\n#define w %s\nchar*S=y(w);w\n",S);}
char*S=y(w);w

8th - 141 byte - We don't need explicit stdio.h

The code can still be compiled if we remove the #include<stdio.h>

#define z(x)#x
#define y(x)z(x)
#define w main(){printf("#define z(x)#x\n#define y(x)z(x)\n#define w %s\nchar*S=y(w);w\n",S);}
char*S=y(w);w

9th - 125 byte - Simplified

Idea is to go back to basic and remove as much as possible preprocessor directive, which takes valuable space. We deal with the newline using our %c trick.

#define z(x)#x
char*S=z(%s%cchar*S=z(%s);main(){printf(S,z(#define z(x)#x),10,S);});main(){printf(S,z(#define z(x)#x),10,S);}