• Designing a Hex Dump Filter—Using Successive Refinement

    Define the Program


    Starting with Pseudo-Code


            Read a char from the input file

            Convert the character to a hex string

            Write the hex string to the output file

            Repeat until done.


            Read a character from standard input (stdin)

            Split the 8-bit character into two nybbles, convert each nybble to a hex character, and then put the two characters together

            Write the hex string to standard output (stdout)

            Repeat until done.

            Exit the program by calling sys_exit.


            Read a character from standard input (stdin)

            Test if we have reached End Of File (EOF)

            If we have reached EOF, we’re done, so jump to exit

            Split the 8-bit character into two nybbles, convert each nybble to a hex character, and then put the two characters together

            Write the hex string to standard output (stdout)

            Go back and read another character.

            Exit the program by calling sys_exit.


    Read: Set up registers for the sys_read kernel call.
              Call sys_read to read from stdin.
              Test for EOF.
              If we are at EOF, jump to Exit

              Split the 8-bit character into two nybbles
              Convert each nybble into a hex character
              Put the two hex characters together

    Write: Set up registers for the sys_write kernel call.
               Call sys_write to write stdout.
               Jump back to Read and get another character

    Exit:    Set up registers for terminating the program via sys_exit.
               Call sys_exit.

    5、Further refine how a nybble is converted into a hex character

               Set up a lookup table for mapping a nybble value (0–15) to its hex digit:            Digits “0123456789ABCDEF”
               Set up an output string for putting the two hex characters together:        HexStr “ 00”

    Read:  Set up registers for the sys_read kernel call.
               Call sys_read to read from stdin.
               Test for EOF.
               If we are at EOF, jump to Exit

               Split the 8-bit character into two nybbles
               For each nybble, add its value to the address of Digits; the byte at that address is the hex character
               Put the least significant hex character in [HexStr+2].
               Put the most significant hex character in [HexStr+1].

    Write: Set up registers for the sys_write kernel call.
               Call sys_write to write stdout.
               Jump back to Read and get another character

    Exit:    Set up registers for terminating the program via sys_exit.
               Call sys_exit.


    ; Executable name	: myhexdump1
    ; Version		: 1.0
    ; Created date		: 3/22/2011
    ; Last update		: 3/22/2011
    ; Author		: Eric Wang
    ; Description		: A simple program in assembly for Linux, using NASM 2.05,
    ;	demonstrating the conversion of binary values to hexadecimal strings.
    ;	It acts as a very simple hex dump utility for files.
    ;	Every byte read from stdin is written to stdout as the three
    ;	characters " XX" (a space followed by two uppercase hex digits).
    ;	No newline is ever written.
    ; Run it this way:
    ;	myhexdump1 < (input file)
    ; Build using these commands:
    ;	nasm -f elf -g -F dwarf myhexdump1.asm
    ;	ld -o myhexdump1 myhexdump1.o
    ;	(32-bit code using the int 80h interface; when linking on a 64-bit
    ;	host, ld usually needs the extra option "-m elf_i386")
    section .bss			; Section containing uninitialized data
    	Buff:	resb 1		; One-byte input buffer filled by sys_read
    section .data			; Section containing initialized data
    	Digits: db "0123456789ABCDEF"	; Lookup table: nybble value (0-15) -> ASCII hex digit
    	HexStr: db " 00"		; Output template; the two '0' slots are patched for each byte
    	HEXLEN: equ $-HexStr		; Length of HexStr in bytes
    section .text			; Section containing code
    	global _start		; Linker needs this to find the entry point!
    _start:
    	nop			; This no-op keeps gdb happy...
    ; Read one character from stdin
    Read:
    	mov eax,3		; Specify sys_read call
    	mov ebx,0		; Specify File Descriptor 0: Standard Input
    	mov ecx,Buff		; Pass address of the buffer to read to
    	mov edx,1		; Tell sys_read to read one char from stdin
    	int 80h			; Call sys_read; eax = number of bytes read
    	cmp eax,0		; eax==0 means EOF on stdin, eax<0 means the read failed
    	jle Done		; Stop on EOF or error (signed compare) instead of converting stale data
    ; Split the 8-bit char into two nybbles
    	xor eax,eax			; Clear eax so that al can be used as a table index
    	mov al,byte [Buff]		; Put the char into eax register, for low nybble
    	mov ebx,eax			; Second copy of the char, for high nybble
    	and al,0fh			; Mask off the high 4 bits; eax = low nybble
    	mov al,byte [Digits+eax]	; Look up the hex char for the low nybble
    	mov byte [HexStr+2],al		; Put the hex char into HexStr at least significant place
    	shr bl,4			; Shift the high 4 bits into the low 4 bits; ebx = high nybble
    	mov bl,byte [Digits+ebx]	; Look up the hex char for the high nybble
    	mov byte [HexStr+1],bl		; Put the hex char into HexStr at most significant place
    ; Write the three-character hex string to stdout
    	mov eax,4		; Specify sys_write call
    	mov ebx,1		; Specify File Descriptor 1: Standard Output
    	mov ecx,HexStr		; Pass address of the string to write
    	mov edx,HEXLEN		; Pass number of characters to write
    	int 80h			; Call sys_write
    	jmp Read		; Jump back to Read and get another character
    ; All done!
    Done:
    	mov eax,1		; Specify sys_exit call
    	mov ebx,0		; Return code of zero to Linux
    	int 80h			; Call sys_exit

    6、Buffered file scan

               Set up a 16-byte buffer.

               Set up a lookup table for mapping a nybble value (0–15) to its hex digit:            Digits “0123456789ABCDEF”
               Set up an output string for putting the hex characters of one buffer together:        HexStr “ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00”,10

    Read:  Set up registers for the sys_read kernel call.
               Call sys_read to read a buffer full of characters from stdin.
               Test for EOF.
               If we are at EOF, jump to Exit

               Set up registers as a pointer to scan the buffer
    Scan:  Split the 8-bit character at the buffer pointer into two nybbles
               For each nybble, add its value to the address of Digits; the byte at that address is the hex character
               Put the least significant hex character in [HexStr+bufferPointer*3+2].
               Put the most significant hex character in [HexStr+bufferPointer*3+1].
               Increment the buffer pointer.
               If we still have characters in the buffer, jump to Scan.

    Write: Set up registers for the sys_write kernel call.
               Call sys_write to write the processed HexStr to stdout.
               Jump back to Read and get another buffer full of characters.

    Exit:    Set up registers for terminating the program via sys_exit.
               Call sys_exit.

    7、Start talking specifics: which register do what:

               Set up a 16-byte buffer.

               Set up a lookup table for mapping a nybble value (0–15) to its hex digit:            Digits: db “0123456789ABCDEF”
               Set up an output string for putting the hex characters of one buffer together:        HexStr: db “ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00”,10

    Read:  Set up registers for the sys_read kernel call.
               Call sys_read to read a buffer full of characters from stdin.
               Store the number of characters read in esi
               Test for EOF (eax==0).
               If we are at EOF, jump to Exit.

               Set ebp to 0 so that it points to the first character in the buffer
    Scan:  Move the byte at [Buff+ebp] to al
               Copy eax to ebx (al will then supply the low nybble and bl the high nybble)
               Mask al with 0Fh and add the result to the address of Digits; the byte at that address is the least significant hex character
               Shift bl right by 4 bits and add the result to the address of Digits; the byte at that address is the most significant hex character
               Put the least significant hex character in [HexStr+ebp*3+2].
               Put the most significant hex character in [HexStr+ebp*3+1].
               Increment ebp.
               If ebp < esi, jump to Scan.

    Write: Set up registers for the sys_write kernel call.
               Call sys_write to write the processed HexStr to stdout.
               Jump back to Read and get another buffer full of characters.

    Exit:    Set up registers for terminating the program via sys_exit.
               Call sys_exit.


    ; Executable name	: myhexdump2
    ; Version		: 2.0
    ; Created date		: 3/22/2011
    ; Last update		: 3/22/2011
    ; Author		: Eric Wang
    ; Description		: A simple program in assembly for Linux, using NASM 2.05,
    ;	demonstrating the conversion of binary values to hexadecimal strings.
    ;	It acts as a very simple hex dump utility for files with Buffer Scanning.
    ;	stdin is read up to BUFFLEN bytes at a time; each read produces one
    ;	output line of " XX" groups terminated by a newline.
    ; Register roles:
    ;	esi = number of bytes returned by the last sys_read
    ;	ebp = index of the byte currently being converted (0 .. esi-1)
    ;	ecx = ebp*3, offset of that byte's " XX" group inside HexStr
    ; Run it this way:
    ;	myhexdump2 < (input file)
    ; Build using these commands:
    ;	nasm -f elf -g -F dwarf myhexdump2.asm
    ;	ld -o myhexdump2 myhexdump2.o
    ;	(32-bit code using the int 80h interface; when linking on a 64-bit
    ;	host, ld usually needs the extra option "-m elf_i386")
    section .bss			; Section containing uninitialized data
    	BUFFLEN: equ 16		; Our buffer length is 16
    	Buff:	resb BUFFLEN	; Text buffer itself; resb reserves BUFFLEN uninitialized bytes
    section .data			; Section containing initialized data
    	Digits: db "0123456789ABCDEF"	; Lookup table: nybble value (0-15) -> ASCII hex digit
    	HexStr: db " 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00",10	; One " 00" slot per buffer byte, then newline
    	HEXLEN: equ $-HexStr		; Length of HexStr in bytes, newline included
    section .text			; Section containing code
    	global _start		; Linker needs this to find the entry point!
    _start:
    	nop			; This no-op keeps gdb happy...
    ; Read a buffer full of characters from stdin
    Read:
    	mov eax,3		; Specify sys_read call
    	mov ebx,0		; Specify File Descriptor 0: Standard Input
    	mov ecx,Buff		; Pass address of the buffer to read to
    	mov edx,BUFFLEN		; Tell sys_read to read BUFFLEN characters from stdin
    	int 80h			; Call sys_read; eax = number of bytes read
    	mov esi,eax		; Store the number of characters read in esi
    	cmp eax,0		; eax==0 means EOF on stdin, eax<0 means the read failed
    	jle Done		; Stop on EOF or error; a negative count must never reach the scan loop
    	xor ebp,ebp		; Set ebp to 0 to point to the first char in buffer
    ; Split the 8-bit char at [Buff+ebp] into two nybbles
    Scan:
    	xor eax,eax			; Clear eax so that al can be used as a table index
    	mov al,byte [Buff+ebp]		; Put the char into eax register, for low nybble
    	mov ebx,eax			; Second copy of the char, for high nybble
    	mov ecx,ebp			; First step of the x3: ecx = ebp
    	shl ecx,1			; ecx = ebp * 2
    	add ecx,ebp			; ecx = ebp * 3, start of this byte's " XX" group in HexStr
    	and al,0fh			; Mask off the high 4 bits; eax = low nybble
    	mov al,byte [Digits+eax]	; Look up the hex char for the low nybble
    	mov byte [HexStr+ecx+2],al	; Put the hex char into HexStr at least significant place
    	shr bl,4			; Shift the high 4 bits into the low 4 bits; ebx = high nybble
    	mov bl,byte [Digits+ebx]	; Look up the hex char for the high nybble
    	mov byte [HexStr+ecx+1],bl	; Put the hex char into HexStr at most significant place
    	inc ebp				; Advance to the next byte in the buffer
    	cmp ebp,esi			; Compare ebp with the number of bytes read
    	jb Scan				; Jump if Below (if ebp < esi, jump to Scan)
    ; Write the line of hex strings to stdout
    ; NOTE: the full HEXLEN bytes are always written, so after a short read the
    ; slots past esi still show digits left over from the previous buffer.
    	mov eax,4		; Specify sys_write call
    	mov ebx,1		; Specify File Descriptor 1: Standard Output
    	mov ecx,HexStr		; Pass address of the string to write
    	mov edx,HEXLEN		; Pass number of characters to write
    	int 80h			; Call sys_write
    	jmp Read		; Jump back to Read and get another buffer full of characters
    ; All done!
    Done:
    	mov eax,1		; Specify sys_exit call
    	mov ebx,0		; Return code of zero to Linux
    	int 80h			; Call sys_exit


    	; Refined Write step for myhexdump2: write only the hex pairs that belong
    	; to the bytes actually read, followed by a newline.
    	; In:   esi = number of bytes returned by the last sys_read
    	;       HexStr already holds the hex pairs for those bytes
    	; Uses: eax, ebx, ecx, edx for the sys_write call; edi remembers where
    	;       the temporary newline was stored (int 80h returns its result in
    	;       eax and leaves the other registers unchanged)
    	mov eax,4		; Specify sys_write call
    	mov ebx,1		; Specify File Descriptor 1: Standard Output
    	mov ecx,HexStr		; Pass address of the string to write
    	mov edx,HEXLEN		; Default length: the whole line, newline included
    	cmp esi,BUFFLEN		; Did the read fill the whole buffer?
    	je DoWrite		; If esi = BUFFLEN the buffer is full; write HexStr unchanged
    	mov edi,esi		; Short read: only esi bytes are valid
    	shl edi,1		; edi = esi * 2
    	add edi,esi		; edi = esi * 3, offset just past the last valid hex pair
    	mov byte [HexStr+edi],10	; End the line there with a temporary newline
    	mov edx,edi		; Length = esi * 3 characters...
    	inc edx			; ...plus one for the newline
    DoWrite:
    	int 80h			; Call sys_write
    	cmp esi,BUFFLEN		; Was HexStr modified for a short read?
    	je Read			; No: go straight back for the next buffer
    	mov byte [HexStr+edi],' '	; Yes: restore the separator, since a short read is not necessarily the last one
    	jmp Read		; Jump back to Read and get another buffer full of characters
