This commit is contained in:
xenia 2020-09-07 01:31:24 -04:00
parent 9905e167a1
commit ac1ea37c54
2 changed files with 739 additions and 7 deletions

695
examples/elf.ksy Normal file
View File

@ -0,0 +1,695 @@
meta:
id: elf
title: Executable and Linkable Format
application: SVR4 ABI and up, many *nix systems
license: CC0-1.0
ks-version: 0.8
doc-ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=elf/elf.h;hb=HEAD
seq:
- id: magic
-orig-id: e_ident[EI_MAG0]..e[EI_MAG3]
size: 4
contents: [0x7f, "ELF"]
doc: File identification, must be 0x7f + "ELF".
- id: bits
-orig-id: e_ident[EI_CLASS]
type: u1
enum: bits
doc: |
File class: designates target machine word size (32 or 64
bits). The size of many integer fields in this format will
depend on this setting.
- id: endian
-orig-id: e_ident[EI_DATA]
type: u1
enum: endian
doc: Endianness used for all integers.
- id: ei_version
-orig-id: e_ident[EI_VERSION]
type: u1
doc: ELF header version.
- id: abi
-orig-id: e_ident[EI_OSABI]
type: u1
enum: os_abi
doc: |
Specifies which OS- and ABI-related extensions will be used
in this ELF file.
- id: abi_version
type: u1
doc: |
Version of ABI targeted by this ELF file. Interpretation
depends on `abi` attribute.
- id: pad
size: 7
- id: header
type: endian_elf
types:
# Decodes a flags word, passed in as the `value` parameter, into one boolean
# instance per bit (or bit range). Instantiated from
# `program_header.flags_obj` as `phdr_type_flags(flags64|flags32)`.
phdr_type_flags:
params:
- id: value
type: u4
instances:
# true when bit 0x04 of `value` is set
read:
value: value & 0x04 != 0
# true when bit 0x02 of `value` is set
write:
value: value & 0x02 != 0
# true when bit 0x01 of `value` is set
execute:
value: value & 0x01 != 0
# true when any of the top four bits (0xf0000000) is set
mask_proc:
value: value & 0xf0000000 != 0
# Decodes a section header's `sh_flags` word, passed in as the `value`
# parameter, into one boolean instance per flag bit (or bit range).
# Instantiated from `section_header.flags_obj`.
#
# Bit values follow glibc's elf.h (see `meta/doc-ref`). In particular
# SHF_ORDERED is (1 << 30) = 0x40000000 and SHF_EXCLUDE is (1U << 31) =
# 0x80000000; both live inside the processor-specific range, not inside
# the OS-specific range.
section_header_flags:
  params:
    - id: value
      type: u4
  instances:
    write:
      value: value & 0x01 != 0
      doc: "writable"
    alloc:
      value: value & 0x02 != 0
      doc: "occupies memory during execution"
    exec_instr:
      value: value & 0x04 != 0
      doc: "executable"
    merge:
      value: value & 0x10 != 0
      doc: "might be merged"
    strings:
      value: value & 0x20 != 0
      doc: "contains nul-terminated strings"
    info_link:
      value: value & 0x40 != 0
      doc: "'sh_info' contains SHT index"
    link_order:
      value: value & 0x80 != 0
      doc: "preserve order after combining"
    os_non_conforming:
      value: value & 0x100 != 0
      doc: "non-standard OS specific handling required"
    group:
      value: value & 0x200 != 0
      doc: "section is member of a group"
    tls:
      value: value & 0x400 != 0
      doc: "section hold thread-local data"
    ordered:
      # SHF_ORDERED = 1 << 30
      value: value & 0x40000000 != 0
      doc: "special ordering requirement (Solaris)"
    exclude:
      # SHF_EXCLUDE = 1 << 31
      value: value & 0x80000000 != 0
      doc: "section is excluded unless referenced or allocated (Solaris)"
    mask_os:
      # true when any bit of the OS-specific range is set
      value: value & 0x0ff00000 != 0
      doc: "OS-specific"
    mask_proc:
      # true when any bit of the processor-specific range is set
      value: value & 0xf0000000 != 0
      doc: "Processor-specific"
dt_flag_1_values:
params:
- id: value
type: u4
instances:
now:
value: value & 0x00000001 != 0
doc: "Set RTLD_NOW for this object."
rtld_global:
value: value & 0x00000002 != 0
doc: "Set RTLD_GLOBAL for this object."
group:
value: value & 0x00000004 != 0
doc: "Set RTLD_GROUP for this object."
nodelete:
value: value & 0x00000008 != 0
doc: "Set RTLD_NODELETE for this object."
loadfltr:
value: value & 0x00000010 != 0
doc: "Trigger filtee loading at runtime."
initfirst:
value: value & 0x00000020 != 0
doc: "Set RTLD_INITFIRST for this object"
noopen:
value: value & 0x00000040 != 0
doc: "Set RTLD_NOOPEN for this object."
origin:
value: value & 0x00000080 != 0
doc: "$ORIGIN must be handled."
direct:
value: value & 0x00000100 != 0
doc: "Direct binding enabled."
trans:
value: value & 0x00000200 != 0
interpose:
value: value & 0x00000400 != 0
doc: "Object is used to interpose."
nodeflib:
value: value & 0x00000800 != 0
doc: "Ignore default lib search path."
nodump:
value: value & 0x00001000 != 0
doc: "Object can't be dldump'ed."
confalt:
value: value & 0x00002000 != 0
doc: "Configuration alternative created."
endfiltee:
value: value & 0x00004000 != 0
doc: "Filtee terminates filters search."
dispreldne:
value: value & 0x00008000 != 0
doc: "Disp reloc applied at build time."
disprelpnd:
value: value & 0x00010000 != 0
doc: "Disp reloc applied at run-time."
nodirect:
value: value & 0x00020000 != 0
doc: "Object has no-direct binding."
ignmuldef:
value: value & 0x00040000 != 0
noksyms:
value: value & 0x00080000 != 0
nohdr:
value: value & 0x00100000 != 0
edited:
value: value & 0x00200000 != 0
doc: "Object is modified after built."
noreloc:
value: value & 0x00400000 != 0
symintpose:
value: value & 0x00800000 != 0
doc: "Object has individual interposers."
globaudit:
value: value & 0x01000000 != 0
doc: "Global auditing required."
singleton:
value: value & 0x02000000 != 0
doc: "Singleton symbols are used."
stub:
value: value & 0x04000000 != 0
pie:
value: value & 0x08000000 != 0
endian_elf:
meta:
endian:
switch-on: _root.endian
cases:
'endian::le': le
'endian::be': be
seq:
- id: e_type
type: u2
enum: obj_type
- id: machine
type: u2
enum: machine
- id: e_version
type: u4
# e_entry
- id: entry_point
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
# e_phoff
- id: program_header_offset
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
# e_shoff
- id: section_header_offset
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
# e_flags
- id: flags
size: 4
# e_ehsize
- id: e_ehsize
type: u2
# e_phentsize
- id: program_header_entry_size
type: u2
# e_phnum
- id: qty_program_header
type: u2
# e_shentsize
- id: section_header_entry_size
type: u2
# e_shnum
- id: qty_section_header
type: u2
# e_shstrndx
- id: section_names_idx
type: u2
types:
# Elf(32|64)_Phdr
program_header:
seq:
# p_type
- id: type
type: u4
enum: ph_type
# p_flags
- id: flags64
type: u4
if: _root.bits == bits::b64
# p_offset
- id: offset
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
# p_vaddr
- id: vaddr
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
# p_paddr
- id: paddr
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
# p_filesz
- id: filesz
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
# p_memsz
- id: memsz
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
# p_flags
- id: flags32
type: u4
if: _root.bits == bits::b32
# p_align
- id: align
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
instances:
dynamic:
io: _root._io
pos: offset
type: dynamic_section
size: filesz
if: type == ph_type::dynamic
flags_obj:
type: phdr_type_flags(flags64|flags32)
-webide-parse-mode: eager
-webide-representation: "{type} - f:{flags_obj:flags} (o:{offset}, s:{filesz:dec})"
section_header:
-orig-id: Elf(32|64)_Shdr
seq:
- id: ofs_name
-orig-id: sh_name
type: u4
- id: type
-orig-id: sh_type
type: u4
enum: sh_type
- id: flags
-orig-id: sh_flags
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
- id: addr
-orig-id: sh_addr
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
- id: ofs_body
-orig-id: sh_offset
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
- id: len_body
-orig-id: sh_size
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
- id: linked_section_idx
-orig-id: sh_link
type: u4
- id: info
-orig-id: sh_info
size: 4
- id: align
-orig-id: sh_addralign
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
- id: entry_size
-orig-id: sh_entsize
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
instances:
# Lazily parsed contents of the section: `len_body` bytes read from the
# root stream at absolute offset `ofs_body`. The structure used depends on
# the section type; types without a case are left as raw bytes.
#
# Only members that exist in the `sh_type` enum may be used as cases. The
# enum defines no `dynstr` member, so the former 'sh_type::dynstr' case
# could not resolve and has been dropped.
# NOTE(review): sections named `.dynstr` are expected to carry
# sh_type::strtab and so are still covered - confirm against sample files.
body:
  io: _root._io
  pos: ofs_body
  size: len_body
  type:
    switch-on: type
    cases:
      'sh_type::dynamic': dynamic_section
      'sh_type::strtab': strings_struct
      'sh_type::dynsym': dynsym_section
name:
io: _root.header.strings._io
pos: ofs_name
type: strz
encoding: ASCII
-webide-parse-mode: eager
flags_obj:
type: section_header_flags(flags)
-webide-parse-mode: eager
# Web IDE summary line for a section header. The placeholders must name
# attributes of this type: the body offset/length fields are `ofs_body` and
# `len_body` (there are no `offset` / `size` attributes here).
-webide-representation: "{name} ({type}) - f:{flags_obj:flags} (o:{ofs_body}, s:{len_body:dec})"
# A run of zero-terminated ASCII strings that fills the whole (sub)stream:
# entries are read one after another until end of stream.
# Used for `sh_type::strtab` section bodies and for the header-level
# `strings` instance (the section-name table).
strings_struct:
seq:
- id: entries
type: strz
repeat: eos
encoding: ASCII
# A sequence of `dynamic_section_entry` records that fills the whole
# (sub)stream: entries are read until end of stream.
# Used both for `ph_type::dynamic` segments and `sh_type::dynamic` sections.
dynamic_section:
seq:
- id: entries
type: dynamic_section_entry
repeat: eos
# One tag/value pair of the dynamic section. Both fields are 4 bytes wide
# in 32-bit files and 8 bytes wide in 64-bit files (selected by `_root.bits`).
dynamic_section_entry:
seq:
# raw numeric tag; see `tag_enum` for the symbolic form
- id: tag
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
# payload whose interpretation depends on `tag`
- id: value_or_ptr
type:
switch-on: _root.bits
cases:
'bits::b32': u4
'bits::b64': u8
instances:
# `tag` reinterpreted as a member of the `dynamic_array_tags` enum
tag_enum:
value: tag
enum: dynamic_array_tags
# bit-by-bit decoding of `value_or_ptr`, only present for flags_1 entries
flag_1_values:
type: dt_flag_1_values(value_or_ptr)
if: "tag_enum == dynamic_array_tags::flags_1"
-webide-parse-mode: eager
-webide-representation: "{tag_enum}: {value_or_ptr} {flag_1_values:flags}"
dynsym_section:
seq:
- id: entries
type:
switch-on: _root.bits
cases:
'bits::b32': dynsym_section_entry32
'bits::b64': dynsym_section_entry64
repeat: eos
dynsym_section_entry32:
seq:
- id: name_offset
type: u4
- id: value
type: u4
- id: size
type: u4
- id: info
type: u1
- id: other
type: u1
- id: shndx
type: u2
dynsym_section_entry64:
seq:
- id: name_offset
type: u4
- id: info
type: u1
- id: other
type: u1
- id: shndx
type: u2
- id: value
type: u8
- id: size
type: u8
instances:
program_headers:
pos: program_header_offset
repeat: expr
repeat-expr: qty_program_header
size: program_header_entry_size
type: program_header
section_headers:
pos: section_header_offset
repeat: expr
repeat-expr: qty_section_header
size: section_header_entry_size
type: section_header
strings:
pos: section_headers[section_names_idx].ofs_body
size: section_headers[section_names_idx].len_body
type: strings_struct
enums:
# EI_CLASS
bits:
# ELFCLASS32
1: b32
# ELFCLASS64
2: b64
# EI_DATA
endian:
# ELFDATA2LSB
1: le
# ELFDATA2MSB
2: be
os_abi:
0: system_v
1: hp_ux
2: netbsd
3: gnu
6: solaris
7: aix
8: irix
9: freebsd
0xa: tru64 # Compaq TRU64 UNIX
0xb: modesto # Novell Modesto
0xc: openbsd
0xd: openvms
0xe: nsk # Hewlett-Packard Non-Stop Kernel
0xf: aros # Amiga Research OS
0x10: fenixos # The FenixOS highly scalable multi-core OS
0x11: cloudabi # Nuxi CloudABI
0x12: openvos # Stratus Technologies OpenVOS
# e_type
obj_type:
# ET_REL
1: relocatable
# ET_EXEC
2: executable
# ET_DYN
3: shared
# ET_CORE
4: core
machine:
0x00: not_set
# EM_SPARC
0x02: sparc
# EM_386
0x03: x86
0x08: mips
0x14: powerpc
# EM_ARM
0x28: arm
# EM_SH
0x2A: superh
0x32: ia_64
# EM_X86_64
0x3E: x86_64
0xB7: aarch64
0xF3: riscv
0xF7: bpf
ph_type:
0: null_type
1: load
2: dynamic
3: interp
4: note
5: shlib
6: phdr
7: tls
# 0x60000000: loos
0x65041580: pax_flags
0x6fffffff: hios
# 0x70000000: loproc
0x70000001: arm_exidx
# 0x7fffffff: hiproc
0x6474e550: gnu_eh_frame
0x6474e551: gnu_stack
0x6474e552: gnu_relro
# http://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-94076.html#chapter6-73445
sh_type:
0: null_type
1: progbits
2: symtab
3: strtab
4: rela
5: hash
6: dynamic
7: note
8: nobits
9: rel
10: shlib
11: dynsym
14: init_array
15: fini_array
16: preinit_array
17: group
18: symtab_shndx
# 0x60000000: loos
# 0x6fffffef: losunw
0x6fffffef: sunw_capchain
0x6ffffff0: sunw_capinfo
0x6ffffff1: sunw_symsort
0x6ffffff2: sunw_tlssort
0x6ffffff3: sunw_ldynsym
0x6ffffff4: sunw_dof
0x6ffffff5: sunw_cap
0x6ffffff6: sunw_signature
0x6ffffff7: sunw_annotate
0x6ffffff8: sunw_debugstr
0x6ffffff9: sunw_debug
0x6ffffffa: sunw_move
0x6ffffffb: sunw_comdat
0x6ffffffc: sunw_syminfo
0x6ffffffd: sunw_verdef
0x6ffffffe: sunw_verneed
0x6fffffff: sunw_versym
# 0x6fffffff: HISUNW
# 0x6fffffff: hios
# 0x70000000: loproc
0x70000000: sparc_gotdata
0x70000001: amd64_unwind
# 0x70000001: arm_exidx
0x70000002: arm_preemptmap
0x70000003: arm_attributes
# 0x7fffffff: hiproc
# 0x80000000: louser
# 0xffffffff: hiuser
dynamic_array_tags:
0: "null" # Marks end of dynamic section
1: needed # Name of needed library
2: pltrelsz # Size in bytes of PLT relocs
3: pltgot # Processor defined value
4: hash # Address of symbol hash table
5: strtab # Address of string table
6: symtab # Address of symbol table
7: rela # Address of Rela relocs
8: relasz # Total size of Rela relocs
9: relaent # Size of one Rela reloc
10: strsz # Size of string table
11: syment # Size of one symbol table entry
12: init # Address of init function
13: fini # Address of termination function
14: soname # Name of shared object
15: rpath # Library search path (deprecated)
16: symbolic # Start symbol search here
17: rel # Address of Rel relocs
18: relsz # Total size of Rel relocs
19: relent # Size of one Rel reloc
20: pltrel # Type of reloc in PLT
21: debug # For debugging; unspecified
22: textrel # Reloc might modify .text
23: jmprel # Address of PLT relocs
24: bind_now # Process relocations of object
25: init_array # Array with addresses of init fct
26: fini_array # Array with addresses of fini fct
27: init_arraysz # Size in bytes of DT_INIT_ARRAY
28: fini_arraysz # Size in bytes of DT_FINI_ARRAY
29: runpath # Library search path
30: flags # Flags for the object being loaded
32: preinit_array # Array with addresses of preinit fct
33: preinit_arraysz # Size in bytes of DT_PREINIT_ARRAY
34: maxpostags # Number used
0x6000000d: sunw_auxiliary
0x6000000e: sunw_filter
0x60000010: sunw_cap
0x60000011: sunw_symtab
0x60000012: sunw_symsz
0x60000013: sunw_sortent
0x60000014: sunw_symsort
0x60000015: sunw_symsortsz
0x60000016: sunw_tlssort
0x60000017: sunw_tlssortsz
0x60000018: sunw_capinfo
0x60000019: sunw_strpad
0x6000001a: sunw_capchain
0x6000001b: sunw_ldmach
0x6000001d: sunw_capchainent
0x6000001f: sunw_capchainsz
0x6ffffdf5: gnu_prelinked # Prelinking timestamp
0x6ffffdf6: gnu_conflictsz # Size of conflict section
0x6ffffdf7: gnu_liblistsz # Size of library list
0x6ffffdf8: checksum
0x6ffffdf9: pltpadsz
0x6ffffdfa: moveent
0x6ffffdfb: movesz
0x6ffffdfc: feature_1 # Feature selection (DTF_*).
0x6ffffdfd: posflag_1 # Flags for DT_* entries, effecting the following DT_* entry.
0x6ffffdfe: syminsz # Size of syminfo table (in bytes)
0x6ffffdff: syminent # Entry size of syminfo
0x6ffffef5: gnu_hash
0x6ffffef6: tlsdesc_plt
0x6ffffef7: tlsdesc_got
0x6ffffef8: gnu_conflict
0x6ffffef9: gnu_liblist
0x6ffffefa: config
0x6ffffefb: depaudit
0x6ffffefc: audit
0x6ffffefd: pltpad
0x6ffffefe: movetab
0x6ffffeff: syminfo
0x6ffffff0: versym
0x6ffffff9: relacount
0x6ffffffa: relcount
0x6ffffffb: flags_1
0x6ffffffc: verdef
0x6ffffffd: verdefnum
0x6ffffffe: verneed
0x6fffffff: verneednum
0x70000001: sparc_register
0x7ffffffd: auxiliary
0x7ffffffe: used
0x7fffffff: filter

View File

@ -6,8 +6,13 @@
;; meow
;; sym+ : (or/c symbol? string?) ... -> symbol?
;; Glues all arguments together, in order, into a single symbol.
;; Symbols are turned into their names first; anything else is handed to
;; string-append unchanged.
(define-for-syntax sym+
  (lambda parts
    (let ([as-text (lambda (piece)
                     (cond
                       [(symbol? piece) (symbol->string piece)]
                       [else piece]))])
      (string->symbol (apply string-append (map as-text parts))))))
;; makes a syntax error raiser
(define-for-syntax (rse msg)
(define-for-syntax (make-rse msg)
(lambda () (raise-syntax-error #f msg)))
;; converts a ksy id to a racket id
@ -15,18 +20,48 @@
;; kaitai-str->sym : string? -> symbol?
;; Every underscore in str becomes a dash, then the result is interned,
;; e.g. "section_header" -> 'section-header.
(define-for-syntax (kaitai-str->sym str)
  (let ([dashed (regexp-replace* #px"_" str "-")])
    (string->symbol dashed)))
;; get-id : hash? (or/c string? #f) -> symbol?
;; Picks the id for a type: given-id when it is not #f, otherwise the "id"
;; entry of top's "meta" hash (a missing "meta" is treated as empty).
;; When neither is available the make-rse thunk raises a syntax error.
;; The chosen string is converted with kaitai-str->sym.
(define-for-syntax (get-id top given-id)
  ;; only consulted when no id was passed in
  (define (meta-id)
    (let ([meta (hash-ref top "meta" (hash))])
      (hash-ref meta "id"
                (make-rse "type has no meta.id and none was provided"))))
  (kaitai-str->sym
   (if given-id
       given-id
       (meta-id))))
;; get-id-seq : hash? (or/c string? #f) -> (values symbol? any/c)
;; Shared lookup for the generators. Returns two values:
;;   1. the type's id as a symbol (see get-id)
;;   2. the value stored under "seq" in top
;; A missing "seq" entry raises a syntax error through make-rse.
(define-for-syntax (get-id-seq top given-id)
  (values (get-id top given-id)
          (hash-ref top "seq" (make-rse "form must have a seq element"))))
;; generates struct definitions
(define-for-syntax (gen-struct top [given-id #f])
(define id (or given-id (hash-ref (hash-ref top "meta" (hash)) "id"
(rse "type has no meta.id and none was provided"))))
(define id-sym (kaitai-str->sym id))
(define seq (hash-ref top "seq" (rse "form must have a seq element")))
(define-values [id-sym seq] (get-id-seq top given-id))
(define instances (hash-ref top "instances" (hash)))
#`((provide [struct-out #,id-sym])
(struct #,id-sym
[#,@(map (lambda (a) (kaitai-str->sym (hash-ref a "id" (rse "seq element has no id"))))
[#,@(map (lambda (a) (kaitai-str->sym (hash-ref a "id" (make-rse "seq element has no id"))))
seq)] #:transparent)))
;; generates parser function
;; top      : hash describing the type (looked up via get-id-seq)
;; given-id : optional id string; when #f the id comes from top's meta.id
;; Returns a syntax list of two forms: a definition of `parse-<id>` taking
;; one argument `input` (the body is still a stub returning void) and a
;; matching provide.
;; get-id-seq is still called for `seq` even though it is unused so far,
;; because it raises a syntax error when the type has no "seq" element.
(define-for-syntax (gen-parser top [given-id #f])
  (define-values [id-sym seq] (get-id-seq top given-id))
  (define parser-name (sym+ "parse-" id-sym))
  ;; inside #` the escape is #, (unsyntax); a plain , would be left in the
  ;; output as a literal (unquote ...) form instead of the computed name
  #`((define (#,parser-name input)
       (void))
     (provide #,parser-name)))
;; runtime lib
;; Submodule holding the run-time helpers; everything it defines is
;; exported and then re-exported from the enclosing module.
(module runtime racket/base
  ;; kaitai:->binaryio : any/c -> any/c
  ;; Currently hands its argument back untouched.
  ;; TODO
  (define kaitai:->binaryio
    (lambda (thing) thing))

  ;; kaitai:uwu : -> void?
  ;; Prints "uwu" followed by a newline.
  (define kaitai:uwu
    (lambda () (displayln "uwu")))

  (provide (all-defined-out)))

(provide (all-from-out 'runtime))
(require 'runtime)
;; language infrastructure
(define-syntax (kaitai:module-begin stx)
@ -34,7 +69,9 @@
(define top-struct (gen-struct (syntax->datum body)))
; (displayln top-struct)
#`(#%module-begin
#,@top-struct))
#,@top-struct
(module+ main
(kaitai:uwu))))
(module reader syntax/module-reader
#:read kaitai-read