Upgrade to Pro — share decks privately, control downloads, hide ads and more …

StaticScript - TypeScript compiler on top of TypeScript as frontend and LLVM as backend #1

StaticScript - TypeScript compiler on top of TypeScript as frontend and LLVM as backend #1

Это первый доклад из серии докладов, посвящённых разработке компилятора “TypeScript” с использованием LLVM в качестве backend и TypeScript в качестве frontend.

Dmitry Patsura

August 01, 2018
Tweet

More Decks by Dmitry Patsura

Other Decks in Programming

Transcript

  1. Сегодня в программе Теория о компиляторах Немного об LLVM Пишем

    программы LLVM IR Пишем компилятор Горячие закуски Выпивка +4
  2. +5 TypeScript как frontend LLVM Null-terminated string Про JS

    будет, но чуток позжее Runtime Library Кодогенерация libstdc libstdc++ Линковщики
  3. *.cpp *.h Let’s take an example CLang Executable/Binary preprocessor Compile

    *.cpp -> *.o Linking file1.o file2.o Немножко про компиляторы A
  4. +10 LLVM - Low Level Virtual Machine The LLVM Project

    is a collection of modular and reusable compiler and toolchain technologies. Despite its name, LLVM has little to do with traditional virtual machines. The name "LLVM" itself is not an acronym; it is the full name of the project. Немножко про компиляторы T
  5. +11 Почему LLVM? LLVM IR (optimize, cfg, cfa) Clang (C/C++)

    Fortran etc… X86 PowerPC ARM Frontend Backend Немножко про компиляторы Ж
  6. +12 LLVM Language Reference Manual https://llvm.org/docs/LangRef.html LLVM IR - LLVM

    intermediate representation lllvm-llc LLVM IR Obj Немножко про компиляторы И
  7. +13 Каждый уважающий себя программист хочет сделать свой собственный компилятор.

    Мечты сбываются! LLVM — важный шаг, позволяющий избежать велосипедостроения. (C) @gridem Немножко про компиляторы В
  8. +16 int main() { return 0; } program that cannot

    do anything clang -S -emit-llvm main.cpp Генерация кода LLVM IR
  9. ; ModuleID = 'main.cpp' source_filename = "main.cpp" target datalayout =

    "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.14.0" ; Function Attrs: noinline norecurse nounwind optnone ssp uwtable define i32 @main() #0 { %1 = alloca i32, align 4 store i32 0, i32* %1, align 4 ret i32 0 } attributes #0 = { noinline norecurse nounwind optnone ssp uwtable «…..» } !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 7, !"PIC Level", i32 2} !2 = !{!"Apple LLVM version 10.0.0 (clang-1000.11.45.2)"}
  10. +18 ; ModuleID = 'main.cpp' source_filename = "main.cpp" target datalayout

    = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.14.0" Генерация кода LLVM IR
  11. +19 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" https://llvm.org/docs/LangRef.html#data-layout Specifies that the target

    lays out data in little-endian form. e m:o : Mach-O mangling: Private symbols get L prefix. Other symbols get a _ prefix. https://llvm.org/devmtg/2014-10/Slides/Prashanth-DLO.pdf Implementing Data Layout Optimizations in the LLVM Framework Генерация кода LLVM IR
  12. +20 ; Function Attrs: noinline … define i32 @main() #0

    { %1 = alloca i32, align 4 store i32 0, i32* %1, align 4 ret i32 0 } clang -S -emit-llvm main.cpp Генерация кода LLVM IR
  13. +21 ; Function Attrs: norecurse nounwind readnone ssp uwtable define

    i32 @main() local_unnamed_addr #0 { ret i32 0 } clang -S -emit-llvm main.cpp -O3 Генерация кода LLVM IR
  14. .section __TEXT,__text,regular,pure_instructions .macosx_version_min 10, 14 .globl _main ## -- Begin

    function main .p2align 4, 0x90 _main: ## @main .cfi_startproc ## %bb.0: pushq %rbp .cfi_def_cfa_offset 16 .cfi_offset %rbp, -16 movq %rsp, %rbp .cfi_def_cfa_register %rbp xorl %eax, %eax popq %rbp retq .cfi_endproc llc main.ll Генерация кода LLVM IR
  15. +23 gcc main.s -o main && objdump -d main ovr@MBP-Dmitry

     ~/projects/ovr/hlvm   master • ?  gcc main.s -o main && objdump -d main  2 ↵  8.11.1 ⬢ main: file format Mach-O 64-bit x86-64 Disassembly of section __TEXT,__text: __text: 100000fb0: 55 pushq %rbp 100000fb1: 48 89 e5 movq %rsp, %rbp 100000fb4: 31 c0 xorl %eax, %eax 100000fb6: 5d popq %rbp 100000fb7: c3 retq _main: 100000fb0: 55 pushq %rbp 100000fb1: 48 89 e5 movq %rsp, %rbp 100000fb4: 31 c0 xorl %eax, %eax 100000fb6: 5d popq %rbp 100000fb7: c3 retq Генерация кода LLVM IR
  16. +25 int main() { int a = 5; int b

    = 6; return a + b; } clang -S -emit-llvm main.cpp -O0 Генерация кода LLVM IR
  17. define i32 @main() #0 { %1 = alloca i32, align

    4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 store i32 0, i32* %1, align 4 store i32 5, i32* %2, align 4 store i32 6, i32* %3, align 4 %4 = load i32, i32* %2, align 4 %5 = load i32, i32* %3, align 4 %6 = add nsw i32 %4, %5 ret i32 %6 }
  18. +27 %1 = alloca i32, align 4 %2 = alloca

    i32, align 4 %3 = alloca i32, align 4 Static Single Assignment (SSA) The ‘alloca’ instruction allocates memory on the stack frame of the currently executing function, to be automatically released when this function returns to its caller. The object is always allocated in the address space for allocas indicated in the datalayout . <result> = alloca [inalloca] <type> [, <ty> <NumElements>] [, align <alignment>] [, addrspace(<num>)] Генерация кода LLVM IR
  19. Сыграем в игру? define i32 @main() #0 { %1 =

    alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 store i32 0, i32* %1, align 4 store i32 5, i32* %2, align 4 store i32 6, i32* %3, align 4 %4 = load i32, i32* %2, align 4 %5 = load i32, i32* %3, align 4 %6 = add nsw i32 %4, %5 ret i32 %6 }
  20. define i32 @main() #0 { %1 = alloca i32, align

    4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 store i32 0, i32* %1, align 4 store i32 5, i32* %2, align 4 store i32 6, i32* %3, align 4 %4 = load i32, i32* %2, align 4 %5 = load i32, i32* %3, align 4 %6 = add nsw i32 %4, %5 ret i32 %6 }
  21. define i32 @main() #0 { %1 = alloca i32, align

    4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 store i32 0, i32* %1, align 4 store i32 5, i32* %2, align 4 store i32 6, i32* %3, align 4 %4 = load i32, i32* %2, align 4 %5 = load i32, i32* %3, align 4 %6 = add nsw i32 %4, %5 ret i32 %6 }
  22. define i32 @main() #0 { %1 = alloca i32, align

    4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 store i32 0, i32* %1, align 4 store i32 5, i32* %2, align 4 store i32 6, i32* %3, align 4 %4 = load i32, i32* %2, align 4 %5 = load i32, i32* %3, align 4 %6 = add nsw i32 %4, %5 ret i32 %6 }
  23. define i32 @main() #0 { %a.0 = alloca i32, align

    4 %b.0 = alloca i32, align 4 store i32 5, i32* %a.0, align 4 store i32 6, i32* %b.0, align 4 %a.1 = load i32, i32* %a.0, align 4 %b.1 = load i32, i32* %b.0, align 4 %c.0 = add nsw i32 %a.1, %b.1 ret i32 %c.0 }
  24. define i32 @main() #0 { %a.0 = alloca i32, align

    4 %b.0 = alloca i32, align 4 store i32 5, i32* %a.0, align 4 store i32 6, i32* %b.0, align 4 %a.1 = load i32, i32* %a.0, align 4 %b.1 = load i32, i32* %b.0, align 4 %c.0 = add nsw i32 %a.1, %b.1 ret i32 %c.0 }
  25. define i32 @main() #0 { %a.0 = alloca i32, align

    4 %b.0 = alloca i32, align 4 store i32 5, i32* %a.0, align 4 store i32 6, i32* %b.0, align 4 %a.1 = load i32, i32* %a.0, align 4 %b.1 = load i32, i32* %b.0, align 4 %c.0 = add nsw i32 %a.1, %b.1 ret i32 %c.0 }
  26. +39 Зачем в C/C++ заголовочные файлы? #include <stdio.h> int main()

    { puts("Hello"); return 0; } main.cpp main.o Main compiler linker
  27. +40 clang -S -emit-llvm main.cpp -O0 declare i32 @puts(i8*) #1

    @.str = private unnamed_addr constant [6 x i8] c"Hello\00" define i32 @main() #0 { %1 = getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0) %2 = call i32 @puts(i8* %1) ret i32 0 } Генерация кода LLVM IR
  28. +41 @.str = private unnamed_addr constant [6 x i8] c"Hello\00"

    [6 x i8] c"Hello\00" [6 x i8] c"12345\00" Null-terminated string / С string Генерация кода LLVM IR
  29. +42 H E L L O \0 const char* str

    = *str *(str+1) *(str+2) *(str+3) *(str+4) *(str+5) How does int puts(const char *str) work? Генерация кода LLVM IR
  30. +46 import * as ts from 'typescript'; const options =

    { lib: [], types: [] }; const files = ['sandbox/do-simple-math.ts']; const host = ts.createCompilerHost(options); const program = ts.createProgram(files, options, host); Компилятор - Frontend
  31. +47 Let’s use TypeScript diagnostics const diagnostics = ts.getPreEmitDiagnostics(program); if

    (diagnostics.length) { diagnostics.forEach(diagnostic => { const message = ts.flattenDiagnosticMessageText(diagnostic.messageText, '\n'); if (!diagnostic.file) { console.log(message); return; } const { line, character } = diagnostic.file.getLineAndCharacterOfPosition(diagnostic.start!); console.log(`${diagnostic.file.fileName} (${line + 1},${character + 1}): ${message}`); }); process.exit(1); } Компилятор - Frontend
  32. ovr@MBP-Dmitry  ~hlvm   master  node ./build/cli.js 

    SIGINT(2) ↵ Cannot find global type 'Array'. Cannot find global type 'Boolean'. Cannot find global type 'Function'. Cannot find global type 'IArguments'. Cannot find global type 'Number'. Cannot find global type 'Object'. Cannot find global type 'RegExp'. Cannot find global type 'String'. sandbox/do-simple-math.ts (3,5): Cannot find name 'puts'.
  33. const options = { lib: [ path.join(__dirname, '..', 'language.d.ts') ],

    types: [] }; interface Boolean {} interface Function {} interface IArguments {} interface Number {} interface Object {} interface RegExp {} interface String {} interface Array<T = any> {} declare function puts(str: string): void;
  34. const llvmContext = new llvm.LLVMContext(); const llvmModule = new llvm.Module("test",

    this.llvmContext); const block = llvm.BasicBlock.create(llvmContext, "Entry", mainFn); const builder = new llvm.IRBuilder(block); const mainFnType = llvm.FunctionType.get( llvm.Type.getVoidTy(llvmContext), false ); const mainFn = llvm.Function.create( mainFnType, LinkageTypes.ExternalLinkage, "main", llvmModule ); import * as llvm from 'llvm-node';
  35. export function passStatement(stmt: ts.Statement, ctx: Context, builder: llvm.IRBuilder) { switch

    (stmt.kind) { case ts.SyntaxKind.Block: passBlockStatement(<any>stmt, ctx, builder); break; case ts.SyntaxKind.ReturnStatement: passReturnStatement(<any>stmt, ctx, builder); break; case ts.SyntaxKind.IfStatement: passIfStatement(<any>stmt, ctx, builder); break; case ts.SyntaxKind.ForStatement: passForStatement(<any>stmt, ctx, builder); break; default: throw new UnsupportedError( stmt, `Unsupported statement: "${stmt.kind}"` ); } }
  36. Use Context + IRBuilder export class Context { public typeChecker:

    ts.TypeChecker; public llvmContext: llvm.LLVMContext; public llvmModule: llvm.Module; public scope: Scope = new Scope(); } export class Scope { public functions: FunctionsTable = new FunctionsTable(); public variables: VariablesTable = new VariablesTable(); }
  37. export function buildFromStringLiteral( node: ts.StringLiteral, ctx: Context, builder: llvm.IRBuilder ):

    llvm.Value { return builder.createGlobalStringPtr( node.text, ); } StringLiteral -> llvm.Value
  38. NumericLiteral -> llvm.Value function buildFromNumericLiteral( value: ts.NumericLiteral, ctx: Context, builder:

    llvm.IRBuilder, ): llvm.Value { return llvm.ConstantFP.get( ctx.llvmContext, parseFloat(value.text) ); }
  39. { function doMath(): number { const a = 5.5; const

    b = 14.5; return ((a + b) * 50) / 10; } puts("hello"); puts(doMath()); } declare function puts(str: string): void; do-simple-math.ts:11:10 - error TS2345: Argument of type 'number' is not assignable to parameter of type 'string'. 11 puts(doMath()); ~~~~~~~~
  40. +61 #include <string> #include <sstream> #include <iostream> #include "include/helpers.h" __attribute__

    ((visibility ("default"))) const char* number2string(double number) { std::stringstream s; s << number; return s.str().c_str(); } Runtime Library
  41. +62 cmake_minimum_required(VERSION 3.12) project(hlvm-runtime) set(CMAKE_BUILD_TYPE Release) set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_FLAGS "-pedantic

    -Wextra -O2") set(SOURCE_FILES helpers.cpp) add_library(hlvm-runtime STATIC ${SOURCE_FILES}) add_library(<name> <SHARED|STATIC|MODULE|UNKNOWN> IMPORTED [GLOBAL]) Runtime Library
  42. define double @doMath() { Entry: %a = alloca double store

    double 5.500000e+00, double* %a %b = alloca double store double 1.450000e+01, double* %b %0 = load double, double* %a %1 = load double, double* %b %2 = fadd double %0, %1 %3 = fmul double %2, 5.000000e+01 %4 = fdiv double %3, 1.000000e+01 ret double %4 } define void @main() { Entry: %0 = call double @doMath() %1 = call i8* @number2string(double %0) call void @puts(i8* %1) ret void } declare void @puts(i8*) declare i8* @number2string(double) { function doMath(): number { const a = 5.5; const b = 14.5; return ((a + b) * 50) / 10; } puts(number2string(doMath())); }
  43. Undefined symbols for architecture x86_64: "_number2string", referenced from: _main in

    hello_world-0f8a45.o ld: symbol(s) not found for architecture x86_64 clang: error: linker command failed with exit code 1 (use -v to see invocation) Runtime Library
  44. OVERVIEW: llvm symbol table dumper ovr@MBP-Dmitry  ~/projects/ovr/hlvm/runtime  

    master ✚ • ?  nm libhlvm-runtime.a.  ✔ libhlvm-runtime.a(helpers.cpp.o): 0000000000000e4c s GCC_except_table0 0000000000000e94 s GCC_except_table13 U __Unwind_Resume 0000000000000000 T __Z13number2stringd 0000000000000ab0 T __ZNKSt3__115basic_stringbufIcNS_11char_traitsIcEENS_9allocatorIcEEE3strEv U __ZNKSt3__121__basic_string_commonILb1EE20__throw_length_errorEv U __ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE6resizeEmc U __ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE9push_backEc U __ZNSt3__113basic_istreamIcNS_11char_traitsIcEEED0Ev U __ZNSt3__113basic_istreamIcNS_11char_traitsIcEEED1Ev U __ZNSt3__113basic_ostreamIcNS_11char_traitsIcEEED0Ev U __ZNSt3__115basic_streambufIcNS_11char_traitsIcEEE6xsgetnEPcl …
  45. -demangle Demangle C++ symbol names ovr@MBP-Dmitry  ~/projects/ovr/hlvm/runtime  

    master ✚ • ?  nm -C libhlvm-runtime.a.  ✔ libhlvm-runtime.a(helpers.cpp.o): U __Unwind_Resume 0000000000000000 T number2string(double) 0000000000000ab0 T std::__1::basic_stringbuf<char, std::__1::char_traits<char>, std::__1::allocator<char> >::str() const U std::__1::__basic_string_common<true>::__throw_length_error() const U std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::resize(unsigned long, char) U std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::push_back(char) U std::__1::basic_istream<char, std::__1::char_traits<char> >::~basic_istream() U std::__1::basic_istream<char, std::__1::char_traits<char> >::~basic_istream() U std::__1::basic_ostream<char, std::__1::char_traits<char> >::~basic_ostream() U std::__1::basic_ostream<char, std::__1::char_traits<char> >::~basic_ostream() U std::__1::basic_ostream<char, std::__1::char_traits<char> >::operator<<(double) U std::__1::basic_iostream<char, std::__1::char_traits<char> >::~basic_iostream() U std::__1::basic_iostream<char, std::__1::char_traits<char> >::~basic_iostream() U std::__1::basic_iostream<char, std::__1::char_traits<char> >::~basic_iostream() U std::__1::basic_streambuf<char, std::__1::char_traits<char> >::sync() U std::__1::basic_streambuf<char, std::__1::char_traits<char> >::imbue(std::__1::locale const&) U std::__1::basic_streambuf<char, std::__1::char_traits<char> >::uflow() U std::__1::basic_streambuf<char, std::__1::char_traits<char> >::setbuf(char*, long) U std::__1::basic_streambuf<char, std::__1::char_traits<char> >::xsgetn(char*, long) U std::__1::basic_streambuf<char, std::__1::char_traits<char> >::xsputn(char const*, long) U std::__1::basic_streambuf<char, std::__1::char_traits<char> >::showmanyc() U std::__1::basic_streambuf<char, std::__1::char_traits<char> >::basic_streambuf() U std::__1::basic_streambuf<char, std::__1::char_traits<char> >::~basic_streambuf()
  46. +67 https://en.wikipedia.org/wiki/Name_mangling In compiler construction, name mangling (also called name

    decoration) is a technique used to solve various problems caused by the need to resolve unique names for programming entities in many modern programming languages. Name mangling Runtime Library
  47. define double @doMath() { Entry: %a = alloca double store

    double 5.500000e+00, double* %a %b = alloca double store double 1.450000e+01, double* %b %0 = load double, double* %a %1 = load double, double* %b %2 = fadd double %0, %1 %3 = fmul double %2, 5.000000e+01 %4 = fdiv double %3, 1.000000e+01 ret double %4 } define void @main() { Entry: %0 = call double @doMath() %1 = call i8* @_Z13number2stringd(double %0) call void @puts(i8* %1) ret void } declare void @puts(i8*) declare i8* @_Z13number2stringd(double) { function doMath(): number { const a = 5.5; const b = 14.5; return ((a + b) * 50) / 10; } puts(number2string(doMath())); }
  48. ovr@MBP-Dmitry  ~/projects/ovr/hlvm   master ✚ • ? 

    ./main  ✔ hello ovr@MBP-Dmitry  ~/projects/ovr/hlvm   master ✚ • ?   32 ↵
  49. LLDB debugging may affect and break JS developers; please skip

    this section if you want to stay alive. CONTENT DISCLAIMER
  50. +71 __attribute__ ((visibility ("default"))) const char* number2string(double number) { char*

    result = new char[100]; sprintf(result, "%f", number); return result; } Runtime Library
  51. +72 ovr ~/hlvm   master  ./main  ✔

    hello 100.000000 { function doMath(): number { const a = 5.5; const b = 14.5; return ((a + b) * 50) / 10; } puts("hello"); puts(_Z13number2stringd(doMath())); } Runtime Library
  52. ovr@MBP-Dmitry  hlvm  master  llvm-nm -C runtime/libhlvm-runtime.a ✔

    runtime/libhlvm-runtime.a(helpers.cpp.o): 0000000000000000 T number2string(double) U operator new[](unsigned long) U _sprintf
  53. ovr  ~/projects/ovr/hlvm   master  otool -L main

     ✔ main: /usr/lib/libc++.1.dylib (compatibility 1.0.0, current version 400.9.4) /usr/lib/libSystem.B.dylib (compatibility 1.0.0, current version 1252.200.5) clang -lstdc++ hello_world.o -lhlvm-runtime -L runtime -o main