diff --git a/.github/workflows/llvm-quick-fuzz.yml b/.github/workflows/llvm-quick-fuzz.yml index 0195178f..1e0a15fd 100644 --- a/.github/workflows/llvm-quick-fuzz.yml +++ b/.github/workflows/llvm-quick-fuzz.yml @@ -23,7 +23,8 @@ jobs: os: [ubuntu-22.04] # See doc/developing.md ghc: ["8.10.7"] - llvm: [ "https://github.com/llvm/llvm-project/releases/download/llvmorg-12.0.0/clang+llvm-12.0.0-x86_64-linux-gnu-ubuntu-20.04.tar.xz" + llvm: [ "https://github.com/llvm/llvm-project/releases/download/llvmorg-13.0.0/clang+llvm-13.0.0-x86_64-linux-gnu-ubuntu-20.04.tar.xz" + , "https://github.com/llvm/llvm-project/releases/download/llvmorg-12.0.0/clang+llvm-12.0.0-x86_64-linux-gnu-ubuntu-20.04.tar.xz" , "https://github.com/llvm/llvm-project/releases/download/llvmorg-11.0.0/clang+llvm-11.0.0-x86_64-linux-gnu-ubuntu-20.04.tar.xz" , "https://github.com/llvm/llvm-project/releases/download/llvmorg-10.0.0/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz" , "https://releases.llvm.org/9.0.0/clang+llvm-9.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz" diff --git a/README.md b/README.md index 28a5a317..00d6bb37 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ compilers. | | v10.0 | ✓ | | | | | v11.0 | ✓ | | | | | v12.0 | ✓ | | | -| | v13.0 | | | See [issues][llvm13] | +| | v13.0 | ✓ | | See [issues][llvm13] | | | v14.0 | | | See [issues][llvm14] | | `clang++` | v3.4 | | | | | | v3.5 | | | | diff --git a/disasm-test/Main.hs b/disasm-test/Main.hs index e3150387..04040465 100644 --- a/disasm-test/Main.hs +++ b/disasm-test/Main.hs @@ -205,7 +205,10 @@ cube = TS.mkCUBE { TS.inputDirs = ["disasm-test/tests"] , TS.rootName = "*.ll" , TS.separators = "." - , TS.validParams = [ ("llvm-range", Just ["pre-llvm11", "at-least-llvm12"]) + , TS.validParams = [ ("llvm-range", Just [ "pre-llvm11" + , "at-least-llvm12" + , "at-least-llvm13" + ]) ] -- Somewhat unusually for tasty-sugar, we make the expectedSuffix the same -- as the rootName suffix. This is because we are comparing the contents of @@ -304,7 +307,11 @@ runTest llvmVer sweet expct ] in case lookup "llvm-range" (TS.expParamsMatch expct) of Just (TS.Explicit v) -> specMatchesInstalled v - Just (TS.Assumed v) -> specMatchesInstalled v + Just (TS.Assumed v) + | v == "pre-llvm11" || v == "at-least-llvm12" + -> specMatchesInstalled v + | otherwise + -> False _ -> error "llvm-range unknown" -- | Assemble some llvm assembly, producing a bitcode file in /tmp. diff --git a/disasm-test/tests/di-arg-list.at-least-llvm13.ll b/disasm-test/tests/di-arg-list.at-least-llvm13.ll new file mode 100644 index 00000000..23815b90 --- /dev/null +++ b/disasm-test/tests/di-arg-list.at-least-llvm13.ll @@ -0,0 +1,43 @@ +; ModuleID = 'di-arg-list.c' +source_filename = "di-arg-list.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn +define dso_local i32 @f(i32 %0, i32 %1) local_unnamed_addr #0 !dbg !9 { + call void @llvm.dbg.value(metadata i32 %0, metadata !14, metadata !DIExpression()), !dbg !17 + call void @llvm.dbg.value(metadata i32 %1, metadata !15, metadata !DIExpression()), !dbg !17 + call void @llvm.dbg.value(metadata !DIArgList(i32 %0, i32 %1), metadata !16, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value)), !dbg !17 + ret i32 0, !dbg !18 +} + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} +!llvm.ident = !{!7} +!llvm.commandline = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.1", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "di-arg-list.c", directory: "/home/rscott/Documents/Hacking/Haskell/llvm-pretty-bc-parser/disasm-test/tests") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 7, !"uwtable", i32 1} +!7 = !{!"clang version 13.0.1"} +!8 = !{!"/home/rscott/Software/clang+llvm-13.0.1/bin/clang-13 -S -emit-llvm -g -O1 -frecord-command-line di-arg-list.c -o di-arg-list.at-least-llvm13.ll"} +!9 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!10 = !DISubroutineType(types: !11) +!11 = !{!12, !12, !12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{!14, !15, !16} +!14 = !DILocalVariable(name: "x", arg: 1, scope: !9, file: !1, line: 1, type: !12) +!15 = !DILocalVariable(name: "y", arg: 2, scope: !9, file: !1, line: 1, type: !12) +!16 = !DILocalVariable(name: "z", scope: !9, file: !1, line: 2, type: !12) +!17 = !DILocation(line: 0, scope: !9) +!18 = !DILocation(line: 3, column: 3, scope: !9) diff --git a/disasm-test/tests/di-arg-list.c b/disasm-test/tests/di-arg-list.c new file mode 100644 index 00000000..2d5e8c0b --- /dev/null +++ b/disasm-test/tests/di-arg-list.c @@ -0,0 +1,4 @@ +int f(int x, int y) { + int z = x + y; + return 0; +} diff --git a/llvm-pretty b/llvm-pretty index a272e380..6750fc25 160000 --- a/llvm-pretty +++ b/llvm-pretty @@ -1 +1 @@ -Subproject commit a272e380bbd44d6af5afba63a70772b2dab6a2d0 +Subproject commit 6750fc256d0aedc1be5dcf85d87d81dfbba19e3d diff --git a/src/Data/LLVM/BitCode/GetBits.hs b/src/Data/LLVM/BitCode/GetBits.hs index 1440646a..b6aec17f 100644 --- a/src/Data/LLVM/BitCode/GetBits.hs +++ b/src/Data/LLVM/BitCode/GetBits.hs @@ -124,12 +124,12 @@ instance MonadPlus GetBits where -- compositions. Their functionality should be identical, but it may be easier -- to debug the first. -extractFromByteString' :: NumBits {-^ the last bit accessible in the ByteString -} - -> NumBits {-^ the bit to start extraction at -} - -> NumBits {-^ the number of bits to extract -} - -> ByteString {-^ the ByteString to extract from -} - -> Either String (Int, NumBits) -extractFromByteString' bitLimit startBit numBits bs = +_extractFromByteString' :: NumBits {-^ the last bit accessible in the ByteString -} + -> NumBits {-^ the bit to start extraction at -} + -> NumBits {-^ the number of bits to extract -} + -> ByteString {-^ the ByteString to extract from -} + -> Either String (Int, NumBits) +_extractFromByteString' bitLimit startBit numBits bs = let Bytes' s8 = fst (bitsToBytes startBit) Bytes' r8 = fst (bitsToBytes numBits) rcnt = r8 + 2 -- 2 == pre-shift overflow byte on either side diff --git a/src/Data/LLVM/BitCode/IR/Constants.hs b/src/Data/LLVM/BitCode/IR/Constants.hs index 581ee527..8b764e77 100644 --- a/src/Data/LLVM/BitCode/IR/Constants.hs +++ b/src/Data/LLVM/BitCode/IR/Constants.hs @@ -360,19 +360,8 @@ parseConstantEntry t (getTy,cs) (fromEntry -> Just r) = return (getTy, Typed (PrimType (Integer 1)) (ValConstExpr cst):cs) 18 -> label "CST_CODE_INLINEASM_OLD" $ do - let field = parseField r - ty <- getTy - flags <- field 0 numeric - let sideEffect = testBit (flags :: Int) 0 - alignStack = (flags `shiftR` 1) == 1 - - alen <- field 1 numeric - asm <- UTF8.decode <$> parseSlice r 2 alen char - - clen <- field (2+alen) numeric - cst <- UTF8.decode <$> parseSlice r (3+alen) clen char - - return (getTy, Typed ty (ValAsm sideEffect alignStack asm cst):cs) + tv <- parseInlineAsm InlineAsmCode18 getTy r + return (getTy, tv:cs) 19 -> label "CST_CODE_CE_SHUFFLEVEC_EX" $ do notImplemented @@ -415,28 +404,9 @@ parseConstantEntry t (getTy,cs) (fromEntry -> Just r) = FloatType Double -> build (ValDouble . castDouble) x -> Assert.unknownEntity "element type" x - 23 -> label "CST_CODE_INLINEASM" $ do - let field = parseField r - mask <- field 0 numeric - - let test = testBit (mask :: Word32) - hasSideEffects = test 0 - isAlignStack = test 1 - _asmDialect = mask `shiftR` 2 - - asmStrSize <- field 1 numeric - Assert.recordSizeGreater r (1 + asmStrSize) - - constStrSize <- field (2 + asmStrSize) numeric - Assert.recordSizeGreater r (2 + asmStrSize + constStrSize) - - asmStr <- fmap UTF8.decode $ parseSlice r 2 asmStrSize char - constStr <- fmap UTF8.decode $ parseSlice r (3 + asmStrSize) constStrSize char - - ty <- getTy - let val = ValAsm hasSideEffects isAlignStack asmStr constStr - - return (getTy, Typed ty val : cs) + 23 -> label "CST_CODE_INLINEASM_OLD2" $ do + tv <- parseInlineAsm InlineAsmCode23 getTy r + return (getTy, tv:cs) -- [opty, flags, n x operands] 24 -> label "CST_CODE_CE_GEP_WITH_INRANGE_INDEX" $ do @@ -461,6 +431,20 @@ parseConstantEntry t (getTy,cs) (fromEntry -> Just r) = ty <- getTy return (getTy, Typed ty ValPoison : cs) + 27 -> label "CST_CODE_DSO_LOCAL_EQUIVALENT" $ do + notImplemented + + 28 -> label "CST_CODE_INLINEASM_OLD3" $ do + tv <- parseInlineAsm InlineAsmCode28 getTy r + return (getTy, tv:cs) + + 29 -> label "CST_CODE_NO_CFI_VALUE" $ do + notImplemented + + 30 -> label "CST_CODE_INLINEASM" $ do + tv <- parseInlineAsm InlineAsmCode30 getTy r + return (getTy, tv:cs) + code -> Assert.unknownEntity "constant record code" code parseConstantEntry _ st (abbrevDef -> Just _) = @@ -498,6 +482,61 @@ resolveNull ty = case typeNull ty of HasNull nv -> return nv ResolveNull i -> resolveNull =<< getType' =<< getTypeId i +-- | The different codes for inline @asm@ constants. Each one has minor +-- differences in how they are parsed. +data InlineAsmCode + = InlineAsmCode18 + -- ^ @CST_CODE_INLINEASM_OLD = 18@. The original. + | InlineAsmCode23 + -- ^ @CST_CODE_INLINEASM_OLD2 = 23@. This adds an @asmdialect@ field. + | InlineAsmCode28 + -- ^ @CST_CODE_INLINEASM_OLD3 = 28@. This adds an @unwind@ field (which is + -- referred to as @canThrow@ in the LLVM source code). + | InlineAsmCode30 + -- ^ @CST_CODE_INLINEASM = 30@. This adds an explicit function type field. + +-- | Parse a 'ValAsm' value. There are several variations on this theme that are +-- captured in the 'InlineAsmCode' argument. +parseInlineAsm :: InlineAsmCode -> Parse Type -> Record -> Parse (Typed PValue) +parseInlineAsm code getTy r = do + let field = parseField r + + -- If using InlineAsmCode30 or later, we parse the type as an explicit + -- field. + let parseTy = do ty <- getType =<< field 0 numeric + return (PtrTo ty, 1) + -- If using an older InlineAsmCode, then we retrieve the type from the + -- current context. + let useCurTy = do ty <- getTy + return (ty, 0) + (ty, ix) <- case code of + InlineAsmCode18 -> useCurTy + InlineAsmCode23 -> useCurTy + InlineAsmCode28 -> useCurTy + InlineAsmCode30 -> parseTy + + mask <- field ix numeric + + let test = testBit (mask :: Word32) + hasSideEffects = test 0 + isAlignStack = test 1 + -- We don't store these in the llvm-pretty AST at the moment: + _asmDialect = test 2 -- Only with InlineAsmCode23 or later + _canThrow = test 3 -- Only with InlineAsmCode28 or later + + asmStrSize <- field (ix + 1) numeric + Assert.recordSizeGreater r (ix + 1 + asmStrSize) + + constStrSize <- field (ix + 2 + asmStrSize) numeric + Assert.recordSizeGreater r (ix + 2 + asmStrSize + constStrSize) + + asmStr <- fmap UTF8.decode $ parseSlice r (ix + 2) asmStrSize char + constStr <- fmap UTF8.decode $ parseSlice r (ix + 3 + asmStrSize) constStrSize char + + let val = ValAsm hasSideEffects isAlignStack asmStr constStr + + return (Typed ty val) + -- Float/Double Casting -------------------------------------------------------- diff --git a/src/Data/LLVM/BitCode/IR/Function.hs b/src/Data/LLVM/BitCode/IR/Function.hs index a7696faf..ee9d5c0d 100644 --- a/src/Data/LLVM/BitCode/IR/Function.hs +++ b/src/Data/LLVM/BitCode/IR/Function.hs @@ -16,7 +16,6 @@ import Data.LLVM.BitCode.IR.Attrs import Data.LLVM.BitCode.Match import Data.LLVM.BitCode.Parse import Data.LLVM.BitCode.Record -import Data.LLVM.BitCode.Record import Text.LLVM.AST import Text.LLVM.Labels diff --git a/src/Data/LLVM/BitCode/IR/Metadata.hs b/src/Data/LLVM/BitCode/IR/Metadata.hs index 88343dff..12f2d953 100644 --- a/src/Data/LLVM/BitCode/IR/Metadata.hs +++ b/src/Data/LLVM/BitCode/IR/Metadata.hs @@ -327,19 +327,32 @@ unnamedEntries pm = bimap Seq.fromList Seq.fromList (partitionEithers (mapMaybe -- TODO: is this silently eating errors with metadata that's not in the -- value table (when the lookupValueTableAbs fails)? + resolveNode :: (Int, (Bool, Bool, Int)) + -> Maybe (Either PartialUnnamedMd PartialUnnamedMd) resolveNode (ref,(fnLocal,d,ix)) = ((if fnLocal then Right else Left) <$> lookupNode ref d ix) - lookupNode ref d ix = flip fmap (lookupValueTableAbs ref (mtEntries mt)) $ - \case - Typed { typedValue = ValMd v } -> - PartialUnnamedMd + lookupNode :: Int -> Bool -> Int -> Maybe PartialUnnamedMd + lookupNode ref d ix = do + tv <- lookupValueTableAbs ref (mtEntries mt) + case tv of + Typed { typedValue = ValMd v } -> do + guard (not (mustAppearInline v)) + pure $! PartialUnnamedMd { pumIndex = ix , pumValues = v , pumDistinct = d } _ -> error "Impossible: Only ValMds are stored in mtEntries" + -- DIExpressions and DIArgLists are always printed inline and should never be + -- printed in the global list of unnamed metadata. See + -- https://github.com/llvm/llvm-project/blob/65600cb2a7e940babf6c493503b9d3fd19f8cb06/llvm/lib/IR/AsmWriter.cpp#L1242-L1245 + mustAppearInline :: PValMd -> Bool + mustAppearInline (ValMdDebugInfo (DebugInfoExpression{})) = True + mustAppearInline (ValMdDebugInfo (DebugInfoArgList{})) = True + mustAppearInline _ = False + type InstrMdAttachments = Map.Map Int [(KindMd,PValMd)] type PKindMd = Int @@ -680,8 +693,8 @@ parseMetadataEntry vt mt pm (fromEntry -> Just r) = then pure 0 else parseField r 18 numeric) -- dicuNameTableKind <*> (if recordSize <= 19 - then pure 0 - else parseField r 19 numeric) -- dicuRangesBaseAddress + then pure False + else parseField r 19 nonzero) -- dicuRangesBaseAddress <*> (if recordSize <= 20 then pure Nothing else mdStringOrNull ctx pm <$> parseField r 20 numeric) -- dicuSysRoot @@ -705,9 +718,9 @@ parseMetadataEntry vt mt pm (fromEntry -> Just r) = (diFlags0, spFlags0) <- if hasSPFlags then - (,) <$> parseField r (11 + 2) numeric <*> pure 0 - else (,) <$> parseField r 11 numeric <*> parseField r 9 numeric + else + (,) <$> parseField r (11 + 2) numeric <*> pure 0 let diFlagMainSubprogram = bit 21 :: Word32 hasOldMainSubprogramFlag = (diFlags0 .&. diFlagMainSubprogram) /= 0 @@ -1087,6 +1100,35 @@ parseMetadataEntry vt mt pm (fromEntry -> Just r) = return $! updateMetadataTable (addDebugInfo isDistinct (DebugInfoLabel dil)) pm + 41 -> label "METADATA_STRING_TYPE" $ do + notImplemented + + -- Codes 42 and 43 are reserved for Fortran array–specific debug info, see + -- https://github.com/llvm/llvm-project/blob/4681f6111e655057f5015564a9bf3705f87495bf/llvm/include/llvm/Bitcode/LLVMBitCodes.h#L348-L349 + + 44 -> label "METADATA_COMMON_BLOCK" $ do + notImplemented + + 45 -> label "METADATA_GENERIC_SUBRANGE" $ do + notImplemented + + 46 -> label "METADATA_ARG_LIST" $ do + cxt <- getContext + dial <- DIArgList + <$> (map (mdForwardRef cxt mt) <$> parseFields r 0 numeric) + return $! updateMetadataTable + -- Unlike most other forms of metadata, METADATA_ARG_LIST never updates + -- the @IsDistinct@ value. At least, that's my reading of the source + -- code here: + -- https://github.com/llvm/llvm-project/blob/8bad4ae679df6fc7dbd016dccbd3da34206e836b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp#L2142-L2158 + -- + -- As a result, we use False below, which is the default value of + -- IsDistinct set here. It doesn't actually matter that much whether it + -- is True or False, since we filter out DIArgLists from the list of + -- global unnamed metadata entries anyway (see `unnamedEntries`). As + -- such, the value of IsDistinct is never used for anything meaningful. + (addDebugInfo False (DebugInfoArgList dial)) pm + code -> fail ("unknown record code: " ++ show code) parseMetadataEntry _ _ pm (abbrevDef -> Just _) = diff --git a/unit-test/Tests/Instances.hs b/unit-test/Tests/Instances.hs index 98430591..2947ce27 100644 --- a/unit-test/Tests/Instances.hs +++ b/unit-test/Tests/Instances.hs @@ -80,6 +80,7 @@ instance Arbitrary lab => Arbitrary (DISubprogram' lab) where arbi instance Arbitrary DISubrange where arbitrary = genericArbitrary uniform instance Arbitrary lab => Arbitrary (DISubroutineType' lab) where arbitrary = genericArbitrary uniform instance Arbitrary lab => Arbitrary (DILabel' lab) where arbitrary = genericArbitrary uniform +instance Arbitrary lab => Arbitrary (DIArgList' lab) where arbitrary = genericArbitrary uniform -- Newtypes instance Arbitrary Ident where arbitrary = genericArbitrary uniform