| # Copyright (c) 2017, Google Inc. |
| # |
| # Permission to use, copy, modify, and/or distribute this software for any |
| # purpose with or without fee is hereby granted, provided that the above |
| # copyright notice and this permission notice appear in all copies. |
| # |
| # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
| # SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
| # OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| |
| # This is a rough parser for x86-64, ppc64le and ARM assembly designed to work |
| # with https://github.com/pointlander/peg. delocate.go has a go:generate line |
| # for rebuilding delocate.peg.go from this file. |
| |
| package main |
| |
| # Parser type declaration (pointlander/peg syntax): the generated parser type |
| # is named Asm. The braces may hold extra fields for that type; none are |
| # declared here. |
| type Asm Peg {} |
| |
| # First rule of the grammar: zero or more Statements followed by end of |
| # input ("!." succeeds only when no character is left to consume). |
| AsmFile <- Statement* !. |
| # A statement is optional leading whitespace followed by either: |
| #   * a Label (which needs no terminator), or |
| #   * one of the alternatives below, then optional whitespace and a |
| #     terminator: an optional Comment plus newline, or ';'. |
| # Alternatives are tried in order, so the specific directive rules |
| # (GlobalDirective, LocationDirective, LabelContainingDirective) and |
| # Instruction are attempted before the generic Directive rule. |
| # The '/' directly before ')' leaves an empty final alternative, which |
| # appears intended to let blank, whitespace-only and bare-';' lines match. |
| Statement <- WS? (Label / ((GlobalDirective / |
| LocationDirective / |
| LabelContainingDirective / |
| Instruction / |
| Directive / |
| Comment / ) WS? ((Comment? '\n') / ';'))) |
| # ".global" or ".globl", whitespace, then the symbol being exported. |
| # (Double-quoted literals are case-insensitive in pointlander/peg.) |
| GlobalDirective <- (".global" / ".globl") WS SymbolName |
| # Generic directive: '.', a directive name, and optionally whitespace plus a |
| # comma-separated argument list. |
| Directive <- '.' DirectiveName (WS Args)? |
| # One or more letters, digits or underscores. "[[...]]" is pointlander/peg's |
| # case-insensitive character class, so lower-case letters match as well. |
| DirectiveName <- [[A-Z0-9_]]+ |
| # Source-location directives: either ".file" or ".loc". |
| LocationDirective <- FileDirective / LocDirective |
| # ".file", whitespace, then one or more characters up to (not including) a |
| # '#' or the end of the line. |
| FileDirective <- ".file" WS [^#\n]+ |
| # ".loc", whitespace, then one or more characters up to (not including) a |
| # '#', a '/' or the end of the line, i.e. it stops before either Comment |
| # introducer ('#' or "//"). |
| LocDirective <- ".loc" WS [^#/\n]+ |
| # One or more Args separated by commas; whitespace is allowed on either side |
| # of each comma. |
| Args <- Arg ((WS? ',' WS?) Arg)* |
| # A double-quoted argument, or a run of the listed characters. The run uses |
| # '*', so an Arg may match the empty string. |
| Arg <- QuotedArg / [[0-9a-z%+\-*_@.]]* |
| # Text enclosed in double quotes. |
| QuotedArg <- '"' QuotedText '"' |
| # Any sequence of escaped characters or characters other than '"'. |
| # EscapedChar is tried first, so a backslash-escaped quote does not end the |
| # text. |
| QuotedText <- (EscapedChar / [^"])* |
| # A directive from the fixed list below whose arguments are parsed as |
| # SymbolArgs (symbols, offsets and simple expressions) rather than as the |
| # generic Args used by Directive. |
| LabelContainingDirective <- LabelContainingDirectiveName WS SymbolArgs |
| # The directive names handled by LabelContainingDirective. The literals are |
| # double-quoted, which pointlander/peg matches case-insensitively. |
| LabelContainingDirectiveName <- ".xword" / ".word" / ".long" / ".set" / ".8byte" / ".4byte" / ".quad" / ".tc" / ".localentry" / ".size" / ".type" / ".uleb128" / ".sleb128" |
| # One or more SymbolArgs separated by commas; whitespace is allowed on either |
| # side of each comma. |
| SymbolArgs <- SymbolArg ((WS? ',' WS?) SymbolArg)* |
| # A single argument of a LabelContainingDirective. Alternatives, in the order |
| # they are tried: |
| #   1. a bare Offset (number); |
| #   2. a SymbolType such as @function; |
| #   3. a binary expression "lhs Operator rhs" with optional whitespace around |
| #      the operator, where lhs may also be Dot ('.'); |
| #   4. a LocalSymbol with an optional TCMarker; |
| #   5. a SymbolName immediately followed by an Offset; |
| #   6. a SymbolName with an optional TCMarker. |
| # NOTE(review): alternative 3 is only reached when alternative 1 (Offset) has |
| # already failed at this position, so its leading "Offset /" operand can |
| # never match; an argument such as "1-2" is consumed as the Offset "1" only. |
| SymbolArg <- Offset / |
| SymbolType / |
| (Offset / LocalSymbol / SymbolName / Dot) WS? Operator WS? (Offset / LocalSymbol / SymbolName) / |
| LocalSymbol TCMarker? / |
| SymbolName Offset / |
| SymbolName TCMarker? |
| # '@' or '%' followed by the word "function" or "object" (single-quoted, so |
| # matched case-sensitively). |
| SymbolType <- [@%] ('function' / 'object') |
| # A single '.', accepted as the left operand of a SymbolArg expression. |
| Dot <- '.' |
| # The literal text "[TC]" that may follow a symbol in a SymbolArg. |
| TCMarker <- '[TC]' |
| # A backslash followed by any single character. |
| EscapedChar <- '\\' . |
| # One or more spaces or tabs. Newlines are excluded; Statement treats '\n' |
| # as a terminator. |
| WS <- [ \t]+ |
| # "//" or '#' followed by everything up to, but not including, the newline. |
| Comment <- ("//" / '#') [^\n]* |
| # A label definition: a local symbol, a numeric local label or an ordinary |
| # symbol name, followed by ':'. LocalSymbol is tried before SymbolName. |
| Label <- (LocalSymbol / LocalLabel / SymbolName) ':' |
| # First character: letter, '.' or '_'. Remaining characters: letters, '.', |
| # digits, '$' or '_'. ("[[...]]" classes are case-insensitive in |
| # pointlander/peg.) |
| SymbolName <- [[A-Z._]][[A-Z.0-9$_]]* |
| # ".L" followed by one or more letters, '.', digits, '$' or '_'. |
| LocalSymbol <- '.L' [[A-Za-z.0-9$_]]+ |
| # A digit followed by any number of digits or '$'. |
| LocalLabel <- [0-9][0-9$]* |
| # The same shape as LocalLabel followed by 'b' or 'f'. |
| LocalLabelRef <- [0-9][0-9$]*[bf] |
| # An instruction name, optionally followed by whitespace and one or more |
| # comma-separated arguments (whitespace allowed around each comma). |
| Instruction <- InstructionName (WS InstructionArg ((WS? ',' WS?) InstructionArg)*)? |
| # A letter, then any number of letters, '.' or digits, then optionally one |
| # trailing '.', '+' or '-'. |
| InstructionName <- [[A-Z]][[A-Z.0-9]]* [.+\-]? |
| # One argument: an optional IndirectionIndicator ('*'), then the first of the |
| # listed forms that matches (order matters), then any number of trailing |
| # AVX512Tokens. |
| InstructionArg <- IndirectionIndicator? (ARMConstantTweak / RegisterOrConstant / LocalLabelRef / TOCRefHigh / TOCRefLow / GOTLocation / GOTSymbolOffset / MemoryRef) AVX512Token* |
| # The literal "$_GLOBAL_OFFSET_TABLE_-" followed by a LocalSymbol. |
| GOTLocation <- '$_GLOBAL_OFFSET_TABLE_-' LocalSymbol |
| # Either "$<symbol>@GOT" with an optional "OFF" suffix, or ":got:<symbol>". |
| GOTSymbolOffset <- ('$' SymbolName '@GOT' 'OFF'?) / (":got:" SymbolName) |
| # Optional whitespace, then a brace pair containing an optional '%' and any |
| # number of digits or lower-case letters. |
| AVX512Token <- WS? '{' '%'? [0-9a-z]* '}' |
| # ".TOC.-" followed by "0b" or a ".L"-prefixed name, ending in "@ha". |
| TOCRefHigh <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@ha" |
| # ".TOC.-" followed by "0b" or a ".L"-prefixed name, ending in "@l". |
| TOCRefLow <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@l" |
| # A '*' prefix on an instruction argument. |
| IndirectionIndicator <- '*' |
| # A register or an immediate constant. Alternatives, in the order tried: |
| #   1. '%' followed by a letter and then letters or digits; |
| #   2. an optional '$' followed by one Offset or two adjacent Offsets; |
| #   3. '#' Offset, optionally followed by "*<digits>" and then "-<digits>"; |
| #   4. '#', optional '~', then "(<digit> << <digit>)" with optional |
| #      whitespace around "<<"; |
| #   5. an ARMRegister. |
| # The trailing negative lookahead rejects the match when the next character |
| # is 'f', 'b', ':', '(', '+' or '-'; presumably so that local label |
| # references (e.g. "1f"), memory references and expressions are left for the |
| # later InstructionArg alternatives - confirm against the callers. |
| RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) / |
| ('$'? ((Offset Offset) / Offset)) / |
| ('#' Offset ('*' [0-9]+ ('-' [0-9] [0-9]*)?)? ) / |
| ('#' '~'? '(' [0-9] WS? "<<" WS? [0-9] ')' ) / |
| ARMRegister) |
| ![fb:(+\-] |
| # One of the listed shift/extend keywords, optionally followed by whitespace |
| # and '#' Offset. |
| ARMConstantTweak <- ("lsl" / "sxtw" / "uxtw" / "uxtb" / "lsr" / "ror" / "asr") (WS '#' Offset)? |
| # "sp"; or one of x/w/d/q/s followed by one or two digits; or "xzr"/"wzr"; or |
| # a vector register; or a brace-enclosed, comma-separated list of vector |
| # registers with an optional single-digit "[n]" suffix. |
| ARMRegister <- "sp" / ([xwdqs] [0-9] [0-9]?) / "xzr" / "wzr" / ARMVectorRegister / ('{' WS? ARMVectorRegister (',' WS? ARMVectorRegister)* WS? '}' ('[' [0-9] ']')? ) |
| # "v" plus one or two digits, optionally followed by '.', optional digits and |
| # one of b/s/d/h/q, and then an optional "[n]" index of one or two digits. |
| ARMVectorRegister <- "v" [0-9] [0-9]? ('.' [0-9]* [bsdhq] ('[' [0-9] [0-9]? ']')? )? |
| # Compilers only output a very limited number of expression forms. Rather than |
| # implement a full expression parser, this enumerates those forms plus a few |
| # that appear in our hand-written assembly. |
| # A memory operand. The alternatives are tried top to bottom and the first |
| # match wins, so longer forms are listed before their prefixes (for example |
| # "SymbolRef BaseIndexScale" before plain "SymbolRef"). |
| # NOTE(review): "Offset* BaseIndexScale" also matches with zero Offsets, so |
| # the final bare "BaseIndexScale" alternative can never be reached. |
| MemoryRef <- (SymbolRef BaseIndexScale / |
| SymbolRef / |
| Low12BitsSymbolRef / |
| Offset* BaseIndexScale / |
| SegmentRegister Offset BaseIndexScale / |
| SegmentRegister BaseIndexScale / |
| SegmentRegister Offset / |
| ARMBaseIndexScale / |
| BaseIndexScale) |
| # An optional prefix of Offsets ending in '+', then a local or ordinary |
| # symbol, any number of Offsets, and optionally '@' Section followed by more |
| # Offsets. |
| SymbolRef <- (Offset* '+')? (LocalSymbol / SymbolName) Offset* ('@' Section Offset*)? |
| # ":lo12:" followed by a local or ordinary symbol and an optional Offset. |
| Low12BitsSymbolRef <- ":lo12:" (LocalSymbol / SymbolName) Offset? |
| # A bracketed ARM memory operand: a base ARMRegister, optionally followed by |
| # ',' and one of: '#' Offset (with an optional "*<digits>" multiplier), an |
| # ARMGOTLow12, a Low12BitsSymbolRef or another ARMRegister; that in turn may |
| # be followed by ',' and an ARMConstantTweak. An optional ARMPostincrement |
| # ('!') may follow the closing bracket. |
| ARMBaseIndexScale <- '[' ARMRegister (',' WS? (('#' Offset ('*' [0-9]+)? ) / ARMGOTLow12 / Low12BitsSymbolRef / ARMRegister) (',' WS? ARMConstantTweak)?)? ']' ARMPostincrement? |
| # ":got_lo12:" followed by a symbol name. |
| ARMGOTLow12 <- ":got_lo12:" SymbolName |
| # A '!' after the closing ']' of an ARMBaseIndexScale. |
| ARMPostincrement <- '!' |
| # A parenthesised operand: an optional base RegisterOrConstant, optionally |
| # followed by ',' and an index RegisterOrConstant, which may itself be |
| # followed by ',' and a run of digits. Because every part is optional, "()" |
| # also matches. |
| BaseIndexScale <- '(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)? )? ')' |
| # A single '+' or '-'. |
| Operator <- [+\-] |
| # An optional '+', then an optional '-', then a number: binary ("0b" plus |
| # 0/1 digits), hexadecimal ("0x" plus hex digits) or decimal. The binary and |
| # hexadecimal forms are tried before the decimal one. |
| Offset <- '+'? '-'? (("0b" [01]+) / ("0x" [[0-9A-F]]+) / [0-9]+) |
| # One or more letters or '@' characters. |
| Section <- [[A-Z@]]+ |
| # '%', one character from c-g or 's', then "s:" (e.g. "%fs:"). |
| SegmentRegister <- '%' [c-gs] 's:' |