diff --git a/Cargo.lock b/Cargo.lock index f456f8120..8295d9e5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -66,6 +66,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "align_ext" version = "0.1.0" @@ -76,6 +85,56 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +dependencies = [ + "windows-sys 0.60.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", +] + [[package]] name = "aster-bigtcp" version = "0.1.0" @@ 
-483,12 +542,58 @@ dependencies = [ "generic-array", ] +[[package]] +name = "clap" +version = "4.5.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim 0.11.1", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "clap_lex" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" + [[package]] name = "cobs" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "component" version = "0.1.0" @@ -600,7 +705,7 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim", + "strsim 0.10.0", "syn 1.0.109", ] @@ -886,6 +991,12 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "iced-x86" version = "1.21.0" @@ -963,6 +1074,12 @@ dependencies = [ "ghost", ] +[[package]] +name 
= "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" version = "0.10.5" @@ -1171,6 +1288,12 @@ dependencies = [ "walkdir", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "multiboot2" version = "0.24.0" @@ -1201,6 +1324,27 @@ version = "6.6.666" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf5a574dadd7941adeaa71823ecba5e28331b8313fb2e1c6a5c7e5981ea53ad6" +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags 2.9.1", + "cfg-if", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nougat" version = "0.2.4" @@ -1243,6 +1387,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + [[package]] name = "opaque-debug" version = "0.3.1" @@ -1541,6 +1691,35 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" 
+dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + [[package]] name = "riscv" version = "0.15.0" @@ -1622,6 +1801,16 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sctrace" +version = "0.16.1" +dependencies = [ + "clap", + "nix", + "nom", + "regex", +] + [[package]] name = "semver" version = "1.0.26" @@ -1704,6 +1893,12 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "subtle" version = "2.4.1" @@ -1940,6 +2135,12 @@ dependencies = [ "gimli 0.31.1", ] +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "version_check" version = "0.9.5" @@ -1992,16 +2193,31 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] +[[package]] +name = "windows-link" +version = "0.2.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", ] [[package]] @@ -2010,14 +2226,31 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -2026,48 +2259,96 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "winnow" version = "0.5.40" diff --git a/Cargo.toml b/Cargo.toml index b94fec5b0..27093ce3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ members = [ "kernel/libs/typeflags-util", "kernel/libs/atomic-integer-wrapper", "kernel/libs/xarray", + "tools/sctrace", ] exclude = [ "kernel/libs/comp-sys/cargo-component", @@ -48,6 +49,7 @@ exclude = [ "kernel/libs/comp-sys/component-macro", "kernel/libs/comp-sys/controlled", "osdk", + "tools/sctrace", ] [workspace.lints.rust] diff --git a/Makefile b/Makefile index 6c561360f..5f522379f 100644 --- a/Makefile +++ b/Makefile @@ -189,7 +189,8 @@ NON_OSDK_CRATES := \ kernel/libs/keyable-arc \ kernel/libs/logo-ascii-art \ kernel/libs/typeflags \ - kernel/libs/typeflags-util + kernel/libs/typeflags-util \ + tools/sctrace # In contrast, OSDK crates 
depend on OSTD (or being `ostd` itself) # and need to be built or tested with OSDK. diff --git a/tools/sctrace/.gitignore b/tools/sctrace/.gitignore new file mode 100644 index 000000000..ea8c4bf7f --- /dev/null +++ b/tools/sctrace/.gitignore @@ -0,0 +1 @@ +/target diff --git a/tools/sctrace/Cargo.lock b/tools/sctrace/Cargo.lock new file mode 100644 index 000000000..215f436e4 --- /dev/null +++ b/tools/sctrace/Cargo.lock @@ -0,0 +1,343 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "bitflags" +version = "2.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "clap" +version = "4.5.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eac00902d9d136acd712710d71823fb8ac8004ca445a89e73a41d45aa712931" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ad9bbf750e73b5884fb8a211a9424a1906c1e156724260fdae972f31d70e1d6" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.11.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" + +[[package]] +name = "sctrace" +version = "0.1.0" +dependencies = [ + "clap", + "nix", + "nom", + "regex", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" diff --git a/tools/sctrace/Cargo.toml b/tools/sctrace/Cargo.toml new file mode 100644 index 000000000..7d222b7a1 --- /dev/null +++ b/tools/sctrace/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "sctrace" +version = "0.16.1" +edition = "2024" +description = "Syscall Compatibility Tracer (sctrace) that analyzes and validates system call against System Call Matching Language (SCML)" +license = "MPL-2.0" +readme = "README.md" +repository = "https://github.com/asterinas/asterinas" + +[lib] +name = "sctrace" +path = "src/lib.rs" + +[dependencies] +clap = { version = "4.5.47", features = ["derive"] } +regex = "1.11.2" +nom = "7" +nix = { version = "0.27", features = ["process"] } + +[lints] +workspace = true diff --git a/tools/sctrace/README.md b/tools/sctrace/README.md new file mode 100644 index 000000000..628b765e8 --- /dev/null +++ b/tools/sctrace/README.md @@ -0,0 +1,189 @@ +# Syscall Compatibility Tracer + +Syscall Compatibility Tracer (`sctrace`) is a powerful system call compatibility +verification tool that analyzes and validates system call against user-defined +patterns. Written in +[SCML (System Call Matching Language)](https://asterinas.github.io/book/kernel/linux-compatibility/syscall-feature-coverage/system-call-matching-language.html), +these patterns describe supported functionality of system calls. +`sctrace` supports both real-time monitoring of running programs and post-analysis of +existing trace logs, providing comprehensive insights into system call compatibility +with intuitive pattern matching and visual feedback. 
+ +## Features + +- **Pattern-based filtering**: Define system call patterns using SCML syntax +- **Dual mode operation**: + - Online mode: Real-time tracing of running programs + - Offline mode: Analysis of existing strace log files +- **Multi-threaded support**: Automatic handling of multi-threaded program traces with syscall reconstruction. +When tracing multi-threaded programs, strace may split system calls across multiple lines due to thread interleaving. +`sctrace` automatically handles this reconstruction. +- **Multiple SCML files support**: Specify multiple `.scml` files as arguments to load all of them. +Each file maintains its own scope for bitflags and struct definitions, preventing cross-file pollution. + +## How to build and install + +### Prerequisites + +- [**strace**](https://strace.io/) version 5.15 or higher (for online mode) + - Debian/Ubuntu: `sudo apt install strace` + - Fedora/RHEL: `sudo dnf install strace` +- Rust toolchain + +### Build instructions + +Make sure you have Rust installed, then build the project: + +```bash +cargo build --release +``` + +The binary will be available at `target/release/sctrace`. + +### Installation instructions + +To install the binary (for example, to `/usr/local/bin`), +you can use: + +```bash +sudo cp target/release/sctrace /usr/local/bin/ +``` + +Or you can install from `crates.io` directly (Recommended): + +```bash +cargo install sctrace +``` + +This will automatically download, build, and install the latest version of `sctrace`. + +## Usage + +### Basic Syntax + +```bash +sctrace <SCML_FILE1> [SCML_FILE2 ...] [OPTIONS] -- [program] [args...] 
+``` + +### Options + +- `--input <FILE>`: Specify input file for offline mode +- `--quiet`: Enable quiet mode (only show unsupported calls) + +### Online Mode (Real-time tracing) + +Trace a program in real-time: + +```bash +sctrace pattern1.scml pattern2.scml -- ls -la +sctrace file_ops.scml network.scml --quiet -- ./my_program arg1 arg2 +``` + +### Offline Mode (Log file analysis) + +Analyze an existing strace log file: + +```bash +sctrace pattern1.scml pattern2.scml --input trace.log +sctrace file_ops.scml network.scml --input trace.log --quiet +``` + +**Note**: When generating strace logs for offline analysis, use `-yy` and `-f` flags: + +```bash +strace -yy -f -o trace.log ls -la +``` + +- `-yy`: Print paths associated with file descriptor arguments +- `-f`: Trace child processes created by fork/vfork/clone + +## Examples + +### Example 1: Basic File Operations + +Create `file_ops.scml`: +```scml +openat(dirfd, flags = O_RDONLY | O_WRONLY | O_RDWR, mode); +read(fd, buf, count = <INTEGER>); +write(fd, buf, count = <INTEGER>); +close(fd); +``` + +Run: +```bash +sctrace file_ops.scml -- cat /etc/passwd +``` + +### Example 2: Network Operations + +Create `network.scml`: +```scml +socket(domain = AF_INET | AF_INET6, type = SOCK_STREAM | SOCK_DGRAM, protocol); +connect(sockfd, addr, addrlen); +send(sockfd, buf, len, flags); +recv(sockfd, buf, len, flags); +``` + +Run: +```bash +sctrace network.scml -- curl http://example.com +``` + +### Example 3: Using Asterinas Compatibility Patterns + +Use the provided directory [syscall-feature-coverage](../../book/src/kernel/linux-compatibility/syscall-feature-coverage) (work in progress) and +test with various commands: + +```bash +# Monitor file system operations +sctrace $(find . -name "*.scml") -- tree . + +# Monitor process information calls +sctrace $(find . -name "*.scml") -- top + +# Monitor network operations +sctrace $(find . 
-name "*.scml") -- ping 127.0.0.1 +``` + +### Example 4: Offline Analysis + +```bash +# Generate trace log +strace -yy -f -o trace.log ls -la + +# Analyze with sctrace +sctrace patterns.scml --input trace.log +``` + +## Output + +`sctrace` provides colored output to distinguish between supported and unsupported system calls: + +- **Supported calls**: Normal output (or hidden in quiet mode) +- **Unsupported calls**: Highlighted in red with "unsupported" message + +### Example Output + +``` +openat(AT_FDCWD, "/etc/passwd", O_RDONLY) = 3 +read(3, "root:x:0:0:root:/root:/bin/bash\n"..., 4096) = 1234 +close(3) = 0 +chmod("/tmp/test", 0755) (unsupported) +``` + +## Dependencies + +- `clap`: Command-line argument parsing +- `regex`: Regular expression support +- `nom`: Parser combinator library +- `nix`: Unix system interface for process management + +## Troubleshooting + +### Permission Issues + +For online tracing, you may need elevated privileges: + +```bash +sudo sctrace patterns.scml -- target_program +``` diff --git a/tools/sctrace/src/lib.rs b/tools/sctrace/src/lib.rs new file mode 100644 index 000000000..0bf7db0b7 --- /dev/null +++ b/tools/sctrace/src/lib.rs @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! # Sctrace - Syscall Compatibility Tracer +//! +//! `sctrace` is a library for analyzing `strace` logs against SCML (Syscall Matching Language) +//! patterns to determine syscall compatibility. It provides APIs for parsing `strace` output, +//! matching syscalls against pattern specifications, and reporting compatibility results. +//! +//! ## Examples +//! +//! ```no_run +//! use sctrace::{CliReporterBuilder, Patterns, SctraceBuilder, StraceLogStream}; +//! +//! fn main() -> Result<(), String> { +//! let scml_files = vec!["patterns1.scml", "patterns2.scml"]; +//! +//! let sctrace = SctraceBuilder::new() +//! .strace(StraceLogStream::open_file("strace.log")?) +//! .patterns(Patterns::from_scml_files(&scml_files)?) +//! 
.reporter(CliReporterBuilder::new().quiet().build()) +//! .build(); +//! +//! let _ = sctrace.run(); +//! Ok(()) +//! } +//! ``` +mod scml_matcher; +mod scml_parser; +mod strace_parser; +mod trace; + +use std::io::Lines; + +use scml_matcher::Matcher; +pub use scml_parser::Patterns; +use strace_parser::{StraceParseError, Syscall}; +pub use trace::StraceLogStream; + +/// Builder for creating an `Sctrace` instance. +pub struct SctraceBuilder<'a> { + log_stream: Option, + patterns: Option>, + reporter: Option, +} + +impl<'a> SctraceBuilder<'a> { + /// Creates a new `SctraceBuilder` instance. + pub fn new() -> Self { + Self { + log_stream: None, + patterns: None, + reporter: None, + } + } + + /// Sets the strace log stream for the tracer. + pub fn strace(mut self, stream: StraceLogStream) -> Self { + self.log_stream = Some(stream); + self + } + + /// Sets the SCML patterns for the tracer. + pub fn patterns(mut self, patterns: Patterns<'a>) -> Self { + self.patterns = Some(patterns); + self + } + + /// Sets the reporter for the tracer. + pub fn reporter(mut self, reporter: CliReporter) -> Self { + self.reporter = Some(reporter); + self + } + + /// Builds the `Sctrace` instance with the specified components. + pub fn build(self) -> Sctrace<'a> { + Sctrace { + strace_iter: self.log_stream.expect("`log_stream` is required").lines(), + patterns: self.patterns.expect("`patterns` is required"), + reporter: self.reporter.expect("`reporter` is required"), + } + } +} + +impl Default for SctraceBuilder<'_> { + fn default() -> Self { + Self::new() + } +} + +/// The high-level API of syscall compatibility tracer. +pub struct Sctrace<'a> { + strace_iter: Lines, + patterns: Patterns<'a>, + reporter: CliReporter, +} + +impl Sctrace<'_> { + /// Runs the syscall trace analysis. 
+ pub fn run(mut self) -> Result>, String> { + let matcher = Matcher::new(self.patterns.clone()); + + for line_result in &mut self.strace_iter { + let line = line_result.map_err(|e| format!("Failed to read trace line: {}", e))?; + + match Syscall::fetch(line) { + Ok(line) => match Syscall::parse(&line) { + Ok(syscall) => { + if matcher.match_syscall(&syscall).is_some() { + self.reporter.report_supported(&syscall); + } else { + self.reporter.report_unsupported(&syscall); + } + } + Err(_) => { + self.reporter.report_parse_error(&line); + } + }, + Err(e) => { + match e { + StraceParseError::BlockedLine + | StraceParseError::SignalLine + | StraceParseError::ExitLine + | StraceParseError::EmptyLine => { + // Ignore blocked, signal, exit and empty lines + continue; + } + _ => { + panic!("Unexpected error: {}", e); + } + } + } + } + } + + Ok(self.reporter.outputs()) + } +} + +/// Builder for creating a `CliReporter` with customizable settings. +pub struct CliReporterBuilder { + quiet: bool, + collect: bool, +} + +impl CliReporterBuilder { + /// Creates a new `CliReporterBuilder` instance. + pub fn new() -> Self { + Self { + quiet: false, + collect: false, + } + } + + /// Enables quiet mode, suppressing supported syscall output. + pub fn quiet(mut self) -> Self { + self.quiet = true; + self + } + + /// Sets quiet mode. + pub fn set_quiet(mut self, quiet: bool) -> Self { + self.quiet = quiet; + self + } + + /// Enables collection of output strings instead of printing to stdout/stderr. + pub fn collect(mut self) -> Self { + self.collect = true; + self + } + + /// Sets collection mode. + pub fn set_collect(mut self, collect: bool) -> Self { + self.collect = collect; + self + } + + /// Builds the `CliReporter` instance with the specified settings. 
+ pub fn build(self) -> CliReporter { + CliReporter::new( + self.quiet, + if self.collect { Some(Vec::new()) } else { None }, + ) + } +} + +impl Default for CliReporterBuilder { + fn default() -> Self { + Self::new() + } +} + +/// Reporter for outputting syscall trace analysis results. +pub struct CliReporter { + quiet: bool, + outputs: Option>, +} + +macro_rules! report { + ($vec:expr, err: $($arg:tt)*) => { + if let Some(ref mut vec) = $vec { + (&mut *vec).push(format!($($arg)*)); + } else { + eprintln!($($arg)*); + } + }; + ($vec:expr, $($arg:tt)*) => { + if let Some(ref mut vec) = $vec { + (&mut *vec).push(format!($($arg)*)); + } else { + println!($($arg)*); + } + }; +} + +impl CliReporter { + fn new(quiet: bool, outputs: Option>) -> Self { + Self { quiet, outputs } + } + + fn report_supported(&mut self, syscall: &Syscall) { + if !self.quiet { + report!(self.outputs, "{}", syscall.original_line()); + } + } + + fn report_unsupported(&mut self, syscall: &Syscall) { + if self.quiet { + report!( + self.outputs, + err: "Unsupported syscall: {}", + syscall.original_line() + ); + } else { + report!( + self.outputs, + err: "{} \x1b[31m(unsupported)\x1b[0m", + syscall.original_line() + ); + } + } + + fn report_parse_error(&mut self, line: &str) { + report!(self.outputs, err: "Strace Parse Error: {}", line); + } + + fn outputs(self) -> Option> { + self.outputs + } +} diff --git a/tools/sctrace/src/main.rs b/tools/sctrace/src/main.rs new file mode 100644 index 000000000..22e3a88eb --- /dev/null +++ b/tools/sctrace/src/main.rs @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: MPL-2.0 + +mod parameter; + +use std::env; + +use parameter::Parameters; +use sctrace::{CliReporterBuilder, Patterns, SctraceBuilder, StraceLogStream}; + +fn run() -> Result<(), String> { + let args: Vec = env::args().collect(); + let params = Parameters::new(args)?; + + let sctrace = SctraceBuilder::new() + .patterns(Patterns::from_scml_files(params.scml_paths())?) 
+ .strace(if params.offline() { + StraceLogStream::open_file(params.input_path()) + } else { + StraceLogStream::run_cmd(params.program_path(), params.program_args()) + }?) + .reporter(CliReporterBuilder::new().set_quiet(params.quiet()).build()) + .build(); + + let _ = sctrace.run(); + Ok(()) +} + +fn main() { + if let Err(err) = run() { + eprintln!("{}", err); + std::process::exit(1); + } +} diff --git a/tools/sctrace/src/parameter.rs b/tools/sctrace/src/parameter.rs new file mode 100644 index 000000000..f6d15b6f4 --- /dev/null +++ b/tools/sctrace/src/parameter.rs @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: MPL-2.0 + +use std::{error::Error, fmt}; + +use clap::{Arg, ArgAction, Command}; + +/// Holds parsed command-line parameters for the sctrace tool. +#[derive(Debug)] +pub(crate) struct Parameters { + /// Path to the SCML pattern files + scml_paths: Vec, + /// Path to the input strace log file (offline mode) + input_path: String, + /// Whether quiet mode is enabled + quiet: bool, + /// Path to the program to trace (online mode) + program_path: String, + /// Arguments to pass to the program (online mode) + program_args: Vec, +} + +#[derive(Debug)] +pub(crate) struct ParameterError { + message: String, +} + +impl fmt::Display for ParameterError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.message) + } +} + +impl From for String { + fn from(err: ParameterError) -> Self { + err.to_string() + } +} + +impl Error for ParameterError {} + +impl Parameters { + /// Creates a new Parameters instance from command line arguments. + /// + /// ```text + /// sctrace [SCML_PATH2...] [--input FILE] [--quiet] [-- PROGRAM] [PROGRAM_ARGS...] 
+ /// ``` + pub(crate) fn new(args: Vec) -> Result { + let app = Command::new("sctrace") + .about("Syscall Compatibility Tracer (sctrace) - trace and validate syscalls against SCML patterns") + .version(env!("CARGO_PKG_VERSION")) + .after_help("EXAMPLES:\n \ + Offline mode: sctrace patterns.scml --input trace.log\n \ + Online mode: sctrace patterns.scml -- /bin/ls -la") + .arg( + Arg::new("scml_paths") + .help("Path(s) to SCML file(s)") + .required(true) + .num_args(1..) + .value_name("SCML_PATHS"), + ) + .arg( + Arg::new("input") + .long("input") + .help("Input file path") + .value_name("FILE") + .action(ArgAction::Set), + ) + .arg( + Arg::new("quiet") + .long("quiet") + .help("Enable quiet mode") + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new("program") + .help("Program to execute and its arguments (use -- to separate)") + .num_args(0..) + .value_name("PROGRAM") + .last(true) + .allow_hyphen_values(true), + ); + + let matches = app.try_get_matches_from(args).map_err(|e| ParameterError { + message: e.to_string(), + })?; + + let scml_paths: Vec = matches + .get_many::("scml_paths") + .map(|values| values.cloned().collect()) + .unwrap_or_default(); + + let input_path = matches + .get_one::("input") + .unwrap_or(&String::new()) + .clone(); + let quiet = matches.get_flag("quiet"); + + let mut program_iter = matches + .get_many::("program") + .map(|values| values.cloned()) + .into_iter() + .flatten(); + + let program_path = program_iter.next().unwrap_or_default(); + let program_args: Vec = program_iter.collect(); + + let has_input = !input_path.is_empty(); + let has_program = !program_path.is_empty(); + + if has_input && has_program { + return Err(ParameterError { + message: "Cannot specify both --input and program arguments".to_string(), + }); + } + + if !has_input && !has_program { + return Err(ParameterError { + message: "Must specify either --input or program arguments".to_string(), + }); + } + + Ok(Parameters { + scml_paths, + input_path, + quiet, + 
program_path, + program_args, + }) + } + + pub(crate) fn scml_paths(&self) -> &Vec { + &self.scml_paths + } + + pub(crate) fn input_path(&self) -> &str { + &self.input_path + } + + pub(crate) fn quiet(&self) -> bool { + self.quiet + } + + pub(crate) fn offline(&self) -> bool { + !self.input_path.is_empty() + } + + pub(crate) fn program_path(&self) -> &str { + &self.program_path + } + + pub(crate) fn program_args(&self) -> Vec<&str> { + self.program_args.iter().map(String::as_str).collect() + } +} + +#[cfg(test)] +mod tests { + use std::vec; + + use super::*; + + fn to_string_vec(args: &[&str]) -> Vec { + args.iter().map(|s| s.to_string()).collect() + } + + #[test] + fn test_new_offline_mode() { + let args = to_string_vec(&["sctrace", "test.scml", "--input", "input.txt"]); + let params = Parameters::new(args).unwrap(); + assert_eq!(params.scml_paths, vec!["test.scml"]); + assert_eq!(params.input_path(), "input.txt"); + assert_eq!(params.quiet(), false); + assert_eq!(params.program_path(), ""); + assert!(params.program_args().is_empty()); + assert!(params.offline()); + } + + #[test] + fn test_new_online_mode() { + let args = to_string_vec(&["sctrace", "test.scml", "--", "my_program", "arg1", "arg2"]); + let params = Parameters::new(args).unwrap(); + assert_eq!(params.scml_paths, vec!["test.scml"]); + assert_eq!(params.input_path(), ""); + assert_eq!(params.quiet(), false); + assert_eq!(params.program_path(), "my_program"); + assert_eq!(params.program_args(), &["arg1", "arg2"]); + assert!(!params.offline()); + } + + #[test] + fn test_new_online_mode_no_args() { + let args = to_string_vec(&["sctrace", "test.scml", "--", "my_program"]); + let params = Parameters::new(args).unwrap(); + assert_eq!(params.scml_paths, vec!["test.scml"]); + assert_eq!(params.program_path(), "my_program"); + assert!(params.program_args().is_empty()); + assert!(!params.offline()); + } + + #[test] + fn test_new_quiet_mode() { + let args = to_string_vec(&["sctrace", "test.scml", "--quiet", 
"--input", "input.txt"]); + let params = Parameters::new(args).unwrap(); + assert!(params.quiet()); + } + + #[test] + fn test_new_missing_scml_file() { + let args = to_string_vec(&["sctrace", "--input", "input.txt"]); + let params = Parameters::new(args); + assert!(params.is_err()); + } + + #[test] + fn test_new_missing_input_and_program() { + let args = to_string_vec(&["sctrace", "test.scml"]); + let result = Parameters::new(args); + assert!(result.is_err()); + assert_eq!( + result.err().unwrap().to_string(), + "Must specify either --input or program arguments" + ); + } + + #[test] + fn test_many_scml_paths() { + let args = to_string_vec(&["sctrace", "--input", "in.txt", "a.scml", "b.scml", "c.scml"]); + let params = Parameters::new(args).unwrap(); + assert_eq!(params.scml_paths, vec!["a.scml", "b.scml", "c.scml"]); + + let args = to_string_vec(&["sctrace", "a.scml", "b.scml", "c.scml", "--", "prog", "arg"]); + let params = Parameters::new(args).unwrap(); + assert_eq!(params.scml_paths, vec!["a.scml", "b.scml", "c.scml"]); + assert_eq!(params.program_path(), "prog"); + assert_eq!(params.program_args(), &["arg"]); + } + + #[test] + fn test_new_both_input_and_program() { + let args = to_string_vec(&[ + "sctrace", "--input", "in.txt", "a.scml", "b.scml", "c.scml", "--", "prog", + ]); + let result = Parameters::new(args); + assert!(result.is_err()); + assert_eq!( + result.err().unwrap().to_string(), + "Cannot specify both --input and program arguments" + ); + } +} diff --git a/tools/sctrace/src/scml_matcher.rs b/tools/sctrace/src/scml_matcher.rs new file mode 100644 index 000000000..96559d2df --- /dev/null +++ b/tools/sctrace/src/scml_matcher.rs @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: MPL-2.0 + +use crate::{ + scml_parser::{ + ParserCtx, Pattern, PatternArg, PatternArray, PatternFlagSet, PatternStruct, Patterns, + }, + strace_parser::{Syscall, SyscallArg, SyscallArray, SyscallFlagSet, SyscallStruct}, +}; + +/// Matcher for syscalls against SCML patterns. 
+pub(crate) struct Matcher<'a> { + patterns: Patterns<'a>, +} + +impl<'a> Matcher<'a> { + pub(crate) fn new(patterns: Patterns<'a>) -> Self { + Self { patterns } + } + + /// Attempts to match a syscall against available patterns. + pub(crate) fn match_syscall(&self, syscall: &Syscall) -> Option<&Pattern<'_>> { + let related_patterns = self.patterns.get(syscall.name())?; + + related_patterns + .iter() + .find(|pattern| Self::matches(self.patterns.ctx(), syscall, pattern)) + } + + /// Matches a syscall against a specific pattern. + fn matches(ctx: &ParserCtx, syscall: &Syscall, pattern: &Pattern) -> bool { + assert_eq!(syscall.name(), pattern.name()); + + // Argument count must match exactly if wildcard is not enabled + if !pattern.wildcard() && syscall.args().len() != pattern.args().len() { + return false; + } + + // syscall arguments must be at least as many as pattern arguments + if syscall.args().len() < pattern.args().len() { + return false; + } + + // Match each argument pair + for (syscall_arg, pattern_arg) in syscall.args().iter().zip(pattern.args().iter()) { + if !Self::matches_arg(ctx, syscall_arg, pattern_arg) { + return false; + } + } + + true + } + + /// Matches a single syscall argument against a pattern argument. 
+ fn matches_arg(ctx: &ParserCtx, syscall_arg: &SyscallArg, pattern_arg: &PatternArg) -> bool { + match pattern_arg { + // Matches any syscall argument + PatternArg::None => true, + + // Type constraint - matches only integer arguments + PatternArg::Integer => matches!(syscall_arg, SyscallArg::Integer(_)), + + // Path matching not yet implemented + PatternArg::Path => todo!("Path matching not implemented yet"), + + // Flag set matching - supports both zero values and flag combinations + PatternArg::Flags(pattern_flags) => match syscall_arg { + // Special case: integer zero matches any flag pattern + SyscallArg::Integer(value) if *value == "0" => true, + // Validate that all syscall flags are present in pattern + SyscallArg::Flags(syscall_flags) => { + Self::matches_flags(ctx, syscall_flags, pattern_flags) + } + _ => false, + }, + + // Array matching - recursive element validation + PatternArg::Array(pattern_array) => match syscall_arg { + SyscallArg::Array(syscall_array) => { + Self::matches_array(ctx, syscall_array, pattern_array) + } + // Handle NULL array case - single Flag("NULL") matches any array pattern + SyscallArg::Flags(flag_set) if flag_set.flags().len() == 1 => { + matches!( + flag_set.flags().first(), + Some(SyscallArg::Flag(flag_name)) if *flag_name == "NULL" + ) + } + _ => false, + }, + + // Struct matching - field-by-field validation with wildcard support + PatternArg::Struct(pattern_struct) => match syscall_arg { + SyscallArg::Struct(syscall_struct) => { + Self::matches_struct(ctx, syscall_struct, pattern_struct) + } + // Handle NULL pointer case - single Flag("NULL") matches any struct pattern + SyscallArg::Flags(flag_set) if flag_set.flags().len() == 1 => { + matches!( + flag_set.flags().first(), + Some(SyscallArg::Flag(flag_name)) if *flag_name == "NULL" + ) + } + _ => false, + }, + + // Multiple struct alternatives - matches if any alternative matches + PatternArg::MultipleStruct(pattern_structs) => match syscall_arg { + 
SyscallArg::Struct(syscall_struct) => { + pattern_structs.structs().iter().any(|pattern_struct| { + if let PatternArg::Struct(pattern_struct) = pattern_struct { + Self::matches_struct(ctx, syscall_struct, pattern_struct) + } else { + panic!("Expected PatternArg::Struct inside MultipleStruct"); + } + }) + } + // Handle NULL pointer case - single Flag("NULL") matches any struct pattern + SyscallArg::Flags(flag_set) if flag_set.flags().len() == 1 => { + matches!( + flag_set.flags().first(), + Some(SyscallArg::Flag(flag_name)) if *flag_name == "NULL" + ) + } + _ => false, + }, + + PatternArg::FlagsVariable(_) | PatternArg::StructVariable(_) => { + let expanded = pattern_arg.get(ctx); + Self::matches_arg(ctx, syscall_arg, expanded) + } + + // Single flags should not appear in patterns + PatternArg::Flag(_) => { + panic!( + "Single Flag should not appear in pattern matching - should be wrapped in Flags" + ) + } + } + } + + fn matches_flags( + ctx: &ParserCtx, + syscall_flags: &SyscallFlagSet, + pattern_flags: &PatternFlagSet, + ) -> bool { + // Every syscall flag must find a match in the pattern flags + syscall_flags + .flags() + .iter() + .all(|syscall_flag| Self::matches_flag(ctx, syscall_flag, pattern_flags)) + } + + /// Matches a single syscall flag against a pattern flag set. 
+ fn matches_flag( + ctx: &ParserCtx, + syscall_flag: &SyscallArg, + pattern_flags: &PatternFlagSet, + ) -> bool { + match syscall_flag { + SyscallArg::Flag(_) => (), + _ => { + panic!("Syscall flag must be of type Flag for flag matching"); + } + }; + + let mut found_match = false; + + for pattern_flag in pattern_flags.flags() { + match (syscall_flag, pattern_flag) { + // Named flag matching - exact name comparison + (SyscallArg::Flag(syscall_name), PatternArg::Flag(pattern_name)) => { + if syscall_name == pattern_name { + found_match = true; + break; + } + } + + // Integer flag matching - pattern accepts any integer + (SyscallArg::Integer(_), PatternArg::Integer) => { + found_match = true; + break; + } + + // Flags variable - expand and attempt matching + (_, PatternArg::FlagsVariable(_)) => { + if let PatternArg::Flags(expanded) = pattern_flag.get(ctx) { + found_match = Self::matches_flag(ctx, syscall_flag, expanded); + if found_match { + break; + } + } + } + _ => {} + } + } + + found_match + } + + fn matches_array( + ctx: &ParserCtx, + syscall_array: &SyscallArray, + pattern_array: &PatternArray, + ) -> bool { + syscall_array.elements().iter().all(|syscall_element| { + pattern_array + .args() + .iter() + .any(|pattern_element| Self::matches_arg(ctx, syscall_element, pattern_element)) + }) + } + + fn matches_struct( + ctx: &ParserCtx, + syscall_struct: &SyscallStruct, + pattern_struct: &PatternStruct, + ) -> bool { + // Without wildcard, field count must match exactly + if !pattern_struct.wildcard() + && syscall_struct.fields().len() != pattern_struct.fields().len() + { + return false; + } + + // Every pattern field must be found and matched in the syscall struct + for (pattern_field_name, pattern_field_value) in pattern_struct.fields() { + match syscall_struct.get_value(pattern_field_name) { + Some(syscall_field_value) => { + if !Self::matches_arg(ctx, syscall_field_value, pattern_field_value) { + return false; + } + } + None => { + // Required pattern field not 
found in syscall + return false; + } + } + } + + true + } +} diff --git a/tools/sctrace/src/scml_parser.rs b/tools/sctrace/src/scml_parser.rs new file mode 100644 index 000000000..67aa1c0e2 --- /dev/null +++ b/tools/sctrace/src/scml_parser.rs @@ -0,0 +1,824 @@ +// SPDX-License-Identifier: MPL-2.0 + +use std::{collections::HashMap, error::Error, fmt, fs, path::Path}; + +use nom::{ + IResult, + branch::alt, + bytes::complete::{tag, take_while}, + character::complete::{char, multispace0}, + combinator::{opt, recognize}, + multi::{separated_list0, separated_list1}, + sequence::{delimited, pair, preceded}, +}; + +#[derive(Debug, Clone)] +pub enum ScmlParseError { + IoError(String), + ParseError(String), + IncompleteStatement(String), +} + +impl fmt::Display for ScmlParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ScmlParseError::IoError(msg) => write!(f, "I/O error: {}", msg), + ScmlParseError::ParseError(msg) => write!(f, "Parsing error: {}", msg), + ScmlParseError::IncompleteStatement(stmt) => { + write!(f, "Incomplete statement: {}", stmt) + } + } + } +} + +impl From for String { + fn from(err: ScmlParseError) -> Self { + err.to_string() + } +} + +impl Error for ScmlParseError {} + +/// Collection of syscall patterns parsed from SCML files. +#[derive(Debug, Clone, PartialEq)] +pub struct Patterns<'a> { + /// Map from syscall names to their associated patterns. + patterns: HashMap<&'a str, Vec>>, + + /// Parser context with variable definitions. + ctx: ParserCtx<'a>, +} + +impl<'a> Patterns<'a> { + /// Reads SCML file(s) and parses all pattern and variable definitions. + pub fn from_scml_files>(paths: &Vec

) -> Result { + let mut all_patterns = Patterns::default(); + + for path in paths { + let patterns = { + let content = fs::read_to_string(path).map_err(|e| { + ScmlParseError::IoError(format!( + "Failed to read file '{}': {}", + path.as_ref().display(), + e + )) + })?; + // Using the file path as variable prefix to avoid name clashes + Self::from_scml_with_var_prefix(path.as_ref().display().to_string(), &content)? + }; + + all_patterns.merge(patterns); + } + + Ok(all_patterns) + } + + /// Parses SCML content into patterns. + pub fn from_scml(content: &str) -> Result { + Self::from_scml_with_var_prefix("".to_string(), content) + } + + /// Retrieves all patterns for a specific syscall name. + pub(crate) fn get(&self, name: &str) -> Option<&Vec>> { + self.patterns.get(name) + } + + /// Returns the parser context with variable definitions. + pub(crate) fn ctx(&self) -> &ParserCtx<'_> { + &self.ctx + } + + fn new(patterns: HashMap<&'a str, Vec>>, ctx: ParserCtx<'a>) -> Self { + Self { patterns, ctx } + } +} + +impl Default for Patterns<'_> { + fn default() -> Self { + Self::new(HashMap::new(), ParserCtx::new()) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct Pattern<'a> { + /// Name of the pattern (corresponds to syscall name). + name: &'a str, + + /// Ordered list of argument patterns for this syscall. + args: Vec>, + + /// Whether this pattern accepts additional unspecified arguments. 
+ wildcard: bool, +} + +impl<'a> Pattern<'a> { + pub(crate) fn name(&self) -> &'a str { + self.name + } + + pub(crate) fn args(&self) -> &Vec> { + &self.args + } + + pub(crate) fn wildcard(&self) -> bool { + self.wildcard + } + + fn new(name: &'a str, args: Vec>, wildcard: bool) -> Self { + Self { + name, + args, + wildcard, + } + } + + fn parse(ctx: &ParserCtxBuilder<'a>, input: &'a str) -> IResult<&'a str, Pattern<'a>> { + let (input, _) = multispace0(input)?; + let (input, name) = identifier(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char('(')(input)?; + let (input, _) = multispace0(input)?; + let (input, args) = opt(|i| Self::parse_param_list(ctx, i))(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = opt(char(','))(input)?; // Allow trailing comma + let (input, has_wildcard) = opt(preceded( + multispace0, + delimited(multispace0, tag(".."), multispace0), + ))(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char(')')(input)?; + let (input, _) = char(';')(input)?; + + let args = args.unwrap_or_default(); + Ok((input, Pattern::new(name, args, has_wildcard.is_some()))) + } + + fn parse_definition(ctx: &mut ParserCtxBuilder<'a>, input: &'a str) -> IResult<&'a str, ()> { + if let Ok((input, (name, flags))) = Self::parse_flags_definition(ctx, input) { + ctx.insert_flags_variable(name, flags); + return Ok((input, ())); + } + + if let Ok((input, (name, struct_def))) = Self::parse_struct_definition(ctx, input) { + ctx.insert_struct_variable(name, struct_def); + return Ok((input, ())); + } + + ctx.set_last_struct(None); + + Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::Tag, + ))) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum PatternArg<'a> { + /// Matches all values (no constraint). + None, + + /// Matches integer arguments. + Integer, + + /// Matches path arguments. + Path, + + /// Matches specific flag values like `O_RDONLY`. 
+ Flag(&'a str), + + /// Matches array arguments where each element matches the corresponding + /// pattern in the array. + Array(PatternArray<'a>), + + /// Matches struct arguments with specified field constraints. + Struct(PatternStruct<'a>), + + /// Matches if any of the struct patterns match. Used when a struct + /// variable is defined multiple times with different field combinations. + MultipleStruct(PatternMultipleStruct<'a>), + + /// Matches flag combinations like `O_RDWR | O_CREAT`. + Flags(PatternFlagSet<'a>), + + /// Contains the variable ID that resolves to a `PatternFlagSet`. + FlagsVariable(&'a str), + + /// Contains the variable ID that resolves to a `PatternStruct` + /// or `PatternMultipleStruct`. + StructVariable(&'a str), +} + +impl PatternArg<'_> { + /// Dereferences `FlagsVariable` and `StructVariable` types to + /// retrieve their actual pattern definitions from the parser context. + pub(crate) fn get<'b>(&self, ctx: &'b ParserCtx) -> &'b PatternArg<'b> { + match self { + PatternArg::FlagsVariable(id) => ctx.flags_lookup(id).unwrap(), + + PatternArg::StructVariable(id) => ctx.struct_lookup(id).unwrap(), + + _ => { + panic!("get() can only be called on variable reference types"); + } + } + } +} + +/// An array pattern matches array arguments where each element must match +/// one of the specified patterns in the array. +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct PatternArray<'a>(Vec>); + +impl<'a> PatternArray<'a> { + pub(crate) fn args(&self) -> &Vec> { + &self.0 + } + + fn new(args: Vec>) -> Self { + Self(args) + } +} + +/// A flag set pattern matches bitwise OR combinations of flags. 
+#[derive(Debug, Clone, PartialEq)] +pub(crate) struct PatternFlagSet<'a>(Vec>); + +impl<'a> PatternFlagSet<'a> { + pub(crate) fn flags(&self) -> &Vec> { + &self.0 + } + + fn new(flags: Vec>) -> Self { + // Validate that all elements are either Flag or Integer types + for flag in &flags { + match flag { + PatternArg::Flag(_) | PatternArg::Integer | PatternArg::FlagsVariable(_) => { + // Valid flag type + } + _ => { + panic!("PatternFlagSet can only contain Flag or Integer types"); + } + } + } + + Self(flags) + } +} + +/// A struct pattern matches structured data with named fields. It can specify +/// exact field matching or allow additional fields via wildcard. +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct PatternStruct<'a> { + fields: HashMap<&'a str, PatternArg<'a>>, + wildcard: bool, +} + +impl<'a> PatternStruct<'a> { + pub(crate) fn fields(&self) -> &HashMap<&'a str, PatternArg<'a>> { + &self.fields + } + + pub(crate) fn wildcard(&self) -> bool { + self.wildcard + } + + fn new(fields: HashMap<&'a str, PatternArg<'a>>, wildcard: bool) -> Self { + Self { fields, wildcard } + } +} + +/// A multiple struct pattern allows defining a struct variable multiple times +/// with different field combinations. The pattern matches if any of the alternatives +/// match. +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct PatternMultipleStruct<'a>(Vec>); + +impl<'a> PatternMultipleStruct<'a> { + pub(crate) fn structs(&self) -> &Vec> { + &self.0 + } + + fn new(structs: Vec>) -> Self { + for struct_arg in &structs { + match struct_arg { + PatternArg::Struct(_) => { + // Valid struct type + } + _ => { + panic!("PatternMultipleStruct can only contain Struct types"); + } + } + } + Self(structs) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct ParserCtx<'a> { + /// Map of flags variable IDs to their pattern definitions. + flags_variables: HashMap<&'a str, PatternArg<'a>>, + /// Map of struct variable IDs to their pattern definitions. 
+ struct_variables: HashMap<&'a str, PatternArg<'a>>, + /// Map of multiple struct variable IDs to their pattern definitions. + multiple_struct_variables: HashMap<&'a str, PatternArg<'a>>, +} + +impl<'a> ParserCtx<'a> { + fn new() -> Self { + Self { + flags_variables: HashMap::new(), + struct_variables: HashMap::new(), + multiple_struct_variables: HashMap::new(), + } + } + + fn flags_lookup(&self, id: &str) -> Option<&PatternArg<'a>> { + self.flags_variables.get(id) + } + + fn struct_lookup(&self, id: &str) -> Option<&PatternArg<'a>> { + if let Some(pattern) = self.struct_variables.get(id) { + Some(pattern) + } else if let Some(pattern) = self.multiple_struct_variables.get(id) { + Some(pattern) + } else { + None + } + } +} + +#[derive(Debug, Clone, PartialEq)] +struct ParserCtxBuilder<'a> { + variable_prefix: String, + /// The parser context being built. + parser_ctx: ParserCtx<'a>, + /// Map of named bitflags to their internal IDs. + named_bitflags: HashMap<&'a str, &'a str>, + /// Map of named structs to their internal IDs. + named_structs: HashMap<&'a str, &'a str>, + /// Last parsed struct definition. + last_struct: Option<(&'a str, &'a str)>, + /// Counter for generating unique variable IDs. 
+ counter: usize, +} + +impl<'a> ParserCtxBuilder<'a> { + fn new(name: String) -> Self { + Self { + variable_prefix: name, + parser_ctx: ParserCtx::new(), + named_bitflags: HashMap::new(), + named_structs: HashMap::new(), + last_struct: None, + counter: 0, + } + } + + fn build(self) -> ParserCtx<'a> { + self.parser_ctx + } + + fn generate_variable_id(&mut self) -> &'static str { + self.counter += 1; + Box::leak(Box::new(format!( + "{}/var_{}", + self.variable_prefix, self.counter + ))) + } + + fn flags_variables_mut(&mut self) -> &mut HashMap<&'a str, PatternArg<'a>> { + &mut self.parser_ctx.flags_variables + } + + fn struct_variables_mut(&mut self) -> &mut HashMap<&'a str, PatternArg<'a>> { + &mut self.parser_ctx.struct_variables + } + + fn multiple_struct_variables_mut(&mut self) -> &mut HashMap<&'a str, PatternArg<'a>> { + &mut self.parser_ctx.multiple_struct_variables + } + + fn insert_named_bitflag(&mut self, name: &'a str, id: &'a str) { + self.named_bitflags.insert(name, id); + } + + fn insert_named_struct(&mut self, name: &'a str, id: &'a str) { + self.named_structs.insert(name, id); + } + + fn add_flags_variable(&mut self, id: &'a str, flags: PatternArg<'a>) { + self.flags_variables_mut().insert(id, flags); + } + + fn add_struct_variable(&mut self, id: &'a str, struct_def: PatternArg<'a>) { + self.struct_variables_mut().insert(id, struct_def); + } + + fn add_multiple_struct_variable(&mut self, id: &'a str, struct_def: PatternArg<'a>) { + if self.multiple_struct_variables_mut().contains_key(id) { + self.append_to_multiple_struct(id, struct_def); + } else { + self.convert_struct_to_multiple(id); + self.append_to_multiple_struct(id, struct_def); + } + } + + fn convert_struct_to_multiple(&mut self, id: &'a str) { + if let Some(existing_struct) = self.struct_variables_mut().remove(id) { + let multiple_struct = + PatternArg::MultipleStruct(PatternMultipleStruct::new(vec![existing_struct])); + self.multiple_struct_variables_mut() + .insert(id, multiple_struct); + 
} else { + panic!("Struct variable should exist for conversion"); + } + } + + fn append_to_multiple_struct(&mut self, id: &str, struct_def: PatternArg<'a>) { + let multiple_struct = self + .parser_ctx + .multiple_struct_variables + .get_mut(id) + .expect("Multiple struct variable should exist"); + + if let PatternArg::MultipleStruct(multi_struct) = multiple_struct { + multi_struct.0.push(struct_def); + } else { + panic!("Expected MultipleStruct variant"); + } + } + + fn set_last_struct(&mut self, value: Option<(&'a str, &'a str)>) { + self.last_struct = value; + } + + fn get_flags_id(&self, name: &str) -> Option<&'a str> { + self.named_bitflags.get(name).copied() + } + + fn get_struct_id(&self, name: &str) -> Option<&'a str> { + self.named_structs.get(name).copied() + } + + fn insert_flags_variable(&mut self, name: &'a str, flags: PatternArg<'a>) { + let id = self.generate_variable_id(); + self.insert_named_bitflag(name, id); + self.add_flags_variable(id, flags); + self.set_last_struct(None); + } + + fn insert_struct_variable(&mut self, name: &'a str, struct_def: PatternArg<'a>) { + if let Some((last_struct_name, last_struct_id)) = &self.last_struct { + // Continuous definition - append as alternative + if &name == last_struct_name { + self.add_multiple_struct_variable(last_struct_id, struct_def); + return; + } + } + + // New struct definition + let id = self.generate_variable_id(); + self.insert_named_struct(name, id); + self.add_struct_variable(id, struct_def); + self.set_last_struct(Some((name, id))); + } +} + +fn identifier(input: &str) -> IResult<&str, &str> { + alt(( + nom::character::complete::digit1, + recognize(pair( + alt((nom::character::complete::alpha1, tag("_"))), + take_while(|c: char| c.is_alphanumeric() || c == '_'), + )), + ))(input) +} + +impl Patterns<'_> { + /// Get patterns from SCML content with a specific variable prefix. 
+ fn from_scml_with_var_prefix( + variable_prefix: String, + content: &str, + ) -> Result { + let stmt_iterator = StatementIterator::new(content); + let mut patterns: HashMap<&str, Vec> = HashMap::new(); + let mut ctx = ParserCtxBuilder::new(variable_prefix); + let mut errors = Vec::new(); + + for stmt in stmt_iterator { + let statement = stmt?; + + if Pattern::parse_definition(&mut ctx, statement).is_ok() { + // Successfully parsed a definition, continue to next statement + continue; + } + + match Pattern::parse(&ctx, statement) { + Ok((remaining, pattern)) => { + if !remaining.trim().is_empty() { + errors.push(format!( + "Warning: Unparsed input remaining in statement '{}': '{}'", + statement, remaining + )); + } + patterns.entry(pattern.name()).or_default().push(pattern); + } + Err(err) => { + errors.push(format!("Error parsing statement '{}': {}", statement, err)); + break; + } + } + } + + if !errors.is_empty() { + return Err(ScmlParseError::ParseError(errors.join("\n"))); + } + + Ok(Self::new(patterns, ctx.build())) + } + + /// Merges another Patterns instance into self. 
+ fn merge(&mut self, other: Self) { + for (syscall_name, mut other_patterns) in other.patterns { + self.patterns + .entry(syscall_name) + .or_default() + .append(&mut other_patterns); + } + + self.ctx.flags_variables.extend(other.ctx.flags_variables); + self.ctx.struct_variables.extend(other.ctx.struct_variables); + self.ctx + .multiple_struct_variables + .extend(other.ctx.multiple_struct_variables); + } +} + +impl<'a> Pattern<'a> { + fn parse_param_list( + ctx: &ParserCtxBuilder<'a>, + input: &'a str, + ) -> IResult<&'a str, Vec>> { + separated_list0(delimited(multispace0, char(','), multispace0), |i| { + Self::parse_param(ctx, i) + })(input) + } + + fn parse_param(ctx: &ParserCtxBuilder<'a>, input: &'a str) -> IResult<&'a str, PatternArg<'a>> { + let (input, _) = multispace0(input)?; + let (input, _) = identifier(input)?; + let (input, _) = multispace0(input)?; + + // Check if parameter has constraint (= value) + if let Ok((input, _)) = char::<&str, nom::error::Error<&str>>('=')(input) { + let (input, _) = multispace0(input)?; + Self::parse_expr(ctx, input) + } else { + // Unconstrained parameter + Ok((input, PatternArg::None)) + } + } + + fn parse_expr(ctx: &ParserCtxBuilder<'a>, input: &'a str) -> IResult<&'a str, PatternArg<'a>> { + alt(( + |i| Self::parse_struct(ctx, i), + |i| Self::parse_array(ctx, i), + Self::parse_builtin_type, + |i| Self::parse_flags(ctx, i), + |i| Self::parse_struct_variable(ctx, i), + |i| Self::parse_flags_variable(ctx, i), + ))(input) + } + + fn parse_struct( + ctx: &ParserCtxBuilder<'a>, + input: &'a str, + ) -> IResult<&'a str, PatternArg<'a>> { + let (input, _) = multispace0(input)?; + let (input, _) = char('{')(input)?; + let (input, _) = multispace0(input)?; + + let (input, fields) = + separated_list0(delimited(multispace0, char(','), multispace0), |i| { + Self::parse_struct_field(ctx, i) + })(input)?; + + let (input, _) = multispace0(input)?; + + // Check for wildcard (..) 
+ let (input, wildcard) = opt(preceded( + opt(char(',')), + delimited(multispace0, tag(".."), multispace0), + ))(input)?; + + let (input, _) = multispace0(input)?; + let (input, _) = char('}')(input)?; + + let mut field_map = HashMap::new(); + for (name, arg) in fields { + field_map.insert(name, arg); + } + + let has_wildcard = wildcard.is_some(); + Ok(( + input, + PatternArg::Struct(PatternStruct::new(field_map, has_wildcard)), + )) + } + + fn parse_struct_field( + ctx: &ParserCtxBuilder<'a>, + input: &'a str, + ) -> IResult<&'a str, (&'a str, PatternArg<'a>)> { + let (input, _) = multispace0(input)?; + let (input, name) = identifier(input)?; + let (input, _) = multispace0(input)?; + + if let Ok((input, _)) = char::<&str, nom::error::Error<&str>>('=')(input) { + let (input, _) = multispace0(input)?; + let (input, expr) = Self::parse_expr(ctx, input)?; + Ok((input, (name, expr))) + } else { + Ok((input, (name, PatternArg::None))) + } + } + + fn parse_array(ctx: &ParserCtxBuilder<'a>, input: &'a str) -> IResult<&'a str, PatternArg<'a>> { + let (input, _) = multispace0(input)?; + let (input, _) = char('[')(input)?; + let (input, _) = multispace0(input)?; + let (input, elements) = + separated_list0(delimited(multispace0, char(','), multispace0), |i| { + Self::parse_expr(ctx, i) + })(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char(']')(input)?; + + Ok((input, PatternArg::Array(PatternArray::new(elements)))) + } + + fn parse_flags(ctx: &ParserCtxBuilder<'a>, input: &'a str) -> IResult<&'a str, PatternArg<'a>> { + let (input, flags) = separated_list1( + delimited(multispace0, char('|'), multispace0), + alt((Self::parse_builtin_type, Self::parse_flag, |i| { + Self::parse_flags_variable(ctx, i) + })), + )(input)?; + + Ok((input, PatternArg::Flags(PatternFlagSet::new(flags)))) + } + + fn parse_builtin_type(input: &'a str) -> IResult<&'a str, PatternArg<'a>> { + let (input, _) = multispace0(input)?; + let (input, _) = char('<')(input)?; + let (input, 
type_name) = identifier(input)?; + let (input, _) = char('>')(input)?; + + match type_name { + "INTEGER" => Ok((input, PatternArg::Integer)), + "PATH" => Ok((input, PatternArg::Path)), + _ => Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::Tag, + ))), + } + } + + fn parse_flags_variable( + ctx: &ParserCtxBuilder<'a>, + input: &'a str, + ) -> IResult<&'a str, PatternArg<'a>> { + let (input, _) = multispace0(input)?; + let (input, _) = char('<')(input)?; + let (input, var_name) = identifier(input)?; + let (input, _) = char('>')(input)?; + + if ctx.get_flags_id(var_name).is_none() { + return Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::Tag, + ))); + } + + Ok(( + input, + PatternArg::FlagsVariable(ctx.get_flags_id(var_name).unwrap()), + )) + } + + fn parse_struct_variable( + ctx: &ParserCtxBuilder<'a>, + input: &'a str, + ) -> IResult<&'a str, PatternArg<'a>> { + let (input, _) = multispace0(input)?; + let (input, _) = char('<')(input)?; + let (input, var_name) = identifier(input)?; + let (input, _) = char('>')(input)?; + + if ctx.get_struct_id(var_name).is_none() { + return Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::Tag, + ))); + } + + Ok(( + input, + PatternArg::StructVariable(ctx.get_struct_id(var_name).unwrap()), + )) + } + + fn parse_flag(input: &'a str) -> IResult<&'a str, PatternArg<'a>> { + let (input, _) = multispace0(input)?; + let (input, flag_name) = identifier(input)?; + Ok((input, PatternArg::Flag(flag_name))) + } + + fn parse_flags_definition( + ctx: &ParserCtxBuilder<'a>, + input: &'a str, + ) -> IResult<&'a str, (&'a str, PatternArg<'a>)> { + let (input, _) = multispace0(input)?; + let (input, name) = identifier(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char('=')(input)?; + let (input, flags) = Self::parse_flags(ctx, input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char(';')(input)?; + + Ok((input, (name, 
flags))) + } + + fn parse_struct_definition( + ctx: &ParserCtxBuilder<'a>, + input: &'a str, + ) -> IResult<&'a str, (&'a str, PatternArg<'a>)> { + let (input, _) = multispace0(input)?; + let (input, _) = tag("struct")(input)?; + let (input, _) = multispace0(input)?; + let (input, name) = identifier(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char('=')(input)?; + let (input, _) = multispace0(input)?; + let (input, struct_body) = Self::parse_struct(ctx, input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char(';')(input)?; + + Ok((input, (name, struct_body))) + } +} + +/// Iterator that yields complete statements from SCML content. +struct StatementIterator<'a> { + lines: std::str::Lines<'a>, + current_statement: String, +} + +impl<'a> StatementIterator<'a> { + fn new(content: &'a str) -> Self { + Self { + lines: content.lines(), + current_statement: String::new(), + } + } +} + +impl Iterator for StatementIterator<'_> { + type Item = Result<&'static str, ScmlParseError>; + + fn next(&mut self) -> Option { + loop { + match self.lines.next() { + Some(line) => { + let line = line.trim(); + // Skip comments and empty lines + if line.starts_with("//") || line.is_empty() { + continue; + } + + self.current_statement.push_str(line.trim_end()); + self.current_statement.push(' '); + + // Check if statement is complete + if self.current_statement.trim().ends_with(';') { + let statement = self.current_statement.trim().to_string(); + self.current_statement.clear(); + // Leak the string to get a 'static lifetime reference + let leaked: &'static str = Box::leak(statement.into_boxed_str()); + return Some(Ok(leaked)); + } + } + None => { + // End of input + if !self.current_statement.trim().is_empty() { + let incomplete = self.current_statement.trim().to_string(); + self.current_statement.clear(); + return Some(Err(ScmlParseError::IncompleteStatement(incomplete))); + } + return None; + } + } + } + } +} diff --git 
a/tools/sctrace/src/strace_parser.rs b/tools/sctrace/src/strace_parser.rs new file mode 100644 index 000000000..dda07a789 --- /dev/null +++ b/tools/sctrace/src/strace_parser.rs @@ -0,0 +1,614 @@ +// SPDX-License-Identifier: MPL-2.0 + +use std::{cell::RefCell, collections::HashMap, error::Error, fmt}; + +use nom::{ + IResult, + branch::alt, + bytes::complete::{tag, take_until, take_while1}, + character::complete::{char, digit1, space0, space1}, + combinator::{map, opt, peek, recognize, rest, value}, + multi::{separated_list0, separated_list1}, + sequence::{delimited, preceded, separated_pair, terminated, tuple}, +}; + +thread_local! { + /// Storage for blocked syscalls by PID in multi-threaded strace output. + static BLOCKED_SYSCALL: RefCell> = RefCell::new(HashMap::new()); +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum StraceParseError { + BlockedLine, + SignalLine, + ExitLine, + EmptyLine, + ParseError { message: String, input: String }, + TypeError(String), +} + +impl fmt::Display for StraceParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + StraceParseError::BlockedLine => write!(f, "Error: Blocked syscall line"), + StraceParseError::SignalLine => write!(f, "Error: Signal line"), + StraceParseError::ExitLine => write!(f, "Error: Exit status line"), + StraceParseError::EmptyLine => write!(f, "Error: Empty line"), + StraceParseError::ParseError { message, input } => { + write!(f, "{} (input: {})", message, input) + } + StraceParseError::TypeError(msg) => write!(f, "Type error: {}", msg), + } + } +} + +impl Error for StraceParseError {} + +/// Syscall representation parsed from strace output. +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct Syscall<'a> { + pid: u32, + name: &'a str, + args: Vec>, + return_value: &'a str, + original_line: &'a str, +} + +impl<'a> Syscall<'a> { + /// Fetches and preprocesses a strace line before parsing. 
+ pub(crate) fn fetch(line: String) -> Result { + let trimmed = line.as_str().trim(); + + if trimmed.is_empty() { + return Err(StraceParseError::EmptyLine); + } + + // Skip signal lines + if Self::parse_signal_line(trimmed).is_ok() { + return Err(StraceParseError::SignalLine); + } + + // Skip exit status lines + if Self::parse_exit_line(trimmed).is_ok() { + return Err(StraceParseError::ExitLine); + } + + // Save blocked syscalls for later reconstruction + if let Ok((_, (pid, str))) = Self::parse_multithread_blocked(trimmed) { + BLOCKED_SYSCALL.with(|blocked| { + blocked.borrow_mut().insert(pid, str); + }); + return Err(StraceParseError::BlockedLine); + } + + if let Ok((_, (pid, resumed))) = Self::parse_multithread_resumed(trimmed) { + let blocked_call = + BLOCKED_SYSCALL.with(|blocked| blocked.borrow().get(&pid).cloned().unwrap()); + let reconstructed = format!("{} {}{}", pid, blocked_call, resumed); + return Ok(reconstructed); + } + + Ok(line) + } + + /// Parses a single line of strace output into a Syscall. + pub(crate) fn parse(input: &'a str) -> Result { + let trimmed = input.trim(); + + let syscall = Self::parse_syscall(trimmed) + .map(|(_, syscall)| syscall) + .map_err(|e| StraceParseError::ParseError { + message: e.to_string(), + input: trimmed.to_string(), + })?; + + let syscall = Self::handle_special_cases(syscall); + Ok(syscall) + } + + pub(crate) fn name(&self) -> &str { + self.name + } + + pub(crate) fn original_line(&self) -> &str { + self.original_line + } + + pub(crate) fn args(&self) -> &[SyscallArg<'_>] { + &self.args + } + + fn new( + pid: u32, + name: &'a str, + args: Vec>, + return_value: &'a str, + original_line: &'a str, + ) -> Self { + Self { + pid, + name, + args, + return_value, + original_line, + } + } +} + +#[derive(Debug, PartialEq, Clone)] +pub(crate) enum SyscallArg<'a> { + /// Integer argument represented as a string. + Integer(&'a str), + + /// Quoted string argument. + String(&'a str), + + /// Unquoted flag argument. 
+ Flag(&'a str), + + /// File descriptor with absolute path. + FdPath(&'a str), + + /// Combination of flags and/or integer values. + Flags(SyscallFlagSet<'a>), + + /// Structured data represented as key-value pairs. + Struct(SyscallStruct<'a>), + + /// Array of syscall arguments. + Array(SyscallArray<'a>), + + /// Argument is ignored. + Ignored, +} + +/// Wrapper for arrays of syscall arguments. +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct SyscallArray<'a>(Vec>); + +impl<'a> SyscallArray<'a> { + pub(crate) fn elements(&self) -> &[SyscallArg<'a>] { + &self.0 + } + + fn new(elements: Vec>) -> Result { + Ok(Self(elements)) + } +} + +/// Wrapper for flag sets with restricted element types. +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct SyscallFlagSet<'a>(Vec>); + +impl<'a> SyscallFlagSet<'a> { + pub(crate) fn flags(&self) -> &[SyscallArg<'_>] { + &self.0 + } + + fn new(flags: Vec>) -> Result { + // Validates that all elements are either `Flag` or `Integer` types, + // as these are the only valid types for flag combinations. + for flag in &flags { + match flag { + SyscallArg::Flag(_) | SyscallArg::Integer(_) => {} + _ => { + return Err(StraceParseError::TypeError( + "SyscallFlagSet elements can only be Flag or Integer types".to_string(), + )); + } + } + } + + Ok(Self(flags)) + } +} + +/// Wrapper for structured data represented as key-value pairs. 
+#[derive(Debug, PartialEq, Clone)] +pub(crate) struct SyscallStruct<'a>(HashMap<&'a str, SyscallArg<'a>>); + +impl<'a> SyscallStruct<'a> { + pub(crate) fn fields(&self) -> &HashMap<&'a str, SyscallArg<'a>> { + &self.0 + } + + pub(crate) fn get_value(&self, key: &str) -> Option<&SyscallArg<'_>> { + self.0.get(key) + } + + fn new(fields: HashMap<&'a str, SyscallArg<'a>>) -> Self { + Self(fields) + } +} + +impl Syscall<'_> { + fn parse_syscall(input: &str) -> IResult<&str, Syscall<'_>> { + let original_input = input; + let (input, _) = space0(input)?; + let (input, pid) = opt(terminated(Self::parse_pid, space1))(input)?; + let (input, _) = space0(input)?; + let (input, name) = Self::parse_name(input)?; + let (input, _) = space0(input)?; + let (input, args) = Self::parse_args(input)?; + let (input, _) = space0(input)?; + let (input, return_value) = Self::parse_return_value(input)?; + + Ok(( + input, + Syscall::new(pid.unwrap_or(0), name, args, return_value, original_input), + )) + } + + fn parse_name(input: &str) -> IResult<&str, &str> { + delimited( + space0, + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '_'), + space0, + )(input) + } + + fn parse_args(input: &str) -> IResult<&str, Vec>> { + delimited( + char('('), + separated_list0(char(','), delimited(space0, Self::parse_arg, space0)), + char(')'), + )(input) + } + + fn parse_arg(input: &str) -> IResult<&str, SyscallArg<'_>> { + // Skip parameter's name + let (input, _) = opt(terminated( + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '_'), + char('='), + ))(input)?; + + let (input, arg) = alt(( + Self::parse_none, + Self::parse_fd_path, + Self::parse_struct, + Self::parse_array, + Self::parse_quoted_string, + Self::parse_mask, + Self::parse_hex, + Self::parse_number, + Self::parse_flags, + ))(input)?; + + // Skip comment + let (input, _) = opt(delimited( + delimited(space0, tag("/*"), space0), + take_until("*/"), + tag("*/"), + ))(input)?; + + // Skip output parameter with arrow + let (input, 
_) = opt(preceded( + delimited(space0, tag("=>"), space0), + Self::parse_arg, + ))(input)?; + + Ok((input, arg)) + } + + /// Parses a file descriptor or `AT_FDCWD` with absolute path argument. + fn parse_fd_path(input: &str) -> IResult<&str, SyscallArg<'_>> { + let (input, _fd) = + alt((tag("AT_FDCWD"), take_while1(|c: char| c.is_ascii_digit())))(input)?; + let (input, path) = Self::parse_angle_bracket_content(input)?; + + Ok((input, SyscallArg::FdPath(path))) + } + + /// Parses content within angle brackets, handling nested brackets and arrows. + fn parse_angle_bracket_content(input: &str) -> IResult<&str, &str> { + let (input, _) = char::<&str, nom::error::Error<&str>>('<')(input)?; + + let mut depth = 1; + let mut end_pos = 0; + let chars = input.char_indices().peekable(); + + for (i, ch) in chars { + match ch { + '<' => depth += 1, + '>' => { + // Check if this is part of '->' arrow + // Look back to see if previous char was '-' + if i > 0 && input.as_bytes().get(i - 1) == Some(&b'-') { + // This '>' is part of '->', not a closing bracket + continue; + } + + depth -= 1; + if depth == 0 { + end_pos = i; + break; + } + } + _ => {} + } + } + + if depth != 0 { + return Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::Tag, + ))); + } + + let content = &input[..end_pos]; + let remaining = &input[end_pos + 1..]; + Ok((remaining, content)) + } + + /// Parses a quoted string argument. + fn parse_quoted_string(input: &str) -> IResult<&str, SyscallArg<'_>> { + map( + tuple(( + opt(char('@')), // Optional @ prefix + delimited(char('"'), Self::take_until_unescaped_quote, char('"')), + opt(tag("...")), // Optional ... suffix + )), + |(_, content, _)| SyscallArg::String(content), + )(input) + } + + /// Helper to take characters until an unescaped quote is found. 
+ fn take_until_unescaped_quote(input: &str) -> IResult<&str, &str> { + let chars = input.char_indices(); + let mut last_was_escape = false; + + for (i, ch) in chars { + if ch == '"' && !last_was_escape { + return Ok((&input[i..], &input[..i])); + } + last_was_escape = ch == '\\' && !last_was_escape; + } + + Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::Tag, + ))) + } + + fn parse_hex(input: &str) -> IResult<&str, SyscallArg<'_>> { + map( + recognize(preceded( + tag("0x"), + take_while1(|c: char| c.is_ascii_hexdigit()), + )), + |s: &str| SyscallArg::Integer(s), + )(input) + } + + fn parse_number(input: &str) -> IResult<&str, SyscallArg<'_>> { + // Supports negative numbers and arithmetic operations like left shift + // and multiplication. + // + // First, check if this looks like `number<, _>(tuple((digit1, tag("<<"))))(input) + { + // Check if what follows is an identifier (not a digit) + if peek(take_while1::<_, _, nom::error::Error<&str>>(|c: char| { + c.is_ascii_alphabetic() || c == '_' + }))(remaining) + .is_ok() + { + // This is `number< IResult<&str, SyscallArg<'_>> { + // Flags can be separated by `|` or ` or ` + map( + separated_list1( + alt((tag(" or "), tag("|"))), + alt(( + Self::parse_hex, + Self::parse_number, + Self::parse_unquoted_flag, + )), + ), + |flags| SyscallArg::Flags(SyscallFlagSet::new(flags).unwrap()), + )(input) + } + + fn parse_unquoted_flag(input: &str) -> IResult<&str, SyscallArg<'_>> { + // Flags are symbolic constants that may optionally include: + // - Parenthetical parameters: `FLAG(param)` + // - Left shift operations: `FLAG<<2` and `1<>('('), + take_until(")"), + char::<&str, nom::error::Error<&str>>(')'), + )), + opt(tuple(( + tag("<<"), + take_while1(|c: char| c.is_ascii_digit()), + ))), + ))), + |matched: &str| SyscallArg::Flag(matched), + ), + ))(input) + } + + fn parse_mask(input: &str) -> IResult<&str, SyscallArg<'_>> { + // Parse the format like `~[flags]` + value( + SyscallArg::Ignored, + 
delimited(tag("~["), take_until("]"), char(']')), + )(input) + } + + fn parse_struct(input: &str) -> IResult<&str, SyscallArg<'_>> { + let (input, _) = char('{')(input)?; + let (input, pairs) = separated_list0( + char(','), + preceded( + space0, + alt(( + value(None, tag("...")), + map( + separated_pair( + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '_'), + char('='), + Self::parse_arg, + ), + |(k, v)| Some((k, v)), + ), + )), + ), + )(input)?; + let (input, _) = char('}')(input)?; + + let fields: HashMap<&str, SyscallArg> = pairs.into_iter().flatten().collect(); + let result = if fields.is_empty() { + SyscallArg::Ignored + } else { + SyscallArg::Struct(SyscallStruct::new(fields)) + }; + + Ok((input, result)) + } + + fn parse_array(input: &str) -> IResult<&str, SyscallArg<'_>> { + alt(( + |input| { + let (remaining, elements) = delimited( + char('['), + separated_list0(char(','), preceded(space0, Self::parse_arg)), + char(']'), + )(input)?; + + match SyscallArray::new(elements) { + Ok(array) => Ok((remaining, SyscallArg::Array(array))), + Err(_) => Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::Verify, + ))), + } + }, + value( + SyscallArg::Ignored, + delimited(char('['), take_until("]"), char(']')), + ), + ))(input) + } + + fn parse_return_value(input: &str) -> IResult<&str, &str> { + preceded( + tuple((space0, char('='), space0)), + map(rest, |s: &str| s.trim()), + )(input) + } + + fn parse_pid(input: &str) -> IResult<&str, u32> { + map(digit1, |s: &str| s.parse::().unwrap())(input) + } + + fn parse_none(input: &str) -> IResult<&str, SyscallArg<'_>> { + // Parse the format like: + // func(arg1, , arg3) + // func(arg1,) + value(SyscallArg::Ignored, recognize(peek(char(','))))(input) + } + + fn parse_signal_line(input: &str) -> IResult<&str, ()> { + // Parse the format like: + // --- SIGTERM {si_signo=SIGTERM, si_code=SI_USER, si_pid=123, si_uid=1000} --- + value( + (), + tuple(( + opt(terminated(Self::parse_pid, space1)), 
+ delimited(tag("---"), take_until("---"), tag("---")), + )), + )(input) + } + + fn parse_exit_line(input: &str) -> IResult<&str, ()> { + // Parse the format like: + // +++ exited with 0 +++ + value( + (), + tuple(( + opt(terminated(Self::parse_pid, space1)), + delimited(tag("+++"), take_until("+++"), tag("+++")), + )), + )(input) + } + + fn parse_multithread_blocked(input: &str) -> IResult<&str, (u32, String)> { + // Parse the format like: + // 123 read(3, + map( + tuple(( + Self::parse_pid, + preceded( + space1, + terminated(take_until(""), tag("")), + ), + )), + |(pid, content)| (pid, content.trim().to_string()), + )(input) + } + + fn parse_multithread_resumed(input: &str) -> IResult<&str, (u32, String)> { + // Parse the format like: + // 123 <... read resumed> "\x00\x01\x02", 1024) = 3 + map( + tuple(( + Self::parse_pid, + preceded( + space1, + tuple(( + delimited(tag("<..."), take_until("resumed>"), tag("resumed>")), + rest, + )), + ), + )), + |(pid, (_, remaining))| (pid, remaining.trim().to_string()), + )(input) + } + + /// Handles special cases for certain syscalls whose strace output is non-standard. + fn handle_special_cases(mut syscall: Syscall) -> Syscall { + match syscall.name { + // For `clone`, strace removes the first and fourth arguments, just insert + // ignored args. 
+ "clone" => { + syscall.args.insert(0, SyscallArg::Ignored); + syscall.args.insert(3, SyscallArg::Ignored); + } + _ => {} + } + + syscall + } +} diff --git a/tools/sctrace/src/trace.rs b/tools/sctrace/src/trace.rs new file mode 100644 index 000000000..c4837aa61 --- /dev/null +++ b/tools/sctrace/src/trace.rs @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: MPL-2.0 + +use std::{ + error::Error, + fmt, + fs::File, + io::{BufRead, BufReader, Lines, Read}, + os::unix::io::FromRawFd, + path::Path, + process::Command, +}; + +#[derive(Debug)] +pub struct TraceError { + message: String, +} + +impl fmt::Display for TraceError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.message) + } +} + +impl From for String { + fn from(err: TraceError) -> Self { + err.message + } +} + +impl Error for TraceError {} + +impl TraceError { + fn new(message: &str) -> Self { + Self { + message: message.to_string(), + } + } +} + +/// A stream of strace log entries. +pub struct StraceLogStream(Box); + +impl Read for StraceLogStream { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.0.read(buf) + } +} + +impl BufRead for StraceLogStream { + fn fill_buf(&mut self) -> std::io::Result<&[u8]> { + self.0.fill_buf() + } + + fn consume(&mut self, amt: usize) { + self.0.consume(amt) + } +} + +impl StraceLogStream { + /// Creates a new stream by opening an existing strace log file. + pub fn open_file>(path: P) -> Result { + let file = File::open(path.as_ref()) + .map_err(|e| TraceError::new(&format!("Failed to open log file: {}", e)))?; + Ok(Self(Box::new(BufReader::new(file)))) + } + + /// Creates a new stream by running a new command with strace. 
+ pub fn run_cmd>(path: P, args: Vec<&str>) -> Result { + let mut command_str = path.as_ref().to_string_lossy().to_string(); + for arg in args { + command_str.push(' '); + command_str.push_str(arg); + } + + // Create pipe + let (read_fd, write_fd) = nix::unistd::pipe() + .map_err(|e| TraceError::new(&format!("Failed to create pipe: {}", e)))?; + + // Convert read end to File + let read_file = unsafe { std::fs::File::from_raw_fd(read_fd) }; + + // Start strace, using /proc/self/fd/N to access the write end + Command::new("strace") + .args([ + "-o", + &format!("/proc/self/fd/{}", write_fd), + "-yy", + "-f", + "sh", + "-c", + &command_str, + ]) + .spawn() + .map_err(|e| { + // Clean up file descriptors + nix::unistd::close(read_fd).ok(); + nix::unistd::close(write_fd).ok(); + + if e.kind() == std::io::ErrorKind::NotFound { + TraceError::new( + "strace command not found. Please install strace:\n\ + - Debian/Ubuntu: sudo apt-get install strace\n\ + - Fedora/RHEL: sudo dnf install strace", + ) + } else { + TraceError::new(&format!( + "Failed to start strace: {}\n\ + If this is a permission error, try:\n\ + sudo sctrace -- {}", + e, command_str + )) + } + })?; + + // Close write end (strace has already inherited it) + nix::unistd::close(write_fd).ok(); + + Ok(Self(Box::new(BufReader::new(read_file)))) + } + + /// Creates a new stream by a string of strace log. + pub fn from_string(log_str: &str) -> Result { + let cursor = std::io::Cursor::new(log_str.to_string().into_bytes()); + Ok(Self(Box::new(BufReader::new(cursor)))) + } + + /// Returns an iterator over the lines of this stream. 
+ pub fn lines(self) -> Lines { + BufRead::lines(self) + } +} diff --git a/tools/sctrace/tests/integration_test.rs b/tools/sctrace/tests/integration_test.rs new file mode 100644 index 000000000..3d55eedb4 --- /dev/null +++ b/tools/sctrace/tests/integration_test.rs @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: MPL-2.0 + +use sctrace::{CliReporterBuilder, Patterns, SctraceBuilder, StraceLogStream}; + +#[test] +fn test_open_syscall() { + let scml_content = r#" + access_mode = + O_RDONLY | + O_WRONLY | + O_RDWR; + creation_flags = + O_CLOEXEC | + O_DIRECTORY | + O_EXCL | + O_NOCTTY | + O_NOFOLLOW | + O_TRUNC; + status_flags = + O_APPEND | + O_ASYNC | + O_DIRECT | + O_LARGEFILE | + O_NOATIME | + O_NONBLOCK | + O_SYNC; + + // Open an existing file + open( + path, + flags = | | , + ); + openat( + dirfd, + path, + flags = | | , + ); + + // Create a new file + open( + path, + flags = O_CREAT | | | , + mode + ); + openat( + dirfd, + path, + flags = O_CREAT | | | , + mode + ); + + // Status flags that are meaningful with O_PATH + opath_valid_flags = O_CLOEXEC | O_DIRECTORY | O_NOFOLLOW; + // All other flags are ignored with O_PATH + opath_ignored_flags = O_CREAT | | ; + // Obtain a file descriptor to indicate a location in FS + open( + path, + flags = O_PATH | | + ); + openat( + dirfd, + path, + flags = O_PATH | | + ); + + // Create an unnamed file + // open(path, flags = O_TMPFILE | | ) + "#; + + let log_lines = r#" + openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 + open("/dev/tdx_guest", O_RDWR|O_NONBLOCK) = 3 + open("/tmp/sctrace_testfile", O_CREAT|O_RDWR|O_CLOEXEC, 0666) = 4 + openat(AT_FDCWD, "/tmp/sctrace_testfile2", O_PATH|O_CREAT) = 5 + "#; + + let sctrace = SctraceBuilder::new() + .patterns(Patterns::from_scml(scml_content).unwrap()) + .strace(StraceLogStream::from_string(log_lines).unwrap()) + .reporter(CliReporterBuilder::new().quiet().collect().build()) + .build(); + + let result = sctrace.run().unwrap().unwrap(); + 
assert_eq!(result.len(), 0); +} + +#[test] +fn test_timer_create_syscall() { + let scml_content = r#" + opt_notify_methods = SIGEV_NONE | SIGEV_SIGNAL | SIGEV_THREAD_ID; + + // Create a timer with predefined clock source + timer_create( + clockid = CLOCK_PROCESS_CPUTIME_ID | CLOCK_THREAD_CPUTIME_ID | CLOCK_REALTIME | CLOCK_MONOTONIC | CLOCK_BOOTTIME, + sevp = { + sigev_notify = , + .. + }, + timerid + ); + + // Create a timer based on a per-process or per-thread clock + timer_create( + clockid = , + sevp = { + sigev_notify = , + .. + }, + timerid + ); + "#; + + let log_lines = r#" + timer_create(CLOCK_REALTIME, {sigev_notify=SIGEV_SIGNAL, sigev_signo=SIGALRM, sigev_value={sival_ptr=0x559b4d3e2e70}}, 0x7ffcb1f4d9c0) = 0 + timer_create(0xff5be79e /* CLOCK_??? */, {sigev_value={sival_int=565425088, sival_ptr=0x562221b3b3c0}, sigev_signo=SIGRTMIN, sigev_notify=SIGEV_THREAD_ID, sigev_notify_thread_id=1344269}, [0]) = 0 + "#; + + let sctrace = SctraceBuilder::new() + .patterns(Patterns::from_scml(scml_content).unwrap()) + .strace(StraceLogStream::from_string(log_lines).unwrap()) + .reporter(CliReporterBuilder::new().quiet().collect().build()) + .build(); + + let result = sctrace.run().unwrap().unwrap(); + assert_eq!(result.len(), 0); +} + +#[test] +fn test_multiple_struct_with_same_name() { + let scml_content = r#" + struct cmsghdr = { + cmsg_level = SOL_SOCKET, + cmsg_type = SO_TIMESTAMP_OLD | SCM_RIGHTS | SCM_CREDENTIALS, + .. + }; + struct cmsghdr = { + cmsg_level = SOL_IP, + cmsg_type = IP_TTL, + .. + }; + + // Rule for message header, which refers to the rules for control message header + struct msghdr = { + msg_control = [ ], + .. 
+ }; + + recvmsg(socket, message = , flags); + "#; + + let log_lines = &[ + "recvmsg(4, {msg_name=NULL, msg_namelen=0, msg_iov=NULL, msg_iovlen=0, msg_control=[{cmsg_len=16, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS}], msg_controllen=16, msg_flags=0}, 0) = 24", + "recvmsg(5, {msg_name=NULL, msg_namelen=0, msg_iov=NULL, msg_iovlen=0, msg_control=[{cmsg_len=16, cmsg_level=SOL_IP, cmsg_type=IP_TTL}], msg_controllen=16, msg_flags=0}, 0) = 24", + "recvmsg(6, {msg_name=NULL, msg_namelen=0, msg_iov=NULL, msg_iovlen=0, msg_control=[{cmsg_len=16, cmsg_level=SOL_IPV6, cmsg_type=IPV6_UNICAST_HOPS}], msg_controllen=16, msg_flags=0}, 0) = 24", + ]; + + let sctrace = SctraceBuilder::new() + .patterns(Patterns::from_scml(scml_content).unwrap()) + .strace(StraceLogStream::from_string(log_lines.join("\n").as_str()).unwrap()) + .reporter(CliReporterBuilder::new().quiet().collect().build()) + .build(); + + let result = sctrace.run().unwrap().unwrap(); + assert_eq!(result.len(), 1); + assert_eq!(result[0], format!("Unsupported syscall: {}", log_lines[2])); +} + +#[test] +fn test_clone_syscall() { + let scml_content = r#" + signal_flags = SIGHUP | SIGINT | SIGQUIT | SIGILL | + SIGTRAP | SIGABRT | SIGSTKFLT | SIGFPE | + SIGKILL | SIGBUS | SIGSEGV | SIGXCPU | + SIGPIPE | SIGALRM | SIGTERM | SIGUSR1 | + SIGUSR2 | SIGCHLD | SIGPWR | SIGVTALRM | + SIGPROF | SIGIO | SIGWINCH | SIGSTOP | + SIGTSTP | SIGCONT | SIGTTIN | SIGTTOU | + SIGURG | SIGXFSZ | SIGSYS | SIGRTMIN; + + opt_flags = + // Optional flags + // + // Share the parent's virtual memory + CLONE_VM | + // Share the parent's filesystem + CLONE_FS | + // Share the parent's file descriptor table + CLONE_FILES | + // Share the parent's signal handlers + CLONE_SIGHAND | + // Place child in the same thread group as parent + CLONE_THREAD | + // Share the parent's System V semaphore adjustments + CLONE_SYSVSEM | + // Suspend parent until the child exits or calls `execve` + CLONE_VFORK | + // Create a new mount namespace for the child + 
CLONE_NEWNS | + // Write child `TID` to parent's memory + CLONE_PARENT_SETTID | + // Allocate a `PID` file descriptor for the child + CLONE_PIDFD | + // Set thread-local storage for the child + CLONE_SETTLS | + // Write child `TID` to child's memory + CLONE_CHILD_SETTID | + // Clear child `TID` in child's memory on exit + CLONE_CHILD_CLEARTID | + // Make the child's parent the same as the caller's parent + CLONE_PARENT; + + // Create a thread or process + clone( + fn, stack, + flags = | , + func_arg, .. + ); + "#; + + let log_lines = r#" + clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f7745c1ca10) = 141614 + "#; + + let sctrace = SctraceBuilder::new() + .patterns(Patterns::from_scml(scml_content).unwrap()) + .strace(StraceLogStream::from_string(log_lines).unwrap()) + .reporter(CliReporterBuilder::new().quiet().collect().build()) + .build(); + + let result = sctrace.run().unwrap().unwrap(); + assert_eq!(result.len(), 0); +} + +#[test] +fn test_multiple_threads_syscalls() { + let scml_content = r#" + wait4( + pid, wstatus, + options = WNOHANG | WSTOPPED | WCONTINUED | WNOWAIT, + rusage + ); + "#; + + let log_lines = r#" + 141611 wait4(-1, + 141611 <... 
wait4 resumed>[{WIFEXITED(s) && WEXITSTATUS(s) == 0}], WNOHANG, NULL) = 141612 + "#; + + let sctrace = SctraceBuilder::new() + .patterns(Patterns::from_scml(scml_content).unwrap()) + .strace(StraceLogStream::from_string(log_lines).unwrap()) + .reporter(CliReporterBuilder::new().quiet().collect().build()) + .build(); + + let result = sctrace.run().unwrap().unwrap(); + assert_eq!(result.len(), 0); +} + +#[test] +fn test_check_logfile_wildcard_pattern() { + let scml_content = r#" + openat(dirfd, pathname, flags, ..); + "#; + + let log_lines = r#" + openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 + openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC, 0755) = 4 + "#; + + let sctrace = SctraceBuilder::new() + .patterns(Patterns::from_scml(scml_content).unwrap()) + .strace(StraceLogStream::from_string(log_lines).unwrap()) + .reporter(CliReporterBuilder::new().quiet().collect().build()) + .build(); + + let result = sctrace.run().unwrap().unwrap(); + assert_eq!(result.len(), 0); +} + +#[test] +fn test_check_program_simple_command() { + let scml_content = r#" + execve(filename, argv, envp); + "#; + + let sctrace = SctraceBuilder::new() + .patterns(Patterns::from_scml(scml_content).unwrap()) + .strace(StraceLogStream::run_cmd("/bin/true", vec![]).unwrap()) + .reporter(CliReporterBuilder::new().quiet().collect().build()) + .build(); + + let result = sctrace.run().unwrap().unwrap(); + assert!(result.iter().all(|error| !error.contains("execve("))); +} + +#[test] +fn test_heterogeneous_arrays() { + let scml_content = r#" + struct iovec = { + iov_base = [ + [ + { + nlmsg_type = RTM_NEWADDR, + .. + }, + [ + [ { nla_type = IFA_CACHEINFO, .. } ] + ] + ] + ], + .. + }; + recvmsg( + sockfd, + msg = { + msg_iov = [ ], + .. 
+ }, + flags + ); + "#; + + let log_lines = r#" + 1370921 recvmsg(7, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=0, msg_iov=[{iov_base=[[{nlmsg_len=76, nlmsg_type=RTM_NEWADDR, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1758615457, nlmsg_pid=1370920}, [[{nla_len=20, nla_type=IFA_CACHEINFO}]]]], iov_len=4096}], msg_iovlen=1, msg_control=NULL, msg_controllen=0, msg_flags=0}, 0) = 1280 + "#; + + let sctrace = SctraceBuilder::new() + .patterns(Patterns::from_scml(scml_content).unwrap()) + .strace(StraceLogStream::from_string(log_lines).unwrap()) + .reporter(CliReporterBuilder::new().quiet().collect().build()) + .build(); + + let result = sctrace.run().unwrap().unwrap(); + assert_eq!(result.len(), 0); +}