diff options
| author | Liguros - Gitlab CI/CD [feature/flatten] <gitlab@liguros.net> | 2020-07-25 06:34:56 +0000 |
|---|---|---|
| committer | Liguros - Gitlab CI/CD [feature/flatten] <gitlab@liguros.net> | 2020-07-25 06:34:56 +0000 |
| commit | 61258f80a567df6bd03762a8a95ab0761c125e4f (patch) | |
| tree | d966b6d0228b1204db29a130d7f2f0e5b73153db /sys-kernel/debian-sources | |
| parent | 7945acd24a6334d20e462f348d028dcb6fff7e04 (diff) | |
| download | baldeagleos-repo-61258f80a567df6bd03762a8a95ab0761c125e4f.tar.gz baldeagleos-repo-61258f80a567df6bd03762a8a95ab0761c125e4f.tar.xz baldeagleos-repo-61258f80a567df6bd03762a8a95ab0761c125e4f.zip | |
Updating liguros repo
Diffstat (limited to 'sys-kernel/debian-sources')
64 files changed, 2749 insertions, 53339 deletions
diff --git a/sys-kernel/debian-sources/Manifest b/sys-kernel/debian-sources/Manifest index b7273c3b60ab..35c561c865c9 100644 --- a/sys-kernel/debian-sources/Manifest +++ b/sys-kernel/debian-sources/Manifest @@ -1,27 +1,12 @@ -AUX 5.6.14/debian-sources-5.6.14-fix-bluetooth-polling.patch 1809 BLAKE2B d59992e91d7924df307da42d956131145986ea650b26f268eff9372cd848ead23b7a85b4b0a896bb44c21a9c2313c7b840134f087bd1a8ad8b893388c3b2ee1f SHA512 605a8bc34c159cc28d60ae975b94b772d009171f659c1a4e0f4820f1b1e5e5e146c65c1c318053c7c3515a2f3a95d50c5b6493751612f0273d06a16754d66d5c -AUX 5.6.14/dtrace-patches/0001-ctf-generate-CTF-information-for-the-kernel.patch 246296 BLAKE2B 58a386c1d32fe72c536a373445bea413e974e11989ae864c9d162d89fd71191fd8ab8e810dd5c3840d05766855233c08277080cd1d8acc7a7852a1d008e6e589 SHA512 ec9efc51340ff2a4da2505716df4300c17b32fd44262b79dc19796316f5464a1dbf5e71ab6876b04bbe28f56e8cc5fd09430c9ad326b9d5aa6d03813df1f9721 -AUX 5.6.14/dtrace-patches/0002-kallsyms-introduce-new-proc-kallmodsyms-including-bu.patch 33362 BLAKE2B 58e5ee8db3a12209b0785a94c8acbdb7d84f7a9b4ca47d11a180a6754c4893decb9776ed1bba124e71f9d26a605fc5205961658e75f413ebf424cc3525e3cf91 SHA512 56d2d4707684acda56799cdefd025f3baac4416ece585a0966dd00be9719339ad940d3bb35f58b516b9371e628613f148b06f808eec8c2b910c7a1c37a362330 -AUX 5.6.14/dtrace-patches/0003-waitfd-new-syscall-implementing-waitpid-over-fds.patch 26424 BLAKE2B 7c2ac8d263da13b68d73e83e97151840ff0ddec22620243fd4d99440d0db61c59197b0ff251b844f3c5f1ed4094a7a6f9bd010a1952b307c7177702e7213cd42 SHA512 1f2e0a65b24c53b0de059e42f911f00f929df9d6c6dba5fd68afd940a5fc9e62d8457a1494d35c163e60a5d42d286abfc9b82a159104685fbe95c237b6eb5920 -AUX 5.6.14/dtrace-patches/0004-dtrace-core-and-x86.patch 276484 BLAKE2B 41e9439de997744bce0baa27630cd7e086df8fc653f46372865d171ea1f87a7da7ecfe36ef78e4f0ef131f623c20210e917b25c1cba2333cdb1a50979e67ea5f SHA512 
ecd448d9c5c3bb0c8706fcd5caa6269ef53df14af1cc49c02b339d4bce291a36bcb7df5f09e7d846f617e25507633bab8bb84c21160f386721658f2ba02b71c6 -AUX 5.6.14/dtrace-patches/0005-dtrace-modular-components-and-x86-support.patch 484575 BLAKE2B f7081a419a445c077b156445a776e8263f7d0d4f026c080b93a9c1bb180eaecc6e8ccb25e8cfb373d1d41851f73fe9018df219090e458ae2825af18b1c648f40 SHA512 ca24005a52000df0c2a79bd8ad48cf0799f8cafdb211f704a789573909ab3adc7944bd150b281e2f413c35f70311ea5d047560d10943a8c1f6889c2b01fb8344 -AUX 5.6.14/dtrace-patches/0006-dtrace-systrace-provider-core-components.patch 10194 BLAKE2B 8937dbbaddf6fde8f4a7e8f3e592fb87c3dd9b3410f8c45832412047fdf0591680ee45a89c01a5b265c306a26d2391aefccc3ca138a5f762d602e16c646244ff SHA512 848c177070e610d30b7f8f3bda2aa2b04ddfbbb3a3af8275ae0a3990ab76c359b1e2eed3253d16ebab21b22978ccb9d7bb42de4b040ed435e34a375d2ea97bd9 -AUX 5.6.14/dtrace-patches/0007-dtrace-systrace-provider.patch 11199 BLAKE2B fe4c429f7b1da5bf9354dfb8568bfc9520cf7194fb5afb06c6514b00dec4437a15101b7369b2f1cac025635daf8ad510c13f190991d7f1830ebc9f4f5cce1c5a SHA512 345f1ae43a4f4972b1d15d8002fc9626670a784418cb2d1994cb60d9087184cf5ef311e8149475ab36bccd87c3b10e868062c3df54a844529d6c8a249fa08a8e -AUX 5.6.14/dtrace-patches/0008-dtrace-sdt-provider-core-components.patch 101012 BLAKE2B 567510e0f9888394c331c28e7c8240e73213b888be4b82243825cce85db7f5b1aef5c01f095f752da71b340b69195b7680581fbc6ac18645c8abed4af170a1bd SHA512 59029fc027e13cd0075826b7355d63067152c8568575aacb9b4de0447aedb6cd165cd2c31a34752cc7b7a8ca12b67ca9ef595d5c934f85fbc4e1176be9b9b1f5 -AUX 5.6.14/dtrace-patches/0009-dtrace-sdt-provider-for-x86.patch 33276 BLAKE2B dc58cdf58389f6f996f7274ff5bcb68fbb7a9fca7947df25d3a5d6ba6bc85ef227bad06feafc53705410e94df9863dee0ed41c92ec2545ffcf21a575889efebc SHA512 71a69ba28649e986a189c6039f9f1bbdb3decf3d05704fa75f3e0abac5366c500c73c2da89242a47c81033f3a80d9a8f1c7f6718766e3cb6c8e4980220cbd943 -AUX 5.6.14/dtrace-patches/0010-dtrace-profile-provider-and-test-probe-core-componen.patch 1426 BLAKE2B 
1d9b7fcf0a2148c3e764894d1dc80d8bae9ed574392ab17a1ee90c253edb13480ac44042e107d88b6611c483abe62fa51d75e3141c1afb71361e4d19a333f109 SHA512 b19dd2fcde374147d7d29a1d9cd680eee7b2d3e58b9044c2d9d5d5df76442b8108b134906bc077e502c295dd49fc0488ee3ae482d85fe71b34990a1e842e3faf -AUX 5.6.14/dtrace-patches/0011-dtrace-profile-and-tick-providers-built-on-cyclics.patch 17675 BLAKE2B 0480f0a2a0857c4bb2ee44bd247ebdfb516c9b575b55244aa85d6a55241bfdf1b680dd0ff81e91055a535ce4350cf8d37cc400961be9773513e4c7b6cfc1dcf8 SHA512 15265a0283162a64ef8c98e5587ef0e5ea8a25a90b0f2968e7929e665636b0a47a152413cd06a0e57179ba8c13f60b5dbec15813822d1031ddc3f5e02edfa043 -AUX 5.6.14/dtrace-patches/0012-dtrace-USDT-and-pid-provider-core-and-x86-components.patch 10985 BLAKE2B 1ca953a22efe5c40da5c9ebda4314269389c35e92b38463ed37b7b41fa8fba3b51c4763490c56885c592bd49b8b063be2014dfc4099a014fb715e4a8204e6133 SHA512 cdfeaa5f1cc22e7ab1740f2c272a960f1e29c9826394674fe35d34997b9202097cd2d7b81f3717db9813e26f7ab5b3c519060a5c3fa19480cc931cb770bd2cbe -AUX 5.6.14/dtrace-patches/0013-dtrace-USDT-and-pid-providers.patch 73642 BLAKE2B a3f3396b343e898fbd0ef2a3b23796bff4b4801ee7321f239b2266d7d7dbc1f5ffd80ac6820158f5019f8c8ae5a9e22cee89aacaba527bf06c70052aa408d21e SHA512 6a1684208a593557b7af804061deadbea87ef30f917308021d73177bb8d0f1b8ff7c77dd4720326cecea869424f6833668b828b7bc7dda86de000f7926d71c07 -AUX 5.6.14/dtrace-patches/0014-dtrace-function-boundary-tracing-FBT-core-and-x86-co.patch 41889 BLAKE2B abba40effcf0108d5ddc0f9c87196c3d438ccb44743d467a7fc40d61020d66ef30a529750591fb1cb9d3d3e39914afa22daaa08086acbe21c1393a7da20d0023 SHA512 476a4d88a98e2d49a49ded3c59d7c99a53202780a3e009bd3b3aa6a2eae7caf2ee6da7fcd3f55cb4e6a8657cabe2e7c58141e295fb6ea12fa63e99f3cc516962 -AUX 5.6.14/dtrace-patches/0015-dtrace-fbt-provider-modular-components.patch 19711 BLAKE2B d53206f1cd211034deb6b3cd56be4f4f827a66f4b1201fb9c109bc53723df2b055328dfe07af8bd8163cc44e324e35dbd77dfb8a19acfe5dfeaaf0d8a0fae1c9 SHA512 
2eb18602052cfc10b345e5279094c6da577b9a4b36c68f131321504622b905002d2aae1f9986d39493b950f57fa0bcf8bddad6eaae2202980f0c3949928e31b7 -AUX 5.6.14/dtrace-patches/0016-dtrace-arm-arm64-port.patch 76550 BLAKE2B 3d2e5f1abf6ef377394a1c2fe86845e7880492da959758eeab8efa6429aa127041dfa63679db243ce805a3bc352612f331aef6329c659f772c2e0825a2b2b3c5 SHA512 d5f9ea2152f28ff703d40dfc755f3aaac0afc7a836b9864e4cde9fc8a1bcea5b112aa702c006cfad332bf403d6c5740a0581d794951ce34607dbcd0b601d366c -AUX 5.6.14/dtrace-patches/0017-dtrace-add-SDT-probes.patch 105743 BLAKE2B c1ee5ca68dc77a33f49fbd22c56e1f538010f7cc56053b0705b743fcc6358a6be690652b4fd5e8a1e3da1a6c38a55948fad27f410f9b09c3da2a150a06b4b692 SHA512 984c0e3ffea49fbbe20b086b48b9fd1798e0ccaeb6935f1fd46e237d0b6e4ccae09ccc34bf4dfc85010d521119e42aa020f68ad2793f3ec625ae3580022b3703 -AUX 5.6.14/dtrace-patches/0018-dtrace-add-rcu_irq_exit-and-rcu_nmi_exit_common-to-F.patch 1470 BLAKE2B 7954093b06463d64c24f5dd956d2a5a8d3f6f9fb8e771f8212f48184ad4b14d7cd56bc31ea5b27e9f081d0dbfdab583c94c0596fed31b13b5cfbbd0c8e85d6c8 SHA512 3104f51a49bb36a16dc35951b50e0522ce009929a5e0870a5a70f5c74ca7d618bef50cd2b9f5d96ebe4c7b3d0387c2c8200e87ed37281073213c2e3cbb2c3b87 -AUX 5.6.14/dtrace-patches/0019-dtrace-add-sample-script-for-building-DTrace-on-Fedo.patch 8549 BLAKE2B c3bd55d59ed5c4c49721a4cdb0c3f36e697a75cd2d89664c6f7d50c1779023c00a5e8afe4dc44715c96c9b8459830df5bd792652bc3508fdc46c6f811398af71 SHA512 58a736e1130fb78bd5e5b1ba5f68b2fb637adcd17af046a6688b5e09469411d236348af44704d44491c0341d99769e6533286b9944ef0056e4088d88a2fcd166 -AUX 5.6.14/dtrace-patches/0020-locking-publicize-mutex_owner-and-mutex_owned-again.patch 2431 BLAKE2B 92c08a316143d6f3f039f5c50b19062f78a1afc7d476274a2f1dfcb6539463ec4ba70bdcdc9942e524c701a808e95eca17504dbaba15835f19ce7786f246a223 SHA512 5a9e532455b22701213a9bcc4d7f9978fd6a973d18b9239052c16ee4cf37b239ea9999914673cf7a0ccc6a37132891d811221cd68f13237eee8014681b112bde -AUX 5.6.14/export_kernel_fpu_functions_5_3.patch 1224 BLAKE2B 
7922054672029120447da6c7dbb88e51b3c4a65c5476ff945220cc8851716fad89d8b3258abb86713444cfa603e51279ed80b512b1a4c9340087b13804040873 SHA512 bcd65aae8b039ce94c1a30b7ca99f180c4f0d6c4e09eb70f1e7d358f8759528df2e8c1d0924cfa80b29227a12b5145f296fac5919e9ccb75e8d7ecaf6ddf85d1 -AUX config-extract 6663 BLAKE2B 72ac29890b4a569957b56801c51c14d68a5d1b962ee1d3f42b8c5e4f90392698796100380fbf526447421520374156ad82f748b047930f7cfd8d94d1c8f9f05d SHA512 a664c9f7471eb98dac441149d4180ca00b0bc235a745aac82528f613bcc20f5e46423b2d92d44e3c3647b31c86fb2150cca7d40a7fe56b604928a57053c71deb -DIST linux_5.6.14-2~bpo10+1.debian.tar.xz 1314720 BLAKE2B 5e78e531df55e3d09a40f764a81c0a69b401e73da7a0531540e0b9b607aa86bf6e4f0a9d78385d1c6de3c3d0d18eccb3d2e66fe54754b20e1f55c2bf5d346a08 SHA512 2a8363a897d854d048691ef1611b9614a16ee335bd3f7cd38fac4bdad184fe5361fcd9ce92159f933cedc169605d897cc5efdfb922cc5526b4457d5b647722e3 +DIST linux_4.19.118-2.debian.tar.xz 3283568 BLAKE2B 7c809591573748f1718a2b0c97e439edb9ec92c71c6502440133371e10f6389a667f77b7316f4907ba95aa7c8cc143eee3db77b141e8d67fab4e91c8da9f26ad SHA512 fe3a8e9353375ccc914209dfc218f62ead73f88de81151cc86a8d5ab2bb19e37397721c33c6c4ace4d3dd3fb7ec4685089e2f3093ae87448ff384cdac0638641 +DIST linux_4.19.118.orig.tar.xz 107469088 BLAKE2B 79f906da78551c9d3070db3ab12eafa3ddbf1da81ad7c30ae354f78901f4f8fd7cd2a153496e295bf2afd80411bd47079e1f2271e5f14a37d7134353bc4e245c SHA512 d21618aefb825a5f3dfe11a38f2e2b0ac449e018fbf8d692d51de1495c3b09137b31b4842f5ceadb6021cad366594c202b25591e152c1a693fb4a4ed8018199a +DIST linux_4.19.98-1.debian.tar.xz 1347332 BLAKE2B bdbeac3d5a57db0b44b774b18f0f66894c90103c496b710db7ac5cf61d17c09d70f1cae978a7dce4125872bae2b7d4bf827b250bc110799a413f928481f52235 SHA512 0c9f8729bc10f5ea298bfd6428655344705ca2fa71c265b5feab96128de39e67b52245fceb4cbdc7aec59628bf9dff6fdaa2483243d6a0d566c59a3147458849 +DIST linux_4.19.98.orig.tar.xz 107435812 BLAKE2B 
edc211f19ec56bced9cd290c701242ecaa4be02a1b67e8c0f9fbb858d450324fc8c59400f312a3fad7021134d0e957675183c68b44154596fe2502ae3961d1c2 SHA512 a5150de7aa1971b9936199d6055861ff3b69ccc73563fadda02c284589ba46b5d0c8a1b2226f8fca44b88fd402e6831a9ed576acefbe77a81b292f75318baf53 +DIST linux_5.4.19-1.debian.tar.xz 1208900 BLAKE2B 37bc935de3dd6f85dce4a81e32e897ddd48a88602eebbdcd9e9382052b06903f097bf4ec40a93bc5aae50f83f3ca919a823f36b429122e4f74d680c5cd37810c SHA512 b5a4275cd07b5649d0d47e26134b2f2fe4edf8a92e81858ea8a1a8a63ef5a49e7f99cf2ebde126aac744c76a9fefdb7ec4d534043f2ecbd268710301d94e6c96 +DIST linux_5.4.19.orig.tar.xz 113678372 BLAKE2B 19c3a220523e27d4efda7788cfe817588d195c11520e98cf52eefafda0647cf243d4dcc1e414e32b226181c50a1f72f9281e3ae90166eff688927fe1877422d0 SHA512 fb51d82593867e927e757fedacb2c2606e7f62ebf114e5c64400632362ae21a412818f84f5fa6c59709dba5bd81e92714fe89965dc36058b3dfb17343a1de60e +DIST linux_5.5.17-1.debian.tar.xz 1335884 BLAKE2B ae97d148e6f1c59f88542805a290e677f6c1f641ef12cf566938e6c7a09a8371eba281242db081f4751e1fa9986973a8f833e4a0723f70e79d67ce962cb021fb SHA512 888ecfc2eeb061729778f8f5e5f7ec9b87e5b09dd471a23b3b222d77b1298d3e1d9a6c2c945e18b554ef6e7e4ba80e5ae7bf060b10d917326a2335fa52c1df88 +DIST linux_5.5.17.orig.tar.xz 115227304 BLAKE2B 2fe61be7a9aaf553711a2c09ebceae09e3f18d9067b8731b836f7c01fec0ba15caf12ce03df26ea5c99d735a092ebf14136d37699aa31956d097730b13f458d6 SHA512 2871d1fb52bf77963b86b89899ae1370edf25edb2013e2cfe3d93d28d77407a9c752beb8b0eb50e240f3bc65eaaf602d35417d9de80c89da14fad419bd112d3c +DIST linux_5.6.14-1.debian.tar.xz 1303696 BLAKE2B 54a502165fbfe14215bed557ded72afc83da3092b1fe479fa41be64fcc75a6a9173118120a5fb6c7acb6d62dcfee999b472ed863afdecb5596daaa7a33e43cf0 SHA512 d50a94c96476c3b52259ff062ecd24e0566c11e2cc3223585fb4c6c4e19ec572aa7a792abc98544e9bbea15231607ccdd4f2646f9589b700d4151d9fc848f160 DIST linux_5.6.14.orig.tar.xz 116098756 BLAKE2B 
0c35a5287598de6f53447cac71c1e4884592ab031fdfb57fc73dc9584ffbb71c42c09f88f77ae646d2d90a075c53c18b3afe03d8ba166bfceb1707a799e2ad3c SHA512 5db09e66fad2ec65116d3a20a5f9436bf22c89f7ead8d0cc42c59dd40013450dc1a5520060abdf7ea1291d3e45e06f8da54f0a25d3a4bb56d3e18eb0ad5ac446 -EBUILD debian-sources-5.6.14_p2.ebuild 19817 BLAKE2B c69e346c678d1cae6f430f3338e48b20e3c1bb427d5af29baf13c0c6862c01a955f35bb233c65627f5b8b283af4b0ed080875b2f454335ad317e0968e27182b1 SHA512 35403b50056dc0a6d36fe5dde3a8ab6c04abfa849ee8e785614fd144ef64fc16ec0cc764dd721a76f7434ce39ebbbbfad05df717bad89f83bae5ba7cfd0b76c0 -MISC metadata.xml 279 BLAKE2B 00c8dce49aae16cc265c0b1073eaef85985856d7f5e06e93140d7b921e0d5a9641fc454a8ad8f74581d4572fe55b34e53f4e3322eeacc84bd91bbcbf92ebf84a SHA512 797be7c55ce2c20ed44d7b7c72685e215540b61e959139333c04ae03495fc73c32bfacf2f802691ab8c0823583c3311c12a635f7cc9d67493eef176a1337304e +DIST linux_5.6.7-1.debian.tar.xz 1292804 BLAKE2B aafb06dc6bec92bf98912be3f5e516f9c1d6bd126661191dd5bd70e0f1915c8c07d130348ce282e8587d15bb0b7fa9cf86d7f611c41e478d7b2b5129f5f47ffe SHA512 854f538e17e2020599593bf6bb295d2d2b919200e8719780798749ce4a26e0689bb6cf1c5bd150ce65f919ee211bd25c1a7b3126f8a68a5a224e17c7f9ff3b63 +DIST linux_5.6.7.orig.tar.xz 116087540 BLAKE2B 9ac537375be80fb6afed67854782a274f8e817f9799778813cd2c23acf77ecc81f92ef1e9347192613dcac837de9b255c3318077a5f2ac88f09422b4102b0cd9 SHA512 a965b0148cf86a0157b760134f167708959d790022ccc12dd9476088fd364bf3e40be2a08fbb0ce2e85efde5b1a26a7edeb093dc5d237a7678c081cac3cf73b6 diff --git a/sys-kernel/debian-sources/debian-sources-4.19.118_p2.ebuild b/sys-kernel/debian-sources/debian-sources-4.19.118_p2.ebuild new file mode 100644 index 000000000000..6525ad6fd661 --- /dev/null +++ b/sys-kernel/debian-sources/debian-sources-4.19.118_p2.ebuild @@ -0,0 +1,307 @@ +# Distributed under the terms of the GNU General Public License v2 + +# Documentation for adding new kernels -- do not remove! 
+# +# Find latest stable kernel release for debian here: +# https://packages.debian.org/stable/kernel/ + +EAPI=5 + +inherit check-reqs eutils mount-boot + +SLOT=$PF +CKV=${PV} +KV_FULL=${PN}-${PVR} +DEB_PV_BASE="4.19.118" +DEB_EXTRAVERSION="-2" +EXTRAVERSION="_p2" + +# install modules to /lib/modules/${DEB_PV_BASE}${EXTRAVERSION}-$MODULE_EXT +MODULE_EXT=${EXTRAVERSION} +[ "$PR" != "r0" ] && MODULE_EXT=$MODULE_EXT-$PR +MODULE_EXT=$MODULE_EXT-${PN} +# install sources to /usr/src/$LINUX_SRCDIR +LINUX_SRCDIR=linux-${PF} +DEB_PV="$DEB_PV_BASE${DEB_EXTRAVERSION}" +KERNEL_ARCHIVE="linux_${DEB_PV_BASE}.orig.tar.xz" +PATCH_ARCHIVE="linux_${DEB_PV}.debian.tar.xz" +RESTRICT="binchecks strip mirror" +LICENSE="GPL-2" +KEYWORDS="*" +IUSE="binary btrfs custom-cflags ec2 luks lvm sign-modules zfs" +DEPEND=" + virtual/libelf + binary? ( >=sys-kernel/genkernel-3.4.40.7 ) + btrfs? ( sys-fs/btrfs-progs sys-kernel/genkernel[btrfs] ) + zfs? ( sys-fs/zfs ) + luks? ( sys-kernel/genkernel[cryptsetup] )" +REQUIRED_USE=" +btrfs? ( binary ) +custom-cflags? ( binary ) +luks? ( binary ) +lvm? ( binary ) +sign-modules? ( binary ) +zfs? ( binary ) +" +DESCRIPTION="Debian Sources (and optional binary kernel)" +DEB_UPSTREAM="http://http.debian.net/debian/pool/main/l/linux" +HOMEPAGE="https://packages.debian.org/stable/kernel/" +SRC_URI="$DEB_UPSTREAM/${KERNEL_ARCHIVE} $DEB_UPSTREAM/${PATCH_ARCHIVE}" +S="$WORKDIR/linux-${DEB_PV_BASE}" + +get_patch_list() { + [[ -z "${1}" ]] && die "No patch series file specified" + local patch_series="${1}" + while read line ; do + if [[ "${line:0:1}" != "#" ]] ; then + echo "${line}" + fi + done < "${patch_series}" +} + +tweak_config() { + einfo "Setting $2=$3 in kernel config." + sed -i -e "/^$2=/d" $1 + echo "$2=$3" >> $1 +} + +setno_config() { + einfo "Setting $2*=y to n in kernel config." + sed -i -e "s/^$2\(.*\)=.*/$2\1=n/g" $1 +} + +setyes_config() { + einfo "Setting $2*=* to y in kernel config." 
+ sed -i -e "s/^$2\(.*\)=.*/$2\1=y/g" $1 +} + +zap_config() { + einfo "Removing *$2* from kernel config." + sed -i -e "/$2/d" $1 +} + +pkg_pretend() { + # Ensure we have enough disk space to compile + if use binary ; then + CHECKREQS_DISK_BUILD="5G" + check-reqs_pkg_setup + fi +} + +get_certs_dir() { + # find a certificate dir in /etc/kernel/certs/ that contains signing cert for modules. + for subdir in $PF $P linux; do + certdir=/etc/kernel/certs/$subdir + if [ -d $certdir ]; then + if [ ! -e $certdir/signing_key.pem ]; then + eerror "$certdir exists but missing signing key; exiting." + exit 1 + fi + echo $certdir + return + fi + done +} + +pkg_setup() { + export REAL_ARCH="$ARCH" + unset ARCH; unset LDFLAGS #will interfere with Makefile if set +} + +src_prepare() { + cd "${S}" + for debpatch in $( get_patch_list "${WORKDIR}/debian/patches/series" ); do + epatch -p1 "${WORKDIR}/debian/patches/${debpatch}" + done + # end of debian-specific stuff... + + # do not include debian devs certificates + rm -rf "${WORKDIR}"/debian/certs + + sed -i -e "s:^\(EXTRAVERSION =\).*:\1 ${MODULE_EXT}:" Makefile || die + sed -i -e 's:#export\tINSTALL_PATH:export\tINSTALL_PATH:' Makefile || die + rm -f .config >/dev/null + cp -a "${WORKDIR}"/debian "${T}" + make -s mrproper || die "make mrproper failed" + #make -s include/linux/version.h || die "make include/linux/version.h failed" + cd "${S}" + cp -aR "${WORKDIR}"/debian "${S}"/debian + + ## XFS LIBCRC kernel config fixes, FL-823 + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-xfs-libcrc32c-fix.patch + + ## FL-4424: enable legacy support for MCELOG. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-mcelog.patch + + ## do not configure debian devs certs. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-nocerts.patch + + ## FL-3381. 
enable IKCONFIG + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-ikconfig.patch + + ## increase bluetooth polling patch + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-fix-bluetooth-polling.patch + + ## add support for newer AMD APUs to AMDGPU + epatch "${FILESDIR}"/${DEB_PV_BASE}/amdgpu-picasso.patch + + ## fix for USB device enumeration for USBPre2: + ## does not apply cleanly and may have been fixed as of 4.19.118 needs looking into + ##epatch "${FILESDIR}/${DEB_PV_BASE}/usb-blacklist-endpoint-sound-devices-usbpre2.patch" + + local arch featureset subarch + featureset="standard" + if [[ ${REAL_ARCH} == x86 ]]; then + arch="i386" + subarch="686-pae" + elif [[ ${REAL_ARCH} == amd64 ]]; then + arch="amd64" + subarch="amd64" + else + die "Architecture not handled in ebuild" + fi + cp "${FILESDIR}"/config-extract . || die + chmod +x config-extract || die + ./config-extract ${arch} ${featureset} ${subarch} || die + setno_config .config CONFIG_DEBUG + if use ec2; then + setyes_config .config CONFIG_BLK_DEV_NVME + setyes_config .config CONFIG_XEN_BLKDEV_FRONTEND + setyes_config .config CONFIG_XEN_BLKDEV_BACKEND + setyes_config .config CONFIG_IXGBEVF + fi + if use sign-modules; then + certs_dir=$(get_certs_dir) + echo + if [ -z "$certs_dir" ]; then + eerror "No certs dir found in /etc/kernel/certs; aborting." + die + else + einfo "Using certificate directory of $certs_dir for kernel module signing." + fi + echo + # turn on options for signing modules. 
+ # first, remove existing configs and comments: + zap_config .config CONFIG_MODULE_SIG + # now add our settings: + tweak_config .config CONFIG_MODULE_SIG y + tweak_config .config CONFIG_MODULE_SIG_FORCE n + tweak_config .config CONFIG_MODULE_SIG_ALL n + tweak_config .config CONFIG_MODULE_SIG_HASH \"sha512\" + tweak_config .config CONFIG_MODULE_SIG_KEY \"${certs_dir}/signing_key.pem\" + tweak_config .config CONFIG_SYSTEM_TRUSTED_KEYRING y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE_SIZE 4096 + echo "CONFIG_MODULE_SIG_SHA512=y" >> .config + ewarn "This kernel will ALLOW non-signed modules to be loaded with a WARNING." + ewarn "To enable strict enforcement, YOU MUST add module.sig_enforce=1 as a kernel boot" + ewarn "parameter (to params in /etc/boot.conf, and re-run boot-update.)" + echo + fi + if use custom-cflags; then + MARCH="$(python -c "import portage; print(portage.settings[\"CFLAGS\"])" | sed 's/ /\n/g' | grep "march")" + if [ -n "$MARCH" ]; then + sed -i -e 's/-mtune=generic/$MARCH/g' arch/x86/Makefile || die "Canna optimize this kernel anymore, captain!" + fi + fi + # get config into good state: + yes "" | make oldconfig >/dev/null 2>&1 || die + cp .config "${T}"/config || die + make -s mrproper || die "make mrproper failed" +} + +src_compile() { + ! 
use binary && return + install -d "${WORKDIR}"/out/{lib,boot} + install -d "${T}"/{cache,twork} + install -d "${WORKDIR}"/build + cp "${T}"/config "${WORKDIR}"/build/.config + DEFAULT_KERNEL_SOURCE="${S}" CMD_KERNEL_DIR="${S}" genkernel ${GKARGS} \ + --no-save-config \ + --no-oldconfig \ + --kernel-config=${T}/config \ + --kernname="${PN}" \ + --build-src="${S}" \ + --build-dst="${WORKDIR}"/build \ + --makeopts="${MAKEOPTS}" \ + --cachedir="${T}"/cache \ + --tempdir="${T}"/twork \ + --logfile="${WORKDIR}"/genkernel.log \ + --bootdir="${WORKDIR}"/out/boot \ + --disklabel \ + $(usex lvm --lvm --no-lvm ) \ + $(usex luks --luks --no-luks ) \ + --mdadm \ + $(usex btrfs --btrfs --no-btrfs) \ + $(usex zfs --zfs --no-zfs) \ + --module-prefix="${WORKDIR}"/out \ + all || die +} + +src_install() { + # copy sources into place: + dodir /usr/src + cp -a "${S}" "${D}"/usr/src/${LINUX_SRCDIR} || die + cd "${D}"/usr/src/${LINUX_SRCDIR} + # prepare for real-world use and 3rd-party module building: + make mrproper || die + cp "${T}"/config .config || die + cp -a "${T}"/debian debian || die + + + # if we didn't use genkernel, we're done. The kernel source tree is left in + # an unconfigured state - you can't compile 3rd-party modules against it yet. + use binary || return + make prepare || die + make scripts || die + # OK, now the source tree is configured to allow 3rd-party modules to be + # built against it, since we want that to work since we have a binary kernel + # built. 
+ cp -a "${WORKDIR}"/out/* "${D}"/ || die "couldn't copy output files into place" + # module symlink fixup: + rm -f "${D}"/lib/modules/*/source || die + rm -f "${D}"/lib/modules/*/build || die + cd "${D}"/lib/modules + local moddir="$(ls -d [234]*)" + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/source || die + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/build || die + # Fixes FL-14 + cp "${WORKDIR}/build/System.map" "${D}/usr/src/${LINUX_SRCDIR}/" || die + cp "${WORKDIR}/build/Module.symvers" "${D}/usr/src/${LINUX_SRCDIR}/" || die + if use sign-modules; then + for x in $(find "${D}"/lib/modules -iname *.ko); do + # $certs_dir defined previously in this function. + ${WORKDIR}/build/scripts/sign-file sha512 $certs_dir/signing_key.pem $certs_dir/signing_key.x509 $x || die + done + # install the sign-file executable for future use. + exeinto /usr/src/${LINUX_SRCDIR}/scripts + doexe ${WORKDIR}/build/scripts/sign-file + fi + + # The new naming scheme leaves an extra -${PN} at the name of various things in /boot. This should fix that. + cd ${D}/boot + for x in $(ls *); do + xnew=${x%-${PN}} + mv $x ${xnew} || die + done + + +} + +pkg_postinst() { + if use binary && [[ -h "${ROOT}"usr/src/linux ]]; then + rm "${ROOT}"usr/src/linux + fi + if use binary && [[ ! 
-e "${ROOT}"usr/src/linux ]]; then + ewarn "With binary use flag enabled /usr/src/linux" + ewarn "symlink automatically set to debian kernel" + ln -sf ${LINUX_SRCDIR} "${ROOT}"usr/src/linux + fi + + if [ -e ${ROOT}lib/modules ]; then + depmod -a $DEP_PV + fi + if [ -e /etc/boot.conf ]; then + ego boot update + fi +} diff --git a/sys-kernel/debian-sources/debian-sources-4.19.98_p1.ebuild b/sys-kernel/debian-sources/debian-sources-4.19.98_p1.ebuild new file mode 100644 index 000000000000..ea7001b74f72 --- /dev/null +++ b/sys-kernel/debian-sources/debian-sources-4.19.98_p1.ebuild @@ -0,0 +1,300 @@ +# Distributed under the terms of the GNU General Public License v2 + +# Documentation for adding new kernels -- do not remove! +# +# Find latest stable kernel release for debian here: +# https://packages.debian.org/unstable/kernel/ + +EAPI=5 + +inherit check-reqs eutils mount-boot + +SLOT=$PF +CKV=${PV} +KV_FULL=${PN}-${PVR} +DEB_PV_BASE="4.19.98" +DEB_EXTRAVERSION="-1" +EXTRAVERSION="_p1" + +# install modules to /lib/modules/${DEB_PV_BASE}${EXTRAVERSION}-$MODULE_EXT +MODULE_EXT=${EXTRAVERSION} +[ "$PR" != "r0" ] && MODULE_EXT=$MODULE_EXT-$PR +MODULE_EXT=$MODULE_EXT-${PN} +# install sources to /usr/src/$LINUX_SRCDIR +LINUX_SRCDIR=linux-${PF} +DEB_PV="$DEB_PV_BASE${DEB_EXTRAVERSION}" +KERNEL_ARCHIVE="linux_${DEB_PV_BASE}.orig.tar.xz" +PATCH_ARCHIVE="linux_${DEB_PV}.debian.tar.xz" +RESTRICT="binchecks strip mirror" +LICENSE="GPL-2" +KEYWORDS="*" +IUSE="binary btrfs custom-cflags ec2 luks lvm sign-modules zfs" +DEPEND=" + virtual/libelf + binary? ( >=sys-kernel/genkernel-3.4.40.7 ) + btrfs? ( sys-fs/btrfs-progs sys-kernel/genkernel[btrfs] ) + zfs? ( sys-fs/zfs ) + luks? ( sys-kernel/genkernel[cryptsetup] )" +REQUIRED_USE=" +btrfs? ( binary ) +custom-cflags? ( binary ) +luks? ( binary ) +lvm? ( binary ) +sign-modules? ( binary ) +zfs? 
( binary ) +" +DESCRIPTION="Debian Sources (and optional binary kernel)" +DEB_UPSTREAM="http://http.debian.net/debian/pool/main/l/linux" +HOMEPAGE="https://packages.debian.org/unstable/kernel/" +SRC_URI="$DEB_UPSTREAM/${KERNEL_ARCHIVE} $DEB_UPSTREAM/${PATCH_ARCHIVE}" +S="$WORKDIR/linux-${DEB_PV_BASE}" + +get_patch_list() { + [[ -z "${1}" ]] && die "No patch series file specified" + local patch_series="${1}" + while read line ; do + if [[ "${line:0:1}" != "#" ]] ; then + echo "${line}" + fi + done < "${patch_series}" +} + +tweak_config() { + einfo "Setting $2=$3 in kernel config." + sed -i -e "/^$2=/d" $1 + echo "$2=$3" >> $1 +} + +setno_config() { + einfo "Setting $2*=y to n in kernel config." + sed -i -e "s/^$2\(.*\)=.*/$2\1=n/g" $1 +} + +setyes_config() { + einfo "Setting $2*=* to y in kernel config." + sed -i -e "s/^$2\(.*\)=.*/$2\1=y/g" $1 +} + +zap_config() { + einfo "Removing *$2* from kernel config." + sed -i -e "/$2/d" $1 +} + +pkg_pretend() { + # Ensure we have enough disk space to compile + if use binary ; then + CHECKREQS_DISK_BUILD="5G" + check-reqs_pkg_setup + fi +} + +get_certs_dir() { + # find a certificate dir in /etc/kernel/certs/ that contains signing cert for modules. + for subdir in $PF $P linux; do + certdir=/etc/kernel/certs/$subdir + if [ -d $certdir ]; then + if [ ! -e $certdir/signing_key.pem ]; then + eerror "$certdir exists but missing signing key; exiting." + exit 1 + fi + echo $certdir + return + fi + done +} + +pkg_setup() { + export REAL_ARCH="$ARCH" + unset ARCH; unset LDFLAGS #will interfere with Makefile if set +} + +src_prepare() { + cd "${S}" + for debpatch in $( get_patch_list "${WORKDIR}/debian/patches/series" ); do + epatch -p1 "${WORKDIR}/debian/patches/${debpatch}" + done + # end of debian-specific stuff... 
+ + # do not include debian devs certificates + rm -rf "${WORKDIR}"/debian/certs + + sed -i -e "s:^\(EXTRAVERSION =\).*:\1 ${MODULE_EXT}:" Makefile || die + sed -i -e 's:#export\tINSTALL_PATH:export\tINSTALL_PATH:' Makefile || die + rm -f .config >/dev/null + cp -a "${WORKDIR}"/debian "${T}" + make -s mrproper || die "make mrproper failed" + #make -s include/linux/version.h || die "make include/linux/version.h failed" + cd "${S}" + cp -aR "${WORKDIR}"/debian "${S}"/debian + + ## XFS LIBCRC kernel config fixes, FL-823 + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-xfs-libcrc32c-fix.patch + + ## FL-4424: enable legacy support for MCELOG. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-mcelog.patch + + ## do not configure debian devs certs. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-nocerts.patch + + ## FL-3381. enable IKCONFIG + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-ikconfig.patch + + ## increase bluetooth polling patch + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-fix-bluetooth-polling.patch + + local arch featureset subarch + featureset="standard" + if [[ ${REAL_ARCH} == x86 ]]; then + arch="i386" + subarch="686-pae" + elif [[ ${REAL_ARCH} == amd64 ]]; then + arch="amd64" + subarch="amd64" + else + die "Architecture not handled in ebuild" + fi + cp "${FILESDIR}"/config-extract . || die + chmod +x config-extract || die + ./config-extract ${arch} ${featureset} ${subarch} || die + setno_config .config CONFIG_DEBUG + if use ec2; then + setyes_config .config CONFIG_BLK_DEV_NVME + setyes_config .config CONFIG_XEN_BLKDEV_FRONTEND + setyes_config .config CONFIG_XEN_BLKDEV_BACKEND + setyes_config .config CONFIG_IXGBEVF + fi + if use sign-modules; then + certs_dir=$(get_certs_dir) + echo + if [ -z "$certs_dir" ]; then + eerror "No certs dir found in /etc/kernel/certs; aborting." + die + else + einfo "Using certificate directory of $certs_dir for kernel module signing." 
+ fi + echo + # turn on options for signing modules. + # first, remove existing configs and comments: + zap_config .config CONFIG_MODULE_SIG + # now add our settings: + tweak_config .config CONFIG_MODULE_SIG y + tweak_config .config CONFIG_MODULE_SIG_FORCE n + tweak_config .config CONFIG_MODULE_SIG_ALL n + tweak_config .config CONFIG_MODULE_SIG_HASH \"sha512\" + tweak_config .config CONFIG_MODULE_SIG_KEY \"${certs_dir}/signing_key.pem\" + tweak_config .config CONFIG_SYSTEM_TRUSTED_KEYRING y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE_SIZE 4096 + echo "CONFIG_MODULE_SIG_SHA512=y" >> .config + ewarn "This kernel will ALLOW non-signed modules to be loaded with a WARNING." + ewarn "To enable strict enforcement, YOU MUST add module.sig_enforce=1 as a kernel boot" + ewarn "parameter (to params in /etc/boot.conf, and re-run boot-update.)" + echo + fi + if use custom-cflags; then + MARCH="$(python -c "import portage; print(portage.settings[\"CFLAGS\"])" | sed 's/ /\n/g' | grep "march")" + if [ -n "$MARCH" ]; then + sed -i -e 's/-mtune=generic/$MARCH/g' arch/x86/Makefile || die "Canna optimize this kernel anymore, captain!" + fi + fi + # get config into good state: + yes "" | make oldconfig >/dev/null 2>&1 || die + cp .config "${T}"/config || die + make -s mrproper || die "make mrproper failed" +} + +src_compile() { + ! 
use binary && return + install -d "${WORKDIR}"/out/{lib,boot} + install -d "${T}"/{cache,twork} + install -d "${WORKDIR}"/build + cp "${T}"/config "${WORKDIR}"/build/.config + DEFAULT_KERNEL_SOURCE="${S}" CMD_KERNEL_DIR="${S}" genkernel ${GKARGS} \ + --no-save-config \ + --no-oldconfig \ + --kernel-config=${T}/config \ + --kernname="${PN}" \ + --build-src="${S}" \ + --build-dst="${WORKDIR}"/build \ + --makeopts="${MAKEOPTS}" \ + --cachedir="${T}"/cache \ + --tempdir="${T}"/twork \ + --logfile="${WORKDIR}"/genkernel.log \ + --bootdir="${WORKDIR}"/out/boot \ + --disklabel \ + $(usex lvm --lvm --no-lvm ) \ + $(usex luks --luks --no-luks ) \ + --mdadm \ + $(usex btrfs --btrfs --no-btrfs) \ + $(usex zfs --zfs --no-zfs) \ + --module-prefix="${WORKDIR}"/out \ + all || die +} + +src_install() { + # copy sources into place: + dodir /usr/src + cp -a "${S}" "${D}"/usr/src/${LINUX_SRCDIR} || die + cd "${D}"/usr/src/${LINUX_SRCDIR} + # prepare for real-world use and 3rd-party module building: + make mrproper || die + cp "${T}"/config .config || die + cp -a "${T}"/debian debian || die + + + # if we didn't use genkernel, we're done. The kernel source tree is left in + # an unconfigured state - you can't compile 3rd-party modules against it yet. + use binary || return + make prepare || die + make scripts || die + # OK, now the source tree is configured to allow 3rd-party modules to be + # built against it, since we want that to work since we have a binary kernel + # built. 
+ cp -a "${WORKDIR}"/out/* "${D}"/ || die "couldn't copy output files into place" + # module symlink fixup: + rm -f "${D}"/lib/modules/*/source || die + rm -f "${D}"/lib/modules/*/build || die + cd "${D}"/lib/modules + local moddir="$(ls -d [234]*)" + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/source || die + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/build || die + # Fixes FL-14 + cp "${WORKDIR}/build/System.map" "${D}/usr/src/${LINUX_SRCDIR}/" || die + cp "${WORKDIR}/build/Module.symvers" "${D}/usr/src/${LINUX_SRCDIR}/" || die + if use sign-modules; then + for x in $(find "${D}"/lib/modules -iname *.ko); do + # $certs_dir defined previously in this function. + ${WORKDIR}/build/scripts/sign-file sha512 $certs_dir/signing_key.pem $certs_dir/signing_key.x509 $x || die + done + # install the sign-file executable for future use. + exeinto /usr/src/${LINUX_SRCDIR}/scripts + doexe ${WORKDIR}/build/scripts/sign-file + fi + + # The new naming scheme leaves an extra -${PN} at the name of various things in /boot. This should fix that. + cd ${D}/boot + for x in $(ls *); do + xnew=${x%-${PN}} + mv $x ${xnew} || die + done + + +} + +pkg_postinst() { + if use binary && [[ -h "${ROOT}"usr/src/linux ]]; then + rm "${ROOT}"usr/src/linux + fi + if use binary && [[ ! 
-e "${ROOT}"usr/src/linux ]]; then + ewarn "With binary use flag enabled /usr/src/linux" + ewarn "symlink automatically set to debian kernel" + ln -sf ${LINUX_SRCDIR} "${ROOT}"usr/src/linux + fi + + if [ -e ${ROOT}lib/modules ]; then + depmod -a $DEP_PV + fi + if [ -e /etc/boot.conf ]; then + ego boot update + fi +} diff --git a/sys-kernel/debian-sources/debian-sources-5.4.19_p1.ebuild b/sys-kernel/debian-sources/debian-sources-5.4.19_p1.ebuild new file mode 100644 index 000000000000..397ad9eff948 --- /dev/null +++ b/sys-kernel/debian-sources/debian-sources-5.4.19_p1.ebuild @@ -0,0 +1,300 @@ +# Distributed under the terms of the GNU General Public License v2 + +# Documentation for adding new kernels -- do not remove! +# +# Find latest stable kernel release for debian here: +# https://packages.debian.org/unstable/kernel/ + +EAPI=5 + +inherit check-reqs eutils mount-boot + +SLOT=$PF +CKV=${PV} +KV_FULL=${PN}-${PVR} +DEB_PV_BASE="5.4.19" +DEB_EXTRAVERSION="-1" +EXTRAVERSION="_p1" + +# install modules to /lib/modules/${DEB_PV_BASE}${EXTRAVERSION}-$MODULE_EXT +MODULE_EXT=${EXTRAVERSION} +[ "$PR" != "r0" ] && MODULE_EXT=$MODULE_EXT-$PR +MODULE_EXT=$MODULE_EXT-${PN} +# install sources to /usr/src/$LINUX_SRCDIR +LINUX_SRCDIR=linux-${PF} +DEB_PV="$DEB_PV_BASE${DEB_EXTRAVERSION}" +KERNEL_ARCHIVE="linux_${DEB_PV_BASE}.orig.tar.xz" +PATCH_ARCHIVE="linux_${DEB_PV}.debian.tar.xz" +RESTRICT="binchecks strip mirror" +LICENSE="GPL-2" +KEYWORDS="*" +IUSE="binary btrfs custom-cflags ec2 luks lvm sign-modules zfs" +DEPEND=" + virtual/libelf + binary? ( >=sys-kernel/genkernel-3.4.40.7 ) + btrfs? ( sys-fs/btrfs-progs sys-kernel/genkernel[btrfs] ) + zfs? ( sys-fs/zfs ) + luks? ( sys-kernel/genkernel[cryptsetup] )" +REQUIRED_USE=" +btrfs? ( binary ) +custom-cflags? ( binary ) +luks? ( binary ) +lvm? ( binary ) +sign-modules? ( binary ) +zfs? 
( binary ) +" +DESCRIPTION="Debian Sources (and optional binary kernel)" +DEB_UPSTREAM="http://http.debian.net/debian/pool/main/l/linux" +HOMEPAGE="https://packages.debian.org/unstable/kernel/" +SRC_URI="$DEB_UPSTREAM/${KERNEL_ARCHIVE} $DEB_UPSTREAM/${PATCH_ARCHIVE}" +S="$WORKDIR/linux-${DEB_PV_BASE}" + +get_patch_list() { + [[ -z "${1}" ]] && die "No patch series file specified" + local patch_series="${1}" + while read line ; do + if [[ "${line:0:1}" != "#" ]] ; then + echo "${line}" + fi + done < "${patch_series}" +} + +tweak_config() { + einfo "Setting $2=$3 in kernel config." + sed -i -e "/^$2=/d" $1 + echo "$2=$3" >> $1 +} + +setno_config() { + einfo "Setting $2*=y to n in kernel config." + sed -i -e "s/^$2\(.*\)=.*/$2\1=n/g" $1 +} + +setyes_config() { + einfo "Setting $2*=* to y in kernel config." + sed -i -e "s/^$2\(.*\)=.*/$2\1=y/g" $1 +} + +zap_config() { + einfo "Removing *$2* from kernel config." + sed -i -e "/$2/d" $1 +} + +pkg_pretend() { + # Ensure we have enough disk space to compile + if use binary ; then + CHECKREQS_DISK_BUILD="5G" + check-reqs_pkg_setup + fi +} + +get_certs_dir() { + # find a certificate dir in /etc/kernel/certs/ that contains signing cert for modules. + for subdir in $PF $P linux; do + certdir=/etc/kernel/certs/$subdir + if [ -d $certdir ]; then + if [ ! -e $certdir/signing_key.pem ]; then + eerror "$certdir exists but missing signing key; exiting." + exit 1 + fi + echo $certdir + return + fi + done +} + +pkg_setup() { + export REAL_ARCH="$ARCH" + unset ARCH; unset LDFLAGS #will interfere with Makefile if set +} + +src_prepare() { + cd "${S}" + for debpatch in $( get_patch_list "${WORKDIR}/debian/patches/series" ); do + epatch -p1 "${WORKDIR}/debian/patches/${debpatch}" + done + # end of debian-specific stuff... 
+ + # do not include debian devs certificates + rm -rf "${WORKDIR}"/debian/certs + + sed -i -e "s:^\(EXTRAVERSION =\).*:\1 ${MODULE_EXT}:" Makefile || die + sed -i -e 's:#export\tINSTALL_PATH:export\tINSTALL_PATH:' Makefile || die + rm -f .config >/dev/null + cp -a "${WORKDIR}"/debian "${T}" + make -s mrproper || die "make mrproper failed" + #make -s include/linux/version.h || die "make include/linux/version.h failed" + cd "${S}" + cp -aR "${WORKDIR}"/debian "${S}"/debian + + ## XFS LIBCRC kernel config fixes, FL-823 + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-xfs-libcrc32c-fix.patch + + ## FL-4424: enable legacy support for MCELOG. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-mcelog.patch + + ## do not configure debian devs certs. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-nocerts.patch + + ## FL-3381. enable IKCONFIG + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-ikconfig.patch + + ## increase bluetooth polling patch + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-fix-bluetooth-polling.patch + epatch "${FILESDIR}"/${DEB_PV_BASE}/export_kernel_fpu_functions_5_3.patch + local arch featureset subarch + featureset="standard" + if [[ ${REAL_ARCH} == x86 ]]; then + arch="i386" + subarch="686-pae" + elif [[ ${REAL_ARCH} == amd64 ]]; then + arch="amd64" + subarch="amd64" + else + die "Architecture not handled in ebuild" + fi + cp "${FILESDIR}"/config-extract . || die + chmod +x config-extract || die + ./config-extract ${arch} ${featureset} ${subarch} || die + setno_config .config CONFIG_DEBUG + if use ec2; then + setyes_config .config CONFIG_BLK_DEV_NVME + setyes_config .config CONFIG_XEN_BLKDEV_FRONTEND + setyes_config .config CONFIG_XEN_BLKDEV_BACKEND + setyes_config .config CONFIG_IXGBEVF + fi + if use sign-modules; then + certs_dir=$(get_certs_dir) + echo + if [ -z "$certs_dir" ]; then + eerror "No certs dir found in /etc/kernel/certs; aborting." 
+ die + else + einfo "Using certificate directory of $certs_dir for kernel module signing." + fi + echo + # turn on options for signing modules. + # first, remove existing configs and comments: + zap_config .config CONFIG_MODULE_SIG + # now add our settings: + tweak_config .config CONFIG_MODULE_SIG y + tweak_config .config CONFIG_MODULE_SIG_FORCE n + tweak_config .config CONFIG_MODULE_SIG_ALL n + tweak_config .config CONFIG_MODULE_SIG_HASH \"sha512\" + tweak_config .config CONFIG_MODULE_SIG_KEY \"${certs_dir}/signing_key.pem\" + tweak_config .config CONFIG_SYSTEM_TRUSTED_KEYRING y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE_SIZE 4096 + echo "CONFIG_MODULE_SIG_SHA512=y" >> .config + ewarn "This kernel will ALLOW non-signed modules to be loaded with a WARNING." + ewarn "To enable strict enforcement, YOU MUST add module.sig_enforce=1 as a kernel boot" + ewarn "parameter (to params in /etc/boot.conf, and re-run boot-update.)" + echo + fi + if use custom-cflags; then + MARCH="$(python -c "import portage; print(portage.settings[\"CFLAGS\"])" | sed 's/ /\n/g' | grep "march")" + if [ -n "$MARCH" ]; then + sed -i -e 's/-mtune=generic/$MARCH/g' arch/x86/Makefile || die "Canna optimize this kernel anymore, captain!" + fi + fi + # get config into good state: + yes "" | make oldconfig >/dev/null 2>&1 || die + cp .config "${T}"/config || die + make -s mrproper || die "make mrproper failed" +} + +src_compile() { + ! 
use binary && return + install -d "${WORKDIR}"/out/{lib,boot} + install -d "${T}"/{cache,twork} + install -d "${WORKDIR}"/build + cp "${T}"/config "${WORKDIR}"/build/.config + DEFAULT_KERNEL_SOURCE="${S}" CMD_KERNEL_DIR="${S}" genkernel ${GKARGS} \ + --no-save-config \ + --no-oldconfig \ + --kernel-config=${T}/config \ + --kernname="${PN}" \ + --build-src="${S}" \ + --build-dst="${WORKDIR}"/build \ + --makeopts="${MAKEOPTS}" \ + --cachedir="${T}"/cache \ + --tempdir="${T}"/twork \ + --logfile="${WORKDIR}"/genkernel.log \ + --bootdir="${WORKDIR}"/out/boot \ + --disklabel \ + $(usex lvm --lvm --no-lvm ) \ + $(usex luks --luks --no-luks ) \ + --mdadm \ + $(usex btrfs --btrfs --no-btrfs) \ + $(usex zfs --zfs --no-zfs) \ + --module-prefix="${WORKDIR}"/out \ + all || die +} + +src_install() { + # copy sources into place: + dodir /usr/src + cp -a "${S}" "${D}"/usr/src/${LINUX_SRCDIR} || die + cd "${D}"/usr/src/${LINUX_SRCDIR} + # prepare for real-world use and 3rd-party module building: + make mrproper || die + cp "${T}"/config .config || die + cp -a "${T}"/debian debian || die + + + # if we didn't use genkernel, we're done. The kernel source tree is left in + # an unconfigured state - you can't compile 3rd-party modules against it yet. + use binary || return + make prepare || die + make scripts || die + # OK, now the source tree is configured to allow 3rd-party modules to be + # built against it, since we want that to work since we have a binary kernel + # built. 
+ cp -a "${WORKDIR}"/out/* "${D}"/ || die "couldn't copy output files into place" + # module symlink fixup: + rm -f "${D}"/lib/modules/*/source || die + rm -f "${D}"/lib/modules/*/build || die + cd "${D}"/lib/modules + local moddir="$(ls -d [234]*)" + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/source || die + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/build || die + # Fixes FL-14 + cp "${WORKDIR}/build/System.map" "${D}/usr/src/${LINUX_SRCDIR}/" || die + cp "${WORKDIR}/build/Module.symvers" "${D}/usr/src/${LINUX_SRCDIR}/" || die + if use sign-modules; then + for x in $(find "${D}"/lib/modules -iname *.ko); do + # $certs_dir defined previously in this function. + ${WORKDIR}/build/scripts/sign-file sha512 $certs_dir/signing_key.pem $certs_dir/signing_key.x509 $x || die + done + # install the sign-file executable for future use. + exeinto /usr/src/${LINUX_SRCDIR}/scripts + doexe ${WORKDIR}/build/scripts/sign-file + fi + + # The new naming scheme leaves an extra -${PN} at the name of various things in /boot. This should fix that. + cd ${D}/boot + for x in $(ls *); do + xnew=${x%-${PN}} + mv $x ${xnew} || die + done + + +} + +pkg_postinst() { + if use binary && [[ -h "${ROOT}"usr/src/linux ]]; then + rm "${ROOT}"usr/src/linux + fi + if use binary && [[ ! 
-e "${ROOT}"usr/src/linux ]]; then + ewarn "With binary use flag enabled /usr/src/linux" + ewarn "symlink automatically set to debian kernel" + ln -sf ${LINUX_SRCDIR} "${ROOT}"usr/src/linux + fi + + if [ -e ${ROOT}lib/modules ]; then + depmod -a $DEP_PV + fi + if [ -e /etc/boot.conf ]; then + ego boot update + fi +} diff --git a/sys-kernel/debian-sources/debian-sources-5.5.17_p1.ebuild b/sys-kernel/debian-sources/debian-sources-5.5.17_p1.ebuild new file mode 100644 index 000000000000..51f4e37f69b1 --- /dev/null +++ b/sys-kernel/debian-sources/debian-sources-5.5.17_p1.ebuild @@ -0,0 +1,300 @@ +# Distributed under the terms of the GNU General Public License v2 + +# Documentation for adding new kernels -- do not remove! +# +# Find latest stable kernel release for debian here: +# https://packages.debian.org/unstable/kernel/ + +EAPI=5 + +inherit check-reqs eutils mount-boot + +SLOT=$PF +CKV=${PV} +KV_FULL=${PN}-${PVR} +DEB_PV_BASE="5.5.17" +DEB_EXTRAVERSION="-1" +EXTRAVERSION="_p1" + +# install modules to /lib/modules/${DEB_PV_BASE}${EXTRAVERSION}-$MODULE_EXT +MODULE_EXT=${EXTRAVERSION} +[ "$PR" != "r0" ] && MODULE_EXT=$MODULE_EXT-$PR +MODULE_EXT=$MODULE_EXT-${PN} +# install sources to /usr/src/$LINUX_SRCDIR +LINUX_SRCDIR=linux-${PF} +DEB_PV="$DEB_PV_BASE${DEB_EXTRAVERSION}" +KERNEL_ARCHIVE="linux_${DEB_PV_BASE}.orig.tar.xz" +PATCH_ARCHIVE="linux_${DEB_PV}.debian.tar.xz" +RESTRICT="binchecks strip mirror" +LICENSE="GPL-2" +KEYWORDS="*" +IUSE="binary btrfs custom-cflags ec2 luks lvm sign-modules zfs" +DEPEND=" + virtual/libelf + binary? ( >=sys-kernel/genkernel-3.4.40.7 ) + btrfs? ( sys-fs/btrfs-progs sys-kernel/genkernel[btrfs] ) + zfs? ( sys-fs/zfs ) + luks? ( sys-kernel/genkernel[cryptsetup] )" +REQUIRED_USE=" +btrfs? ( binary ) +custom-cflags? ( binary ) +luks? ( binary ) +lvm? ( binary ) +sign-modules? ( binary ) +zfs? 
( binary ) +" +DESCRIPTION="Debian Sources (and optional binary kernel)" +DEB_UPSTREAM="http://http.debian.net/debian/pool/main/l/linux" +HOMEPAGE="https://packages.debian.org/unstable/kernel/" +SRC_URI="$DEB_UPSTREAM/${KERNEL_ARCHIVE} $DEB_UPSTREAM/${PATCH_ARCHIVE}" +S="$WORKDIR/linux-${DEB_PV_BASE}" + +get_patch_list() { + [[ -z "${1}" ]] && die "No patch series file specified" + local patch_series="${1}" + while read line ; do + if [[ "${line:0:1}" != "#" ]] ; then + echo "${line}" + fi + done < "${patch_series}" +} + +tweak_config() { + einfo "Setting $2=$3 in kernel config." + sed -i -e "/^$2=/d" $1 + echo "$2=$3" >> $1 +} + +setno_config() { + einfo "Setting $2*=y to n in kernel config." + sed -i -e "s/^$2\(.*\)=.*/$2\1=n/g" $1 +} + +setyes_config() { + einfo "Setting $2*=* to y in kernel config." + sed -i -e "s/^$2\(.*\)=.*/$2\1=y/g" $1 +} + +zap_config() { + einfo "Removing *$2* from kernel config." + sed -i -e "/$2/d" $1 +} + +pkg_pretend() { + # Ensure we have enough disk space to compile + if use binary ; then + CHECKREQS_DISK_BUILD="5G" + check-reqs_pkg_setup + fi +} + +get_certs_dir() { + # find a certificate dir in /etc/kernel/certs/ that contains signing cert for modules. + for subdir in $PF $P linux; do + certdir=/etc/kernel/certs/$subdir + if [ -d $certdir ]; then + if [ ! -e $certdir/signing_key.pem ]; then + eerror "$certdir exists but missing signing key; exiting." + exit 1 + fi + echo $certdir + return + fi + done +} + +pkg_setup() { + export REAL_ARCH="$ARCH" + unset ARCH; unset LDFLAGS #will interfere with Makefile if set +} + +src_prepare() { + cd "${S}" + for debpatch in $( get_patch_list "${WORKDIR}/debian/patches/series" ); do + epatch -p1 "${WORKDIR}/debian/patches/${debpatch}" + done + # end of debian-specific stuff... 
+ + # do not include debian devs certificates + rm -rf "${WORKDIR}"/debian/certs + + sed -i -e "s:^\(EXTRAVERSION =\).*:\1 ${MODULE_EXT}:" Makefile || die + sed -i -e 's:#export\tINSTALL_PATH:export\tINSTALL_PATH:' Makefile || die + rm -f .config >/dev/null + cp -a "${WORKDIR}"/debian "${T}" + make -s mrproper || die "make mrproper failed" + #make -s include/linux/version.h || die "make include/linux/version.h failed" + cd "${S}" + cp -aR "${WORKDIR}"/debian "${S}"/debian + + ## XFS LIBCRC kernel config fixes, FL-823 + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-xfs-libcrc32c-fix.patch + + ## FL-4424: enable legacy support for MCELOG. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-mcelog.patch + + ## do not configure debian devs certs. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-nocerts.patch + + ## FL-3381. enable IKCONFIG + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-ikconfig.patch + + ## increase bluetooth polling patch + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-fix-bluetooth-polling.patch + epatch "${FILESDIR}"/${DEB_PV_BASE}/export_kernel_fpu_functions_5_3.patch + local arch featureset subarch + featureset="standard" + if [[ ${REAL_ARCH} == x86 ]]; then + arch="i386" + subarch="686-pae" + elif [[ ${REAL_ARCH} == amd64 ]]; then + arch="amd64" + subarch="amd64" + else + die "Architecture not handled in ebuild" + fi + cp "${FILESDIR}"/config-extract . || die + chmod +x config-extract || die + ./config-extract ${arch} ${featureset} ${subarch} || die + setno_config .config CONFIG_DEBUG + if use ec2; then + setyes_config .config CONFIG_BLK_DEV_NVME + setyes_config .config CONFIG_XEN_BLKDEV_FRONTEND + setyes_config .config CONFIG_XEN_BLKDEV_BACKEND + setyes_config .config CONFIG_IXGBEVF + fi + if use sign-modules; then + certs_dir=$(get_certs_dir) + echo + if [ -z "$certs_dir" ]; then + eerror "No certs dir found in /etc/kernel/certs; aborting." 
+ die + else + einfo "Using certificate directory of $certs_dir for kernel module signing." + fi + echo + # turn on options for signing modules. + # first, remove existing configs and comments: + zap_config .config CONFIG_MODULE_SIG + # now add our settings: + tweak_config .config CONFIG_MODULE_SIG y + tweak_config .config CONFIG_MODULE_SIG_FORCE n + tweak_config .config CONFIG_MODULE_SIG_ALL n + tweak_config .config CONFIG_MODULE_SIG_HASH \"sha512\" + tweak_config .config CONFIG_MODULE_SIG_KEY \"${certs_dir}/signing_key.pem\" + tweak_config .config CONFIG_SYSTEM_TRUSTED_KEYRING y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE_SIZE 4096 + echo "CONFIG_MODULE_SIG_SHA512=y" >> .config + ewarn "This kernel will ALLOW non-signed modules to be loaded with a WARNING." + ewarn "To enable strict enforcement, YOU MUST add module.sig_enforce=1 as a kernel boot" + ewarn "parameter (to params in /etc/boot.conf, and re-run boot-update.)" + echo + fi + if use custom-cflags; then + MARCH="$(python -c "import portage; print(portage.settings[\"CFLAGS\"])" | sed 's/ /\n/g' | grep "march")" + if [ -n "$MARCH" ]; then + sed -i -e 's/-mtune=generic/$MARCH/g' arch/x86/Makefile || die "Canna optimize this kernel anymore, captain!" + fi + fi + # get config into good state: + yes "" | make oldconfig >/dev/null 2>&1 || die + cp .config "${T}"/config || die + make -s mrproper || die "make mrproper failed" +} + +src_compile() { + ! 
use binary && return + install -d "${WORKDIR}"/out/{lib,boot} + install -d "${T}"/{cache,twork} + install -d "${WORKDIR}"/build + cp "${T}"/config "${WORKDIR}"/build/.config + DEFAULT_KERNEL_SOURCE="${S}" CMD_KERNEL_DIR="${S}" genkernel ${GKARGS} \ + --no-save-config \ + --no-oldconfig \ + --kernel-config=${T}/config \ + --kernname="${PN}" \ + --build-src="${S}" \ + --build-dst="${WORKDIR}"/build \ + --makeopts="${MAKEOPTS}" \ + --cachedir="${T}"/cache \ + --tempdir="${T}"/twork \ + --logfile="${WORKDIR}"/genkernel.log \ + --bootdir="${WORKDIR}"/out/boot \ + --disklabel \ + $(usex lvm --lvm --no-lvm ) \ + $(usex luks --luks --no-luks ) \ + --mdadm \ + $(usex btrfs --btrfs --no-btrfs) \ + $(usex zfs --zfs --no-zfs) \ + --module-prefix="${WORKDIR}"/out \ + all || die +} + +src_install() { + # copy sources into place: + dodir /usr/src + cp -a "${S}" "${D}"/usr/src/${LINUX_SRCDIR} || die + cd "${D}"/usr/src/${LINUX_SRCDIR} + # prepare for real-world use and 3rd-party module building: + make mrproper || die + cp "${T}"/config .config || die + cp -a "${T}"/debian debian || die + + + # if we didn't use genkernel, we're done. The kernel source tree is left in + # an unconfigured state - you can't compile 3rd-party modules against it yet. + use binary || return + make prepare || die + make scripts || die + # OK, now the source tree is configured to allow 3rd-party modules to be + # built against it, since we want that to work since we have a binary kernel + # built. 
+ cp -a "${WORKDIR}"/out/* "${D}"/ || die "couldn't copy output files into place" + # module symlink fixup: + rm -f "${D}"/lib/modules/*/source || die + rm -f "${D}"/lib/modules/*/build || die + cd "${D}"/lib/modules + local moddir="$(ls -d [234]*)" + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/source || die + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/build || die + # Fixes FL-14 + cp "${WORKDIR}/build/System.map" "${D}/usr/src/${LINUX_SRCDIR}/" || die + cp "${WORKDIR}/build/Module.symvers" "${D}/usr/src/${LINUX_SRCDIR}/" || die + if use sign-modules; then + for x in $(find "${D}"/lib/modules -iname *.ko); do + # $certs_dir defined previously in this function. + ${WORKDIR}/build/scripts/sign-file sha512 $certs_dir/signing_key.pem $certs_dir/signing_key.x509 $x || die + done + # install the sign-file executable for future use. + exeinto /usr/src/${LINUX_SRCDIR}/scripts + doexe ${WORKDIR}/build/scripts/sign-file + fi + + # The new naming scheme leaves an extra -${PN} at the name of various things in /boot. This should fix that. + cd ${D}/boot + for x in $(ls *); do + xnew=${x%-${PN}} + mv $x ${xnew} || die + done + + +} + +pkg_postinst() { + if use binary && [[ -h "${ROOT}"usr/src/linux ]]; then + rm "${ROOT}"usr/src/linux + fi + if use binary && [[ ! 
-e "${ROOT}"usr/src/linux ]]; then + ewarn "With binary use flag enabled /usr/src/linux" + ewarn "symlink automatically set to debian kernel" + ln -sf ${LINUX_SRCDIR} "${ROOT}"usr/src/linux + fi + + if [ -e ${ROOT}lib/modules ]; then + depmod -a $DEP_PV + fi + if [ -e /etc/boot.conf ]; then + ego boot update + fi +} diff --git a/sys-kernel/debian-sources/debian-sources-5.6.14_p1.ebuild b/sys-kernel/debian-sources/debian-sources-5.6.14_p1.ebuild new file mode 100644 index 000000000000..548298205364 --- /dev/null +++ b/sys-kernel/debian-sources/debian-sources-5.6.14_p1.ebuild @@ -0,0 +1,300 @@ +# Distributed under the terms of the GNU General Public License v2 + +# Documentation for adding new kernels -- do not remove! +# +# Find latest stable kernel release for debian here: +# https://packages.debian.org/unstable/kernel/ + +EAPI=5 + +inherit check-reqs eutils mount-boot + +SLOT=$PF +CKV=${PV} +KV_FULL=${PN}-${PVR} +DEB_PV_BASE="5.6.14" +DEB_EXTRAVERSION="-1" +EXTRAVERSION="_p1" + +# install modules to /lib/modules/${DEB_PV_BASE}${EXTRAVERSION}-$MODULE_EXT +MODULE_EXT=${EXTRAVERSION} +[ "$PR" != "r0" ] && MODULE_EXT=$MODULE_EXT-$PR +MODULE_EXT=$MODULE_EXT-${PN} +# install sources to /usr/src/$LINUX_SRCDIR +LINUX_SRCDIR=linux-${PF} +DEB_PV="$DEB_PV_BASE${DEB_EXTRAVERSION}" +KERNEL_ARCHIVE="linux_${DEB_PV_BASE}.orig.tar.xz" +PATCH_ARCHIVE="linux_${DEB_PV}.debian.tar.xz" +RESTRICT="binchecks strip mirror" +LICENSE="GPL-2" +KEYWORDS="*" +IUSE="binary btrfs custom-cflags ec2 luks lvm sign-modules zfs" +DEPEND=" + virtual/libelf + binary? ( >=sys-kernel/genkernel-3.4.40.7 ) + btrfs? ( sys-fs/btrfs-progs sys-kernel/genkernel[btrfs] ) + zfs? ( sys-fs/zfs ) + luks? ( sys-kernel/genkernel[cryptsetup] )" +REQUIRED_USE=" +btrfs? ( binary ) +custom-cflags? ( binary ) +luks? ( binary ) +lvm? ( binary ) +sign-modules? ( binary ) +zfs? 
( binary ) +" +DESCRIPTION="Debian Sources (and optional binary kernel)" +DEB_UPSTREAM="http://http.debian.net/debian/pool/main/l/linux" +HOMEPAGE="https://packages.debian.org/unstable/kernel/" +SRC_URI="$DEB_UPSTREAM/${KERNEL_ARCHIVE} $DEB_UPSTREAM/${PATCH_ARCHIVE}" +S="$WORKDIR/linux-${DEB_PV_BASE}" + +get_patch_list() { + [[ -z "${1}" ]] && die "No patch series file specified" + local patch_series="${1}" + while read line ; do + if [[ "${line:0:1}" != "#" ]] ; then + echo "${line}" + fi + done < "${patch_series}" +} + +tweak_config() { + einfo "Setting $2=$3 in kernel config." + sed -i -e "/^$2=/d" $1 + echo "$2=$3" >> $1 +} + +setno_config() { + einfo "Setting $2*=y to n in kernel config." + sed -i -e "s/^$2\(.*\)=.*/$2\1=n/g" $1 +} + +setyes_config() { + einfo "Setting $2*=* to y in kernel config." + sed -i -e "s/^$2\(.*\)=.*/$2\1=y/g" $1 +} + +zap_config() { + einfo "Removing *$2* from kernel config." + sed -i -e "/$2/d" $1 +} + +pkg_pretend() { + # Ensure we have enough disk space to compile + if use binary ; then + CHECKREQS_DISK_BUILD="5G" + check-reqs_pkg_setup + fi +} + +get_certs_dir() { + # find a certificate dir in /etc/kernel/certs/ that contains signing cert for modules. + for subdir in $PF $P linux; do + certdir=/etc/kernel/certs/$subdir + if [ -d $certdir ]; then + if [ ! -e $certdir/signing_key.pem ]; then + eerror "$certdir exists but missing signing key; exiting." + exit 1 + fi + echo $certdir + return + fi + done +} + +pkg_setup() { + export REAL_ARCH="$ARCH" + unset ARCH; unset LDFLAGS #will interfere with Makefile if set +} + +src_prepare() { + cd "${S}" + for debpatch in $( get_patch_list "${WORKDIR}/debian/patches/series" ); do + epatch -p1 "${WORKDIR}/debian/patches/${debpatch}" + done + # end of debian-specific stuff... 
+ + # do not include debian devs certificates + rm -rf "${WORKDIR}"/debian/certs + + sed -i -e "s:^\(EXTRAVERSION =\).*:\1 ${MODULE_EXT}:" Makefile || die + sed -i -e 's:#export\tINSTALL_PATH:export\tINSTALL_PATH:' Makefile || die + rm -f .config >/dev/null + cp -a "${WORKDIR}"/debian "${T}" + make -s mrproper || die "make mrproper failed" + #make -s include/linux/version.h || die "make include/linux/version.h failed" + cd "${S}" + cp -aR "${WORKDIR}"/debian "${S}"/debian + + ## XFS LIBCRC kernel config fixes, FL-823 + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-xfs-libcrc32c-fix.patch + + ## FL-4424: enable legacy support for MCELOG. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-mcelog.patch + + ## do not configure debian devs certs. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-nocerts.patch + + ## FL-3381. enable IKCONFIG + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-ikconfig.patch + + ## increase bluetooth polling patch + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-fix-bluetooth-polling.patch + epatch "${FILESDIR}"/${DEB_PV_BASE}/export_kernel_fpu_functions_5_3.patch + local arch featureset subarch + featureset="standard" + if [[ ${REAL_ARCH} == x86 ]]; then + arch="i386" + subarch="686-pae" + elif [[ ${REAL_ARCH} == amd64 ]]; then + arch="amd64" + subarch="amd64" + else + die "Architecture not handled in ebuild" + fi + cp "${FILESDIR}"/config-extract . || die + chmod +x config-extract || die + ./config-extract ${arch} ${featureset} ${subarch} || die + setno_config .config CONFIG_DEBUG + if use ec2; then + setyes_config .config CONFIG_BLK_DEV_NVME + setyes_config .config CONFIG_XEN_BLKDEV_FRONTEND + setyes_config .config CONFIG_XEN_BLKDEV_BACKEND + setyes_config .config CONFIG_IXGBEVF + fi + if use sign-modules; then + certs_dir=$(get_certs_dir) + echo + if [ -z "$certs_dir" ]; then + eerror "No certs dir found in /etc/kernel/certs; aborting." 
+ die + else + einfo "Using certificate directory of $certs_dir for kernel module signing." + fi + echo + # turn on options for signing modules. + # first, remove existing configs and comments: + zap_config .config CONFIG_MODULE_SIG + # now add our settings: + tweak_config .config CONFIG_MODULE_SIG y + tweak_config .config CONFIG_MODULE_SIG_FORCE n + tweak_config .config CONFIG_MODULE_SIG_ALL n + tweak_config .config CONFIG_MODULE_SIG_HASH \"sha512\" + tweak_config .config CONFIG_MODULE_SIG_KEY \"${certs_dir}/signing_key.pem\" + tweak_config .config CONFIG_SYSTEM_TRUSTED_KEYRING y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE_SIZE 4096 + echo "CONFIG_MODULE_SIG_SHA512=y" >> .config + ewarn "This kernel will ALLOW non-signed modules to be loaded with a WARNING." + ewarn "To enable strict enforcement, YOU MUST add module.sig_enforce=1 as a kernel boot" + ewarn "parameter (to params in /etc/boot.conf, and re-run boot-update.)" + echo + fi + if use custom-cflags; then + MARCH="$(python -c "import portage; print(portage.settings[\"CFLAGS\"])" | sed 's/ /\n/g' | grep "march")" + if [ -n "$MARCH" ]; then + sed -i -e 's/-mtune=generic/$MARCH/g' arch/x86/Makefile || die "Canna optimize this kernel anymore, captain!" + fi + fi + # get config into good state: + yes "" | make oldconfig >/dev/null 2>&1 || die + cp .config "${T}"/config || die + make -s mrproper || die "make mrproper failed" +} + +src_compile() { + ! 
use binary && return + install -d "${WORKDIR}"/out/{lib,boot} + install -d "${T}"/{cache,twork} + install -d "${WORKDIR}"/build + cp "${T}"/config "${WORKDIR}"/build/.config + DEFAULT_KERNEL_SOURCE="${S}" CMD_KERNEL_DIR="${S}" genkernel ${GKARGS} \ + --no-save-config \ + --no-oldconfig \ + --kernel-config=${T}/config \ + --kernname="${PN}" \ + --build-src="${S}" \ + --build-dst="${WORKDIR}"/build \ + --makeopts="${MAKEOPTS}" \ + --cachedir="${T}"/cache \ + --tempdir="${T}"/twork \ + --logfile="${WORKDIR}"/genkernel.log \ + --bootdir="${WORKDIR}"/out/boot \ + --disklabel \ + $(usex lvm --lvm --no-lvm ) \ + $(usex luks --luks --no-luks ) \ + --mdadm \ + $(usex btrfs --btrfs --no-btrfs) \ + $(usex zfs --zfs --no-zfs) \ + --module-prefix="${WORKDIR}"/out \ + all || die +} + +src_install() { + # copy sources into place: + dodir /usr/src + cp -a "${S}" "${D}"/usr/src/${LINUX_SRCDIR} || die + cd "${D}"/usr/src/${LINUX_SRCDIR} + # prepare for real-world use and 3rd-party module building: + make mrproper || die + cp "${T}"/config .config || die + cp -a "${T}"/debian debian || die + + + # if we didn't use genkernel, we're done. The kernel source tree is left in + # an unconfigured state - you can't compile 3rd-party modules against it yet. + use binary || return + make prepare || die + make scripts || die + # OK, now the source tree is configured to allow 3rd-party modules to be + # built against it, since we want that to work since we have a binary kernel + # built. 
+ cp -a "${WORKDIR}"/out/* "${D}"/ || die "couldn't copy output files into place" + # module symlink fixup: + rm -f "${D}"/lib/modules/*/source || die + rm -f "${D}"/lib/modules/*/build || die + cd "${D}"/lib/modules + local moddir="$(ls -d [234]*)" + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/source || die + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/build || die + # Fixes FL-14 + cp "${WORKDIR}/build/System.map" "${D}/usr/src/${LINUX_SRCDIR}/" || die + cp "${WORKDIR}/build/Module.symvers" "${D}/usr/src/${LINUX_SRCDIR}/" || die + if use sign-modules; then + for x in $(find "${D}"/lib/modules -iname *.ko); do + # $certs_dir defined previously in this function. + ${WORKDIR}/build/scripts/sign-file sha512 $certs_dir/signing_key.pem $certs_dir/signing_key.x509 $x || die + done + # install the sign-file executable for future use. + exeinto /usr/src/${LINUX_SRCDIR}/scripts + doexe ${WORKDIR}/build/scripts/sign-file + fi + + # The new naming scheme leaves an extra -${PN} at the name of various things in /boot. This should fix that. + cd ${D}/boot + for x in $(ls *); do + xnew=${x%-${PN}} + mv $x ${xnew} || die + done + + +} + +pkg_postinst() { + if use binary && [[ -h "${ROOT}"usr/src/linux ]]; then + rm "${ROOT}"usr/src/linux + fi + if use binary && [[ ! 
-e "${ROOT}"usr/src/linux ]]; then + ewarn "With binary use flag enabled /usr/src/linux" + ewarn "symlink automatically set to debian kernel" + ln -sf ${LINUX_SRCDIR} "${ROOT}"usr/src/linux + fi + + if [ -e ${ROOT}lib/modules ]; then + depmod -a $DEP_PV + fi + if [ -e /etc/boot.conf ]; then + ego boot update + fi +} diff --git a/sys-kernel/debian-sources/debian-sources-5.6.14_p2.ebuild b/sys-kernel/debian-sources/debian-sources-5.6.14_p2.ebuild deleted file mode 100644 index 12c57408cbcf..000000000000 --- a/sys-kernel/debian-sources/debian-sources-5.6.14_p2.ebuild +++ /dev/null @@ -1,588 +0,0 @@ -# Distributed under the terms of the GNU General Public License v2 - -# Documentation for adding new kernels -- do not remove! -# -# Find latest stable kernel release for debian here: -# https://packages.debian.org/unstable/kernel/ - -EAPI=7 - -inherit check-reqs eutils mount-boot toolchain-funcs - -SLOT=$PF -CKV=${PV} -KV_FULL=${PN}-${PVR} -DEB_PV_BASE="5.6.14" -DEB_EXTRAVERSION="-2~bpo10+1" -EXTRAVERSION="_p2" - -# install modules to /lib/modules/${DEB_PV_BASE}${EXTRAVERSION}-$MODULE_EXT -MODULE_EXT=${EXTRAVERSION} -[ "$PR" != "r0" ] && MODULE_EXT=$MODULE_EXT-$PR -MODULE_EXT=$MODULE_EXT-${PN} - -DEB_PV="${DEB_PV_BASE}${DEB_EXTRAVERSION}" -KERNEL_ARCHIVE="linux_${DEB_PV_BASE}.orig.tar.xz" -PATCH_ARCHIVE="linux_${DEB_PV}.debian.tar.xz" -DEB_UPSTREAM="http://http.debian.net/debian/pool/main/l/linux" - -SRC_URI=" - $DEB_UPSTREAM/${KERNEL_ARCHIVE} - $DEB_UPSTREAM/${PATCH_ARCHIVE} -" - -S="$WORKDIR/linux-${DEB_PV_BASE}" - -DESCRIPTION="Linux kernel sources with Debian patches." -HOMEPAGE="https://packages.debian.org/unstable/kernel/" - -RESTRICT="binchecks strip mirror" -LICENSE="GPL-2" -KEYWORDS="*" - -IUSE="binary btrfs clang custom-cflags dmraid dtrace ec2 firmware hardened iscsi libressl luks lvm mdadm microcode multipath nbd nfs plymouth selinux sign-modules systemd wireguard zfs" - -BDEPEND=" - sys-devel/bc - virtual/libelf -" - -DEPEND=" - binary? 
( sys-kernel/dracut ) - btrfs? ( sys-fs/btrfs-progs ) - firmware? ( - sys-kernel/linux-firmware - ) - luks? ( sys-fs/cryptsetup ) - lvm? ( sys-fs/lvm2 ) - mdadm? ( sys-fs/mdadm ) - plymouth? ( - x11-libs/libdrm[libkms] - sys-boot/plymouth[libkms,udev] - ) - sign-modules? ( - || ( dev-libs/openssl ) ( dev-libs/libressl ) - sys-apps/kmod - ) - systemd? ( sys-apps/systemd ) - wireguard? ( virtual/wireguard ) - zfs? ( sys-fs/zfs ) -" - -REQUIRED_USE=" - btrfs? ( binary ) - custom-cflags? ( binary ) - ec2? ( binary ) - libressl? ( binary ) - luks? ( binary ) - lvm? ( binary ) - mdadm? ( binary ) - microcode? ( binary ) - plymouth? ( binary ) - selinux? ( binary ) - sign-modules? ( binary ) - systemd? ( binary ) - wireguard? ( binary ) - zfs? ( binary ) -" - -# temporary ;( -DTRACE_PATCHES_DIR="${FILESDIR}/${DEB_PV_BASE}/dtrace-patches/" -DTRACE_PATCHES=( - 0001-ctf-generate-CTF-information-for-the-kernel.patch - 0002-kallsyms-introduce-new-proc-kallmodsyms-including-bu.patch - 0003-waitfd-new-syscall-implementing-waitpid-over-fds.patch - 0004-dtrace-core-and-x86.patch - 0005-dtrace-modular-components-and-x86-support.patch - 0006-dtrace-systrace-provider-core-components.patch - 0007-dtrace-systrace-provider.patch - 0008-dtrace-sdt-provider-core-components.patch - 0009-dtrace-sdt-provider-for-x86.patch - 0010-dtrace-profile-provider-and-test-probe-core-componen.patch - 0011-dtrace-profile-and-tick-providers-built-on-cyclics.patch - 0012-dtrace-USDT-and-pid-provider-core-and-x86-components.patch - 0013-dtrace-USDT-and-pid-providers.patch - 0014-dtrace-function-boundary-tracing-FBT-core-and-x86-co.patch - 0015-dtrace-fbt-provider-modular-components.patch - 0016-dtrace-arm-arm64-port.patch - 0017-dtrace-add-SDT-probes.patch - 0018-dtrace-add-rcu_irq_exit-and-rcu_nmi_exit_common-to-F.patch - 0019-dtrace-add-sample-script-for-building-DTrace-on-Fedo.patch - 0020-locking-publicize-mutex_owner-and-mutex_owned-again.patch -) - -eapply_dtrace() { - eapply 
"${DTRACE_PATCHES_DIR}/${1}" -} - -get_patch_list() { - [[ -z "${1}" ]] && die "No patch series file specified" - local patch_series="${1}" - while read line ; do - if [[ "${line:0:1}" != "#" ]] ; then - echo "${line}" - fi - done < "${patch_series}" -} - -tweak_config() { - einfo "Setting $2=$3 in kernel config." - sed -i -e "/^$2=/d" $1 -} - -set_no_config() { - einfo "Setting $2*=y to n in kernel config." - sed -i -e "s/^$2\(.*\)=.*/$2\1=n/g" $1 -} - -set_yes_config() { - einfo "Setting $2*=* to y in kernel config." - sed -i -e "s/^$2\(.*\)=.*/$2\1=y/g" $1 -} - -set_module_config() { - einfo "Setting $2*=* to m in kernel config." - sed -i -e "s/^$2\(.*\)=.*/$2\1=y/g" $1 -} - -zap_config() { - einfo "Removing *$2* from kernel config." - sed -i -e "/$2/d" $1 -} - -get_certs_dir() { - # find a certificate dir in /etc/kernel/certs/ that contains signing cert for modules. - for subdir in $PF $P linux; do - certdir=/etc/kernel/certs/$subdir - if [ -d $certdir ]; then - if [ ! -e $certdir/signing_key.pem ]; then - eerror "$certdir exists but missing signing key; exiting." - exit 1 - fi - echo $certdir - return - fi - done -} - -pkg_pretend() { - # Ensure we have enough disk space to compile - if use binary ; then - CHECKREQS_DISK_BUILD="5G" - check-reqs_pkg_setup - fi -} - -pkg_setup() { - export REAL_ARCH="$ARCH" - unset ARCH; unset LDFLAGS #will interfere with Makefile if set -} - -src_prepare() { - - debug-print-function ${FUNCNAME} "${@}" - - # apply debian patches - cd "${S}" - for debpatch in $( get_patch_list "${WORKDIR}/debian/patches/series" ); do - eapply -p1 "${WORKDIR}/debian/patches/${debpatch}" - done - # end of debian-specific stuff... 
- - # do not include debian devs certificates - rm -rf "${WORKDIR}"/debian/certs - - sed -i -e "s:^\(EXTRAVERSION =\).*:\1 ${MODULE_EXT}:" Makefile || die - sed -i -e 's:#export\tINSTALL_PATH:export\tINSTALL_PATH:' Makefile || die - rm -f .config >/dev/null - cp -a "${WORKDIR}"/debian "${T}" - make -s mrproper || die "make mrproper failed" - #make -s include/linux/version.h || die "make include/linux/version.h failed" - cd "${S}" - cp -aR "${WORKDIR}"/debian "${S}"/debian - - ## increase bluetooth polling patch - eapply "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-fix-bluetooth-polling.patch - - # Restore export_kernel_fpu_functions for zfs - eapply "${FILESDIR}"/${DEB_PV_BASE}/export_kernel_fpu_functions_5_3.patch - - - if use dtrace; then - # Dtrace patches - einfo "Applying DTrace patches ..." - for my_patch in ${DTRACE_PATCHES[*]} ; do - eapply_dtrace "${my_patch}" - done - fi - - local arch featureset subarch - featureset="standard" - if [[ ${REAL_ARCH} == x86 ]]; then - arch="i386" - subarch="686-pae" - elif [[ ${REAL_ARCH} == amd64 ]]; then - arch="amd64" - subarch="amd64" - elif [[ ${REAL_ARCH} == arm64 ]]; then - arch="arm64" - subarch="arm64" - else - die "Architecture not handled in ebuild" - fi - cp "${FILESDIR}"/config-extract . || die - chmod +x config-extract || die - ./config-extract ${arch} ${featureset} ${subarch} || die - - ### TWEAK KERNEL CONFIG ### - - ## FL-3381 Enable IKCONFIG so that /proc/config.gz can be used for various checks - ## TODO: Maybe not a good idea for USE=hardened, look into this. - tweak_config .config CONFIG_IKCONFIG y - tweak_config .config CONFIG_IKCONFIG_PROC y - - ## FL-4424 Enable legacy support for MCELOG - ## TODO: See if this is still required? if not, can it be shit canned? - tweak_config .config CONFIG_X86_MCELOG_LEGACY y - - ## FL-823 Build XFS into kernel - ## TODO: can most likely be shit canned as no longer using genkernel, + Dracut includes all kernel moduels in initrd. 
- tweak_config .config CONFIG_XFS_FS y - tweak_config .config CONFIG_LIBCRC32C y - - ## Do not configure Debian devs certificates - tweak_config .config CONFIG_SYSTEM_TRUSTED_KEYS - - set_no_config .config CONFIG_DEBUG - if use custom-cflags; then - MARCH="$(python -c "import portage; print(portage.settings[\"CFLAGS\"])" | sed 's/ /\n/g' | grep "march")" - if [ -n "$MARCH" ]; then - sed -i -e 's/-mtune=generic/$MARCH/g' arch/x86/Makefile || die "Canna optimize this kernel anymore, captain!" - fi - fi - if use ec2; then - tweak_config .config CONFIG_BLK_DEV_NVME y - tweak_config .config CONFIG_XEN_BLKDEV_FRONTEND y - tweak_config .config CONFIG_XEN_BLKDEV_BACKEND y - tweak_config .config CONFIG_IXGBEVF y - fi - if use hardened; then - tweak_config .config CONFIG_AUDIT y - tweak_config .config CONFIG_EXPERT y - tweak_config .config CONFIG_SLUB_DEBUG y - tweak_config .config CONFIG_SLAB_MERGE_DEFAULT n - tweak_config .config CONFIG_SLAB_FREELIST_RANDOM y - tweak_config .config CONFIG_SLAB_FREELIST_HARDENED y - tweak_config .config CONFIG_SLAB_CANARY y - tweak_config .config CONFIG_SHUFFLE_PAGE_ALLOCATOR y - tweak_config .config CONFIG_RANDOMIZE_BASE y - tweak_config .config CONFIG_RANDOMIZE_MEMORY y - tweak_config .config CONFIG_HIBERNATION n - tweak_config .config CONFIG_HARDENED_USERCOPY y - tweak_config .config CONFIG_HARDENED_USERCOPY_FALLBACK n - tweak_config .config CONFIG_FORTIFY_SOURCE y - tweak_config .config CONFIG_STACKPROTECTOR y - tweak_config .config CONFIG_STACKPROTECTOR_STRONG y - tweak_config .config CONFIG_ARCH_MMAP_RND_BITS 32 - tweak_config .config CONFIG_ARCH_MMAP_RND_COMPAT_BITS 16 - tweak_config .config CONFIG_INIT_ON_FREE_DEFAULT_ON y - tweak_config .config CONFIG_INIT_ON_ALLOC_DEFAULT_ON y - tweak_config .config CONFIG_SLAB_SANITIZE_VERIFY y - tweak_config .config CONFIG_PAGE_SANITIZE_VERIFY y - - ! 
if use clang; then - tweak_config .config CONFIG_GCC_PLUGINS y - tweak_config .config CONFIG_GCC_PLUGIN_LATENT_ENTROPY y - tweak_config .config CONFIG_GCC_PLUGIN_STRUCTLEAK y - tweak_config .config CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL y - tweak_config .config CONFIG_GCC_PLUGIN_RANDSTRUCT y - tweak_config .config CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE n - tweak_config .config CONFIG_GCC_PLUGIN_STACKLEAK y - tweak_config .config CONFIG_STACKLEAK_TRACK_MIN_SIZE 100 - tweak_config .config CONFIG_STACKLEAK_METRICS n - tweak_config .config CONFIG_STACKLEAK_RUNTIME_DISABLE n - fi - fi - if use sign-modules; then - certs_dir=$(get_certs_dir) - echo - if [ -z "$certs_dir" ]; then - eerror "No certs dir found in /etc/kernel/certs; aborting." - die - else - einfo "Using certificate directory of $certs_dir for kernel module signing." - fi - echo - # turn on options for signing modules. - # first, remove existing configs and comments: - zap_config .config CONFIG_MODULE_SIG - # now add our settings: - tweak_config .config CONFIG_MODULE_SIG y - tweak_config .config CONFIG_MODULE_SIG_FORCE n - tweak_config .config CONFIG_MODULE_SIG_ALL n - # LibreSSL currently (2.9.0) does not have CMS support, so is limited to SHA1. - # https://bugs.gentoo.org/706086 - # https://bugzilla.kernel.org/show_bug.cgi?id=202159 - if use libressl; then - tweak_config .config CONFIG_MODULE_SIG_HASH \"sha1\" - else - tweak_config .config CONFIG_MODULE_SIG_HASH \"sha512\" - fi - tweak_config .config CONFIG_MODULE_SIG_KEY \"${certs_dir}/signing_key.pem\" - tweak_config .config CONFIG_SYSTEM_TRUSTED_KEYRING y - tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE y - tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE_SIZE 4096 - # See above comment re: LibreSSL - if use libressl; then - echo "CONFIG_MODULE_SIG_SHA1=y" >> .config - else - echo "CONFIG_MODULE_SIG_SHA512=y" >> .config - fi - ewarn "This kernel will ALLOW non-signed modules to be loaded with a WARNING." 
- ewarn "To enable strict enforcement, YOU MUST add module.sig_enforce=1 as a kernel boot" - ewarn "parameter (to params in /etc/boot.conf, and re-run boot-update.)" - echo - fi - if use wireguard; then - tweak_config .config CONFIG_NET y - tweak_config .config CONFIG_INET y - tweak_config .config CONFIG_INET_UDP_TUNNEL y - tweak_config .config CONFIG_NF_CONNTRACK y - tweak_config .config CONFIG_NETFILTER_XT_MATCH_HASHLIMIT y - tweak_config .config CONFIG_IP6_NF_IPTABLES y - tweak_config .config CONFIG_CRYPTO_BLKCIPHER y - tweak_config .config CONFIG_PADATA y - fi - # get config into good state: - yes "" | make oldconfig >/dev/null 2>&1 || die - cp .config "${T}"/.config || die - make -s mrproper || die "make mrproper failed" - - # Apply any user patches - eapply_user -} - -src_configure() { - - ! use binary && return - - debug-print-function ${FUNCNAME} "${@}" - - tc-export_build_env - MAKEARGS=( - V=1 - - HOSTCC="$(tc-getBUILD_CC)" - HOSTCXX="$(tc-getBUILD_CXX)" - HOSTCFLAGS="${BUILD_CFLAGS}" - HOSTLDFLAGS="${BUILD_LDFLAGS}" - - CROSS_COMPILE=${CHOST}- - AS="$(tc-getAS)" - CC="$(tc-getCC)" - LD="$(tc-getLD)" - AR="$(tc-getAR)" - NM="$(tc-getNM)" - STRIP=":" - OBJCOPY="$(tc-getOBJCOPY)" - OBJDUMP="$(tc-getOBJDUMP)" - - # we need to pass it to override colliding Gentoo envvar - ARCH=$(tc-arch-kernel) - ) - - mkdir -p "${WORKDIR}"/modprep || die - cp "${T}"/.config "${WORKDIR}"/modprep/ || die - emake O="${WORKDIR}"/modprep "${MAKEARGS[@]}" olddefconfig || die "kernel configure failed" - emake O="${WORKDIR}"/modprep "${MAKEARGS[@]}" modules_prepare || die "modules_prepare failed" - cp -pR "${WORKDIR}"/modprep "${WORKDIR}"/build || die -} - -src_compile() { - - ! use binary && return - - debug-print-function ${FUNCNAME} "${@}" - - emake O="${WORKDIR}"/build "${MAKEARGS[@]}" all || "kernel build failed" -} - -src_install() { - - debug-print-function ${FUNCNAME} "${@}" - - # TODO: Change to SANDBOX_WRITE=".." 
for installkernel writes - # Disable sandbox - export SANDBOX_ON=0 - - # copy sources into place: - dodir /usr/src - cp -a "${S}" "${D}"/usr/src/linux-${PN}-${PV} || die - cd "${D}"/usr/src/linux-${PN}-${PV} - - # prepare for real-world use and 3rd-party module building: - make mrproper || die - cp "${T}"/.config .config || die - cp -a "${T}"/debian debian || die - - # if we didn't use genkernel, we're done. The kernel source tree is left in - # an unconfigured state - you can't compile 3rd-party modules against it yet. - use binary || return - make prepare || die - make scripts || die - - local targets=( modules_install ) - - # ARM / ARM64 requires dtb - if (use arm || use arm64); then - targets+=( dtbs_install ) - fi - - emake O="${WORKDIR}"/build "${MAKEARGS[@]}" INSTALL_MOD_PATH="${ED}" INSTALL_PATH="${ED}/boot" "${targets[@]}" - installkernel "${PN}-${PV}" "${WORKDIR}/build/arch/x86_64/boot/bzImage" "${WORKDIR}/build/System.map" "${EROOT}/boot" - - # module symlink fix-up: - rm -f "${D}"/lib/modules/${PV}-${PN}/source || die - rm -f "${D}"/lib/modules/${PV}-${PN}/build || die - - # Set-up module symlinks: - ln -s /usr/src/linux-${PN}-${PV} "${D}"/lib/modules/${PV}-${PN}/source || die "failed to install source symlink" - ln -s /usr/src/linux-${PN}-${PV} "${D}"/lib/modules/${PV}-${PN}/build || die "failed to install build symlink" - - # Fixes FL-14 - cp "${WORKDIR}/build/System.map" "${D}"/usr/src/linux-${PN}-${PV}/ || die "failed to install System.map" - cp "${WORKDIR}/build/Module.symvers" "${D}"/usr/src/linux-${PN}-${PV}/ || die "failed to install Module.symvers" - - if use sign-modules; then - for x in $(find "${D}"/lib/modules -iname *.ko); do - # $certs_dir defined previously in this function. - ${WORKDIR}/build/scripts/sign-file sha512 $certs_dir/signing_key.pem $certs_dir/signing_key.x509 $x || die - done - # install the sign-file executable for future use. 
- exeinto /usr/src/linux-${PN}-${PV}/scripts - doexe ${WORKDIR}/build/scripts/sign-file - fi -} - -pkg_postinst() { - - # TODO: Change to SANDBOX_WRITE=".." for Dracut writes - export SANDBOX_ON=0 - - if use binary && [[ -h "${ROOT}"/usr/src/linux ]]; then - rm "${ROOT}"usr/src/linux - fi - - if use binary && [[ ! -e "${ROOT}"/usr/src/linux ]]; then - ewarn "WARNING... WARNING... WARNING" - ewarn "" - ewarn "/usr/src/linux symlink automatically set to ${PN}-${PV}" - ewarn "" - ln -sf "${ROOT}"/usr/src/linux-${PN}-${PV} "${ROOT}"/usr/src/linux - fi - - if [ -e ${ROOT}lib/modules ]; then - depmod -a ${PV}-${PN} - fi - - # NOTE: WIP and not well tested yet. - # - # Dracut will build an initramfs when USE=binary. - # The initramfs will be configurable via USE, i.e. - # USE=zfs will pass '--zfs' to Dracut and USE=-systemd - # will pass '--omit dracut-systemd systemd systemd-networkd systemd-initrd' - # to exclude these (Dracut) modules from the initramfs. - if use binary; then - einfo ">>> Dracut: building initramfs" - dracut \ - --stdlog=1 \ - --force \ - --no-hostonly \ - --add "base dm fs-lib i18n kernel-modules network rootfs-block shutdown terminfo udev-rules usrmount" \ - --omit "biosdevname bootchart busybox caps convertfs dash debug dmsquash-live dmsquash-live-ntfs fcoe fcoe-uefi fstab-sys gensplash ifcfg img-lib livenet mksh network-manager qemu qemu-net rpmversion securityfs ssh-client stratis syslog url-lib" \ - $(usex btrfs "-a btrfs" "-o btrfs") \ - $(usex dmraid "-a dmraid" "-o dmraid") \ - $(usex hardened "-o resume" "-a resume") \ - $(usex iscsi "-a iscsi" "-o iscsi") \ - $(usex lvm "-a lvm" "-o lvm") \ - $(usex lvm "--lvmconf" "--nolvmconf") \ - $(usex luks "-a crypt" "-o crypt") \ - $(usex mdadm "--mdadmconf" "--nomdadmconf") \ - $(usex mdadm "-a mdraid" "-o mdraid") \ - $(usex microcode "--early-microcode" "--no-early-microcode") \ - $(usex multipath "-a multipath" "-o multipath") \ - $(usex nbd "-a nbd" "-o nbd") \ - $(usex nfs "-a nfs" "-o nfs") \ - 
$(usex plymouth "-a plymouth" "-o plymouth") \ - $(usex selinux "-a selinux" "-o selinux") \ - $(usex systemd "-a systemd systemd-initrd systemd-networkd" "-o systemd systemd-initrd systemd-networkd") \ - $(usex zfs "-a zfs" "-o zfs") \ - --kver "${PV}-${PN}" \ - --kmoddir "${ROOT}"lib/modules/${PV}-${PN} \ - --fwdir "${ROOT}"lib/firmware \ - --kernel-image "${ROOT}"boot/kernel-${PV}-${PN} - einfo ">>> Dracut: Finished building initramfs" - ewarn "Dracut initramfs has been generated!" - ewarn "" - ewarn "Required kernel arguments:" - ewarn "" - ewarn " root=/dev/ROOT" - ewarn "" - ewarn " Where ROOT is the device node for your root partition as the" - ewarn " one specified in /etc/fstab" - ewarn "" - ewarn "Additional kernel cmdline arguments that *may* be required to boot properly..." - ewarn "" - ewarn "If you use hibernation:" - ewarn "" - ewarn " resume=/dev/SWAP" - ewarn "" - ewarn " Where $SWAP is the swap device used by hibernate software of your choice." - ewarn"" - ewarn " Please consult "man 7 dracut.kernel" for additional kernel arguments." - fi - - if use hardened; then - ewarn "WARNING... WARNING... WARNING..." - ewarn "" - ewarn "Hardened patches have been applied to the kernel and KCONFIG options have been set." - ewarn "These KCONFIG options and patches change kernel behavior." - ewarn "Changes include:" - ewarn "Increased entropy for Address Space Layout Randomization" - ewarn "GCC plugins (if using GCC)" - ewarn "Memory allocation" - ewarn "... and more" - ewarn "" - ewarn "These changes will stop certain programs from functioning" - ewarn "e.g. VirtualBox, Skype" - ewarn "Full information available in $DOCUMENTATION" - ewarn "" - fi - - if use wireguard && [[ ${PV} < "5.6.0" ]]; then - ewarn "WARNING... WARNING... WARNING..." - ewarn "" - ewarn "WireGuard with Linux ${PV} is supported as an external kernel module" - ewarn "You are required to add WireGuard to /etc/conf.d/modules and" - ewarn "add the 'modules' service to the boot runlevel." 
- ewarn "" - ewarn "e.g rc-update add modules boot" - ewarn "" - fi - - # TODO: tidy up below - if use binary && [[ -e "${ROOT}"var/lib/module-rebuild/moduledb ]]; then - ewarn "WARNING... WARNING... WARNING..." - ewarn "" - ewarn "External kernel modules are not yet automatically built" - ewarn "by USE=binary - emerge @modules-rebuild to do this" - ewarn "and regenerate your initramfs if you are using ZFS root filesystem" - ewarn "" - fi - - if use binary; then - if [ -e /etc/boot.conf ]; then - ego boot update - fi - fi -} diff --git a/sys-kernel/debian-sources/debian-sources-5.6.7_p1.ebuild b/sys-kernel/debian-sources/debian-sources-5.6.7_p1.ebuild new file mode 100644 index 000000000000..9643769a7605 --- /dev/null +++ b/sys-kernel/debian-sources/debian-sources-5.6.7_p1.ebuild @@ -0,0 +1,300 @@ +# Distributed under the terms of the GNU General Public License v2 + +# Documentation for adding new kernels -- do not remove! +# +# Find latest stable kernel release for debian here: +# https://packages.debian.org/unstable/kernel/ + +EAPI=5 + +inherit check-reqs eutils mount-boot + +SLOT=$PF +CKV=${PV} +KV_FULL=${PN}-${PVR} +DEB_PV_BASE="5.6.7" +DEB_EXTRAVERSION="-1" +EXTRAVERSION="_p1" + +# install modules to /lib/modules/${DEB_PV_BASE}${EXTRAVERSION}-$MODULE_EXT +MODULE_EXT=${EXTRAVERSION} +[ "$PR" != "r0" ] && MODULE_EXT=$MODULE_EXT-$PR +MODULE_EXT=$MODULE_EXT-${PN} +# install sources to /usr/src/$LINUX_SRCDIR +LINUX_SRCDIR=linux-${PF} +DEB_PV="$DEB_PV_BASE${DEB_EXTRAVERSION}" +KERNEL_ARCHIVE="linux_${DEB_PV_BASE}.orig.tar.xz" +PATCH_ARCHIVE="linux_${DEB_PV}.debian.tar.xz" +RESTRICT="binchecks strip mirror" +LICENSE="GPL-2" +KEYWORDS="*" +IUSE="binary btrfs custom-cflags ec2 luks lvm sign-modules zfs" +DEPEND=" + virtual/libelf + binary? ( >=sys-kernel/genkernel-3.4.40.7 ) + btrfs? ( sys-fs/btrfs-progs sys-kernel/genkernel[btrfs] ) + zfs? ( sys-fs/zfs ) + luks? ( sys-kernel/genkernel[cryptsetup] )" +REQUIRED_USE=" +btrfs? ( binary ) +custom-cflags? ( binary ) +luks? 
( binary ) +lvm? ( binary ) +sign-modules? ( binary ) +zfs? ( binary ) +" +DESCRIPTION="Debian Sources (and optional binary kernel)" +DEB_UPSTREAM="http://http.debian.net/debian/pool/main/l/linux" +HOMEPAGE="https://packages.debian.org/unstable/kernel/" +SRC_URI="$DEB_UPSTREAM/${KERNEL_ARCHIVE} $DEB_UPSTREAM/${PATCH_ARCHIVE}" +S="$WORKDIR/linux-${DEB_PV_BASE}" + +get_patch_list() { + [[ -z "${1}" ]] && die "No patch series file specified" + local patch_series="${1}" + while read line ; do + if [[ "${line:0:1}" != "#" ]] ; then + echo "${line}" + fi + done < "${patch_series}" +} + +tweak_config() { + einfo "Setting $2=$3 in kernel config." + sed -i -e "/^$2=/d" $1 + echo "$2=$3" >> $1 +} + +setno_config() { + einfo "Setting $2*=y to n in kernel config." + sed -i -e "s/^$2\(.*\)=.*/$2\1=n/g" $1 +} + +setyes_config() { + einfo "Setting $2*=* to y in kernel config." + sed -i -e "s/^$2\(.*\)=.*/$2\1=y/g" $1 +} + +zap_config() { + einfo "Removing *$2* from kernel config." + sed -i -e "/$2/d" $1 +} + +pkg_pretend() { + # Ensure we have enough disk space to compile + if use binary ; then + CHECKREQS_DISK_BUILD="5G" + check-reqs_pkg_setup + fi +} + +get_certs_dir() { + # find a certificate dir in /etc/kernel/certs/ that contains signing cert for modules. + for subdir in $PF $P linux; do + certdir=/etc/kernel/certs/$subdir + if [ -d $certdir ]; then + if [ ! -e $certdir/signing_key.pem ]; then + eerror "$certdir exists but missing signing key; exiting." + exit 1 + fi + echo $certdir + return + fi + done +} + +pkg_setup() { + export REAL_ARCH="$ARCH" + unset ARCH; unset LDFLAGS #will interfere with Makefile if set +} + +src_prepare() { + cd "${S}" + for debpatch in $( get_patch_list "${WORKDIR}/debian/patches/series" ); do + epatch -p1 "${WORKDIR}/debian/patches/${debpatch}" + done + # end of debian-specific stuff... 
+ + # do not include debian devs certificates + rm -rf "${WORKDIR}"/debian/certs + + sed -i -e "s:^\(EXTRAVERSION =\).*:\1 ${MODULE_EXT}:" Makefile || die + sed -i -e 's:#export\tINSTALL_PATH:export\tINSTALL_PATH:' Makefile || die + rm -f .config >/dev/null + cp -a "${WORKDIR}"/debian "${T}" + make -s mrproper || die "make mrproper failed" + #make -s include/linux/version.h || die "make include/linux/version.h failed" + cd "${S}" + cp -aR "${WORKDIR}"/debian "${S}"/debian + + ## XFS LIBCRC kernel config fixes, FL-823 + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-xfs-libcrc32c-fix.patch + + ## FL-4424: enable legacy support for MCELOG. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-mcelog.patch + + ## do not configure debian devs certs. + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-nocerts.patch + + ## FL-3381. enable IKCONFIG + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-ikconfig.patch + + ## increase bluetooth polling patch + epatch "${FILESDIR}"/${DEB_PV_BASE}/${PN}-${DEB_PV_BASE}-fix-bluetooth-polling.patch + epatch "${FILESDIR}"/${DEB_PV_BASE}/export_kernel_fpu_functions_5_3.patch + local arch featureset subarch + featureset="standard" + if [[ ${REAL_ARCH} == x86 ]]; then + arch="i386" + subarch="686-pae" + elif [[ ${REAL_ARCH} == amd64 ]]; then + arch="amd64" + subarch="amd64" + else + die "Architecture not handled in ebuild" + fi + cp "${FILESDIR}"/config-extract . || die + chmod +x config-extract || die + ./config-extract ${arch} ${featureset} ${subarch} || die + setno_config .config CONFIG_DEBUG + if use ec2; then + setyes_config .config CONFIG_BLK_DEV_NVME + setyes_config .config CONFIG_XEN_BLKDEV_FRONTEND + setyes_config .config CONFIG_XEN_BLKDEV_BACKEND + setyes_config .config CONFIG_IXGBEVF + fi + if use sign-modules; then + certs_dir=$(get_certs_dir) + echo + if [ -z "$certs_dir" ]; then + eerror "No certs dir found in /etc/kernel/certs; aborting." 
+ die + else + einfo "Using certificate directory of $certs_dir for kernel module signing." + fi + echo + # turn on options for signing modules. + # first, remove existing configs and comments: + zap_config .config CONFIG_MODULE_SIG + # now add our settings: + tweak_config .config CONFIG_MODULE_SIG y + tweak_config .config CONFIG_MODULE_SIG_FORCE n + tweak_config .config CONFIG_MODULE_SIG_ALL n + tweak_config .config CONFIG_MODULE_SIG_HASH \"sha512\" + tweak_config .config CONFIG_MODULE_SIG_KEY \"${certs_dir}/signing_key.pem\" + tweak_config .config CONFIG_SYSTEM_TRUSTED_KEYRING y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE y + tweak_config .config CONFIG_SYSTEM_EXTRA_CERTIFICATE_SIZE 4096 + echo "CONFIG_MODULE_SIG_SHA512=y" >> .config + ewarn "This kernel will ALLOW non-signed modules to be loaded with a WARNING." + ewarn "To enable strict enforcement, YOU MUST add module.sig_enforce=1 as a kernel boot" + ewarn "parameter (to params in /etc/boot.conf, and re-run boot-update.)" + echo + fi + if use custom-cflags; then + MARCH="$(python -c "import portage; print(portage.settings[\"CFLAGS\"])" | sed 's/ /\n/g' | grep "march")" + if [ -n "$MARCH" ]; then + sed -i -e 's/-mtune=generic/$MARCH/g' arch/x86/Makefile || die "Canna optimize this kernel anymore, captain!" + fi + fi + # get config into good state: + yes "" | make oldconfig >/dev/null 2>&1 || die + cp .config "${T}"/config || die + make -s mrproper || die "make mrproper failed" +} + +src_compile() { + ! 
use binary && return + install -d "${WORKDIR}"/out/{lib,boot} + install -d "${T}"/{cache,twork} + install -d "${WORKDIR}"/build + cp "${T}"/config "${WORKDIR}"/build/.config + DEFAULT_KERNEL_SOURCE="${S}" CMD_KERNEL_DIR="${S}" genkernel ${GKARGS} \ + --no-save-config \ + --no-oldconfig \ + --kernel-config=${T}/config \ + --kernname="${PN}" \ + --build-src="${S}" \ + --build-dst="${WORKDIR}"/build \ + --makeopts="${MAKEOPTS}" \ + --cachedir="${T}"/cache \ + --tempdir="${T}"/twork \ + --logfile="${WORKDIR}"/genkernel.log \ + --bootdir="${WORKDIR}"/out/boot \ + --disklabel \ + $(usex lvm --lvm --no-lvm ) \ + $(usex luks --luks --no-luks ) \ + --mdadm \ + $(usex btrfs --btrfs --no-btrfs) \ + $(usex zfs --zfs --no-zfs) \ + --module-prefix="${WORKDIR}"/out \ + all || die +} + +src_install() { + # copy sources into place: + dodir /usr/src + cp -a "${S}" "${D}"/usr/src/${LINUX_SRCDIR} || die + cd "${D}"/usr/src/${LINUX_SRCDIR} + # prepare for real-world use and 3rd-party module building: + make mrproper || die + cp "${T}"/config .config || die + cp -a "${T}"/debian debian || die + + + # if we didn't use genkernel, we're done. The kernel source tree is left in + # an unconfigured state - you can't compile 3rd-party modules against it yet. + use binary || return + make prepare || die + make scripts || die + # OK, now the source tree is configured to allow 3rd-party modules to be + # built against it, since we want that to work since we have a binary kernel + # built. 
+ cp -a "${WORKDIR}"/out/* "${D}"/ || die "couldn't copy output files into place" + # module symlink fixup: + rm -f "${D}"/lib/modules/*/source || die + rm -f "${D}"/lib/modules/*/build || die + cd "${D}"/lib/modules + local moddir="$(ls -d [234]*)" + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/source || die + ln -s /usr/src/${LINUX_SRCDIR} "${D}"/lib/modules/${moddir}/build || die + # Fixes FL-14 + cp "${WORKDIR}/build/System.map" "${D}/usr/src/${LINUX_SRCDIR}/" || die + cp "${WORKDIR}/build/Module.symvers" "${D}/usr/src/${LINUX_SRCDIR}/" || die + if use sign-modules; then + for x in $(find "${D}"/lib/modules -iname *.ko); do + # $certs_dir defined previously in this function. + ${WORKDIR}/build/scripts/sign-file sha512 $certs_dir/signing_key.pem $certs_dir/signing_key.x509 $x || die + done + # install the sign-file executable for future use. + exeinto /usr/src/${LINUX_SRCDIR}/scripts + doexe ${WORKDIR}/build/scripts/sign-file + fi + + # The new naming scheme leaves an extra -${PN} at the name of various things in /boot. This should fix that. + cd ${D}/boot + for x in $(ls *); do + xnew=${x%-${PN}} + mv $x ${xnew} || die + done + + +} + +pkg_postinst() { + if use binary && [[ -h "${ROOT}"usr/src/linux ]]; then + rm "${ROOT}"usr/src/linux + fi + if use binary && [[ ! 
-e "${ROOT}"usr/src/linux ]]; then + ewarn "With binary use flag enabled /usr/src/linux" + ewarn "symlink automatically set to debian kernel" + ln -sf ${LINUX_SRCDIR} "${ROOT}"usr/src/linux + fi + + if [ -e ${ROOT}lib/modules ]; then + depmod -a $DEP_PV + fi + if [ -e /etc/boot.conf ]; then + ego boot update + fi +} diff --git a/sys-kernel/debian-sources/files/4.19.118/amdgpu-picasso.patch b/sys-kernel/debian-sources/files/4.19.118/amdgpu-picasso.patch new file mode 100644 index 000000000000..8f3180118b2c --- /dev/null +++ b/sys-kernel/debian-sources/files/4.19.118/amdgpu-picasso.patch @@ -0,0 +1,20 @@ +--- linux-4.19.66-gentoo/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 2019-08-26 06:23:22.455074755 +0200 ++++ linux-4.19.66-gentoo/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 2019-09-09 13:11:52.879809819 +0200 +@@ -785,6 +785,7 @@ static const struct pci_device_id pciidl + {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + /* Raven */ + {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, ++ {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_MOBILITY|AMD_IS_APU}, + + {0, 0, 0} + }; +--- linux-4.19.66-gentoo/drivers/gpu/drm/amd/amdkfd/kfd_device.c 2019-08-26 06:23:22.683079348 +0200 ++++ linux-4.19.66-gentoo/drivers/gpu/drm/amd/amdkfd/kfd_device.c 2019-09-09 13:31:18.563024146 +0200 +@@ -275,6 +275,7 @@ static const struct kfd_deviceid support + { 0x9876, &carrizo_device_info }, /* Carrizo */ + { 0x9877, &carrizo_device_info }, /* Carrizo */ + { 0x15DD, &raven_device_info }, /* Raven */ ++ { 0x15D8, &raven_device_info }, /* Raven */ + #endif + { 0x67A0, &hawaii_device_info }, /* Hawaii */ + { 0x67A1, &hawaii_device_info }, /* Hawaii */ diff --git a/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-fix-bluetooth-polling.patch b/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-fix-bluetooth-polling.patch new file mode 100644 index 000000000000..b7e7ddb275e5 --- /dev/null +++ 
b/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-fix-bluetooth-polling.patch @@ -0,0 +1,45 @@ +Update the default BLE connection parameters. + +Commit c49a8682fc5d298d44e8d911f4fa14690ea9485e introduced a bounds +check on connection interval update requests, but the default min/max +values were left at 24-40 (30-50ms) which caused problems for devices +that want to negotiate connection intervals outside of those bounds. + +Setting the default min/max connection interval to the full allowable +range in the bluetooth specification restores the default Linux behavior +of allowing remote devices to negotiate their desired connection +interval, while still permitting the system administrator to later +narrow the range. + +The default supervision timeout must also be modified to accommodate +the max connection interval increase. The new default value meets the +requirements of the bluetooth specification and the conditions in +the hci_check_conn_params function. + +The downside to modifying the default supervision timeout is that +it will take longer (about 10 seconds) to detect a link loss condition. 
+ +Fixes c49a8682fc5d: (validate BLE connection interval updates) + +Signed-off-by: Carey Sonsino <csonsino@xxxxxxxxx> + +--- + +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index 04bc79359a17..895d17ec9291 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -3181,10 +3181,10 @@ struct hci_dev *hci_alloc_dev(void) + hdev->le_adv_max_interval = 0x0800; + hdev->le_scan_interval = 0x0060; + hdev->le_scan_window = 0x0030; +- hdev->le_conn_min_interval = 0x0018; +- hdev->le_conn_max_interval = 0x0028; ++ hdev->le_conn_min_interval = 0x0006; ++ hdev->le_conn_max_interval = 0x0c80; + hdev->le_conn_latency = 0x0000; +- hdev->le_supv_timeout = 0x002a; ++ hdev->le_supv_timeout = 0x03ea; + hdev->le_def_tx_len = 0x001b; + hdev->le_def_tx_time = 0x0148; + hdev->le_max_tx_len = 0x001b; diff --git a/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-ikconfig.patch b/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-ikconfig.patch new file mode 100644 index 000000000000..7fd83ab8ddd4 --- /dev/null +++ b/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-ikconfig.patch @@ -0,0 +1,13 @@ +diff -Nuar debian/config/config debian-fixed/config/config +--- debian/config/config 2018-12-16 17:37:36.000000000 +0000 ++++ debian-fixed/config/config 2018-12-21 19:38:42.417410783 +0000 +@@ -5970,7 +5970,8 @@ + CONFIG_TASK_XACCT=y + CONFIG_TASK_IO_ACCOUNTING=y + CONFIG_CPU_ISOLATION=y +-# CONFIG_IKCONFIG is not set ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y + CONFIG_LOG_BUF_SHIFT=17 + CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 + CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 diff --git a/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-mcelog.patch b/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-mcelog.patch new file mode 100644 index 000000000000..dd219db89ab4 --- /dev/null +++ b/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-mcelog.patch @@ -0,0 +1,14 @@ +diff -Nuar 
debian/config/config debian-fixed/config/config +--- debian/config/config 2017-10-28 22:51:59.000000000 +0300 ++++ debian-fixed/config/config 2017-11-13 06:47:04.809885320 +0200 +@@ -14,6 +14,10 @@ + CONFIG_STRICT_KERNEL_RWX=y + + ## ++## file: arch/x86/Kconfig ++## ++CONFIG_X86_MCELOG_LEGACY=y ++ + ## file: block/Kconfig + ## + CONFIG_BLOCK=y diff --git a/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-nocerts.patch b/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-nocerts.patch new file mode 100644 index 000000000000..4fae0b08eb93 --- /dev/null +++ b/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-nocerts.patch @@ -0,0 +1,12 @@ +diff -urN debian/config/config debian.fixed/config/config +--- debian/config/config 2019-05-05 10:59:14.000000000 -0400 ++++ debian.fixed/config/config 2019-05-06 13:03:53.445432253 -0400 +@@ -77,7 +77,7 @@ + #. Actually a file containing X.509 certificates, not keys. + #. Whenever the filename changes, this also needs to be updated in + #. debian/featureset-*/config +-CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/debian-uefi-certs.pem" ++CONFIG_SYSTEM_TRUSTED_KEYS="" + #. Add secondary keyring with keys from UEFI db and MOK. 
+ CONFIG_SECONDARY_TRUSTED_KEYRING=y + CONFIG_SYSTEM_BLACKLIST_KEYRING=y diff --git a/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-xfs-libcrc32c-fix.patch b/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-xfs-libcrc32c-fix.patch new file mode 100644 index 000000000000..da144c5e9b74 --- /dev/null +++ b/sys-kernel/debian-sources/files/4.19.118/debian-sources-4.19.118-xfs-libcrc32c-fix.patch @@ -0,0 +1,27 @@ +diff -Nuar debian/config/config debian-fix/config/config +--- debian/config/config 2014-04-29 01:57:15.000000000 +0000 ++++ debian-fix/config/config 2014-05-16 15:02:38.622819509 +0000 +@@ -1,4 +1,4 @@ +-## ++### + ## file: arch/Kconfig + ## + CONFIG_KPROBES=y +@@ -4710,7 +4710,7 @@ + ## + ## file: fs/xfs/Kconfig + ## +-CONFIG_XFS_FS=m ++CONFIG_XFS_FS=y + CONFIG_XFS_QUOTA=y + CONFIG_XFS_POSIX_ACL=y + CONFIG_XFS_RT=y +@@ -4909,7 +4909,7 @@ + CONFIG_CRC32=y + # CONFIG_CRC32_SELFTEST is not set + CONFIG_CRC7=m +-CONFIG_LIBCRC32C=m ++CONFIG_LIBCRC32C=y + # CONFIG_CRC8 is not set + # CONFIG_RANDOM32_SELFTEST is not set + CONFIG_CORDIC=m diff --git a/sys-kernel/debian-sources/files/4.19.118/usb-blacklist-endpoint-sound-devices-usbpre2.patch b/sys-kernel/debian-sources/files/4.19.118/usb-blacklist-endpoint-sound-devices-usbpre2.patch new file mode 100644 index 000000000000..b1b68515fb5c --- /dev/null +++ b/sys-kernel/debian-sources/files/4.19.118/usb-blacklist-endpoint-sound-devices-usbpre2.patch @@ -0,0 +1,131 @@ +See: https://lore.kernel.org/linux-usb/20200202134159.GM10381@localhost/ + +Add a new device quirk that can be used to blacklist endpoints. + +Since commit 3e4f8e21c4f2 ("USB: core: fix check for duplicate +endpoints") USB core ignores any duplicate endpoints found during +descriptor parsing. + +In order to handle devices where the first interfaces with duplicate +endpoints are the ones that should have their endpoints ignored, we need +to add a blacklist. 
+ +Signed-off-by: Johan Hovold <johan@kernel.org> +--- + drivers/usb/core/config.c | 11 +++++++++++ + drivers/usb/core/quirks.c | 35 +++++++++++++++++++++++++++++++++++ + drivers/usb/core/usb.h | 3 +++ + include/linux/usb/quirks.h | 3 +++ + 4 files changed, 52 insertions(+) + +diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c +index 26bc05e48d8a..7df22bcefa9d 100644 +--- a/drivers/usb/core/config.c ++++ b/drivers/usb/core/config.c +@@ -256,6 +256,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, + struct usb_host_interface *ifp, int num_ep, + unsigned char *buffer, int size) + { ++ struct usb_device *udev = to_usb_device(ddev); + unsigned char *buffer0 = buffer; + struct usb_endpoint_descriptor *d; + struct usb_host_endpoint *endpoint; +@@ -297,6 +298,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, + goto skip_to_next_endpoint_or_interface_descriptor; + } + ++ /* Ignore blacklisted endpoints */ ++ if (udev->quirks & USB_QUIRK_ENDPOINT_BLACKLIST) { ++ if (usb_endpoint_is_blacklisted(udev, ifp, d)) { ++ dev_warn(ddev, "config %d interface %d altsetting %d has a blacklisted endpoint with address 0x%X, skipping\n", ++ cfgno, inum, asnum, ++ d->bEndpointAddress); ++ goto skip_to_next_endpoint_or_interface_descriptor; ++ } ++ } ++ + endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; + ++ifp->desc.bNumEndpoints; + +diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c +index 6b6413073584..9925b18e2154 100644 +--- a/drivers/usb/core/quirks.c ++++ b/drivers/usb/core/quirks.c +@@ -354,6 +354,9 @@ static const struct usb_device_id usb_quirk_list[] = { + { USB_DEVICE(0x0904, 0x6103), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + ++ /* Sound Devices USBPre2 */ ++ { USB_DEVICE(0x0926, 0x0202), .driver_info = USB_QUIRK_ENDPOINT_BLACKLIST }, ++ + /* Keytouch QWERTY Panel keyboard */ + { USB_DEVICE(0x0926, 0x3333), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, +@@ -472,6 +475,38 @@ static const 
struct usb_device_id usb_amd_resume_quirk_list[] = { + { } /* terminating entry must be last */ + }; + ++/* ++ * Entries for blacklisted endpoints. ++ * ++ * Matched for devices with USB_QUIRK_ENDPOINT_BLACKLIST. ++ */ ++static const struct usb_device_id usb_endpoint_blacklist_quirk_list[] = { ++ { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0202, 1), .driver_info = 0x85 }, ++ { }, ++}; ++ ++bool usb_endpoint_is_blacklisted(struct usb_device *udev, ++ struct usb_host_interface *intf, ++ struct usb_endpoint_descriptor *epd) ++{ ++ const struct usb_device_id *id; ++ unsigned int address; ++ ++ for (id = usb_endpoint_blacklist_quirk_list; id->match_flags; ++id) { ++ if (!usb_match_device(udev, id)) ++ continue; ++ ++ if (!usb_match_one_id_intf(udev, intf, id)) ++ continue; ++ ++ address = id->driver_info; ++ if (address == epd->bEndpointAddress) ++ return true; ++ } ++ ++ return false; ++} ++ + static bool usb_match_any_interface(struct usb_device *udev, + const struct usb_device_id *id) + { +diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h +index cf4783cf661a..3ad0ee57e859 100644 +--- a/drivers/usb/core/usb.h ++++ b/drivers/usb/core/usb.h +@@ -37,6 +37,9 @@ extern void usb_authorize_interface(struct usb_interface *); + extern void usb_detect_quirks(struct usb_device *udev); + extern void usb_detect_interface_quirks(struct usb_device *udev); + extern void usb_release_quirk_list(void); ++extern bool usb_endpoint_is_blacklisted(struct usb_device *udev, ++ struct usb_host_interface *intf, ++ struct usb_endpoint_descriptor *epd); + extern int usb_remove_device(struct usb_device *udev); + + extern int usb_get_device_descriptor(struct usb_device *dev, +diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h +index a1be64c9940f..22c1f579afe3 100644 +--- a/include/linux/usb/quirks.h ++++ b/include/linux/usb/quirks.h +@@ -69,4 +69,7 @@ + /* Hub needs extra delay after resetting its port. 
*/ + #define USB_QUIRK_HUB_SLOW_RESET BIT(14) + ++/* device has blacklisted endpoints */ ++#define USB_QUIRK_ENDPOINT_BLACKLIST BIT(15) ++ + #endif /* __LINUX_USB_QUIRKS_H */ +-- +2.24.1 + diff --git a/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-fix-bluetooth-polling.patch b/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-fix-bluetooth-polling.patch new file mode 100644 index 000000000000..b7e7ddb275e5 --- /dev/null +++ b/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-fix-bluetooth-polling.patch @@ -0,0 +1,45 @@ +Update the default BLE connection parameters. + +Commit c49a8682fc5d298d44e8d911f4fa14690ea9485e introduced a bounds +check on connection interval update requests, but the default min/max +values were left at 24-40 (30-50ms) which caused problems for devices +that want to negotiate connection intervals outside of those bounds. + +Setting the default min/max connection interval to the full allowable +range in the bluetooth specification restores the default Linux behavior +of allowing remote devices to negotiate their desired connection +interval, while still permitting the system administrator to later +narrow the range. + +The default supervision timeout must also be modified to accommodate +the max connection interval increase. The new default value meets the +requirements of the bluetooth specification and the conditions in +the hci_check_conn_params function. + +The downside to modifying the default supervision timeout is that +it will take longer (about 10 seconds) to detect a link loss condition. 
+ +Fixes c49a8682fc5d: (validate BLE connection interval updates) + +Signed-off-by: Carey Sonsino <csonsino@xxxxxxxxx> + +--- + +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index 04bc79359a17..895d17ec9291 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -3181,10 +3181,10 @@ struct hci_dev *hci_alloc_dev(void) + hdev->le_adv_max_interval = 0x0800; + hdev->le_scan_interval = 0x0060; + hdev->le_scan_window = 0x0030; +- hdev->le_conn_min_interval = 0x0018; +- hdev->le_conn_max_interval = 0x0028; ++ hdev->le_conn_min_interval = 0x0006; ++ hdev->le_conn_max_interval = 0x0c80; + hdev->le_conn_latency = 0x0000; +- hdev->le_supv_timeout = 0x002a; ++ hdev->le_supv_timeout = 0x03ea; + hdev->le_def_tx_len = 0x001b; + hdev->le_def_tx_time = 0x0148; + hdev->le_max_tx_len = 0x001b; diff --git a/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-ikconfig.patch b/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-ikconfig.patch new file mode 100644 index 000000000000..7fd83ab8ddd4 --- /dev/null +++ b/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-ikconfig.patch @@ -0,0 +1,13 @@ +diff -Nuar debian/config/config debian-fixed/config/config +--- debian/config/config 2018-12-16 17:37:36.000000000 +0000 ++++ debian-fixed/config/config 2018-12-21 19:38:42.417410783 +0000 +@@ -5970,7 +5970,8 @@ + CONFIG_TASK_XACCT=y + CONFIG_TASK_IO_ACCOUNTING=y + CONFIG_CPU_ISOLATION=y +-# CONFIG_IKCONFIG is not set ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y + CONFIG_LOG_BUF_SHIFT=17 + CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 + CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 diff --git a/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-mcelog.patch b/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-mcelog.patch new file mode 100644 index 000000000000..dd219db89ab4 --- /dev/null +++ b/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-mcelog.patch @@ -0,0 +1,14 @@ +diff 
-Nuar debian/config/config debian-fixed/config/config +--- debian/config/config 2017-10-28 22:51:59.000000000 +0300 ++++ debian-fixed/config/config 2017-11-13 06:47:04.809885320 +0200 +@@ -14,6 +14,10 @@ + CONFIG_STRICT_KERNEL_RWX=y + + ## ++## file: arch/x86/Kconfig ++## ++CONFIG_X86_MCELOG_LEGACY=y ++ + ## file: block/Kconfig + ## + CONFIG_BLOCK=y diff --git a/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-nocerts.patch b/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-nocerts.patch new file mode 100644 index 000000000000..4fae0b08eb93 --- /dev/null +++ b/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-nocerts.patch @@ -0,0 +1,12 @@ +diff -urN debian/config/config debian.fixed/config/config +--- debian/config/config 2019-05-05 10:59:14.000000000 -0400 ++++ debian.fixed/config/config 2019-05-06 13:03:53.445432253 -0400 +@@ -77,7 +77,7 @@ + #. Actually a file containing X.509 certificates, not keys. + #. Whenever the filename changes, this also needs to be updated in + #. debian/featureset-*/config +-CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/debian-uefi-certs.pem" ++CONFIG_SYSTEM_TRUSTED_KEYS="" + #. Add secondary keyring with keys from UEFI db and MOK. 
+ CONFIG_SECONDARY_TRUSTED_KEYRING=y + CONFIG_SYSTEM_BLACKLIST_KEYRING=y diff --git a/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-xfs-libcrc32c-fix.patch b/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-xfs-libcrc32c-fix.patch new file mode 100644 index 000000000000..da144c5e9b74 --- /dev/null +++ b/sys-kernel/debian-sources/files/4.19.98/debian-sources-lts-4.19.98-xfs-libcrc32c-fix.patch @@ -0,0 +1,27 @@ +diff -Nuar debian/config/config debian-fix/config/config +--- debian/config/config 2014-04-29 01:57:15.000000000 +0000 ++++ debian-fix/config/config 2014-05-16 15:02:38.622819509 +0000 +@@ -1,4 +1,4 @@ +-## ++### + ## file: arch/Kconfig + ## + CONFIG_KPROBES=y +@@ -4710,7 +4710,7 @@ + ## + ## file: fs/xfs/Kconfig + ## +-CONFIG_XFS_FS=m ++CONFIG_XFS_FS=y + CONFIG_XFS_QUOTA=y + CONFIG_XFS_POSIX_ACL=y + CONFIG_XFS_RT=y +@@ -4909,7 +4909,7 @@ + CONFIG_CRC32=y + # CONFIG_CRC32_SELFTEST is not set + CONFIG_CRC7=m +-CONFIG_LIBCRC32C=m ++CONFIG_LIBCRC32C=y + # CONFIG_CRC8 is not set + # CONFIG_RANDOM32_SELFTEST is not set + CONFIG_CORDIC=m diff --git a/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-fix-bluetooth-polling.patch b/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-fix-bluetooth-polling.patch new file mode 100644 index 000000000000..b7e7ddb275e5 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-fix-bluetooth-polling.patch @@ -0,0 +1,45 @@ +Update the default BLE connection parameters. + +Commit c49a8682fc5d298d44e8d911f4fa14690ea9485e introduced a bounds +check on connection interval update requests, but the default min/max +values were left at 24-40 (30-50ms) which caused problems for devices +that want to negotiate connection intervals outside of those bounds. 
+ +Setting the default min/max connection interval to the full allowable +range in the bluetooth specification restores the default Linux behavior +of allowing remote devices to negotiate their desired connection +interval, while still permitting the system administrator to later +narrow the range. + +The default supervision timeout must also be modified to accommodate +the max connection interval increase. The new default value meets the +requirements of the bluetooth specification and the conditions in +the hci_check_conn_params function. + +The downside to modifying the default supervision timeout is that +it will take longer (about 10 seconds) to detect a link loss condition. + +Fixes c49a8682fc5d: (validate BLE connection interval updates) + +Signed-off-by: Carey Sonsino <csonsino@xxxxxxxxx> + +--- + +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index 04bc79359a17..895d17ec9291 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -3181,10 +3181,10 @@ struct hci_dev *hci_alloc_dev(void) + hdev->le_adv_max_interval = 0x0800; + hdev->le_scan_interval = 0x0060; + hdev->le_scan_window = 0x0030; +- hdev->le_conn_min_interval = 0x0018; +- hdev->le_conn_max_interval = 0x0028; ++ hdev->le_conn_min_interval = 0x0006; ++ hdev->le_conn_max_interval = 0x0c80; + hdev->le_conn_latency = 0x0000; +- hdev->le_supv_timeout = 0x002a; ++ hdev->le_supv_timeout = 0x03ea; + hdev->le_def_tx_len = 0x001b; + hdev->le_def_tx_time = 0x0148; + hdev->le_max_tx_len = 0x001b; diff --git a/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-ikconfig.patch b/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-ikconfig.patch new file mode 100644 index 000000000000..7fd83ab8ddd4 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-ikconfig.patch @@ -0,0 +1,13 @@ +diff -Nuar debian/config/config debian-fixed/config/config +--- debian/config/config 2018-12-16 17:37:36.000000000 +0000 ++++ 
debian-fixed/config/config 2018-12-21 19:38:42.417410783 +0000 +@@ -5970,7 +5970,8 @@ + CONFIG_TASK_XACCT=y + CONFIG_TASK_IO_ACCOUNTING=y + CONFIG_CPU_ISOLATION=y +-# CONFIG_IKCONFIG is not set ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y + CONFIG_LOG_BUF_SHIFT=17 + CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 + CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 diff --git a/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-mcelog.patch b/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-mcelog.patch new file mode 100644 index 000000000000..dd219db89ab4 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-mcelog.patch @@ -0,0 +1,14 @@ +diff -Nuar debian/config/config debian-fixed/config/config +--- debian/config/config 2017-10-28 22:51:59.000000000 +0300 ++++ debian-fixed/config/config 2017-11-13 06:47:04.809885320 +0200 +@@ -14,6 +14,10 @@ + CONFIG_STRICT_KERNEL_RWX=y + + ## ++## file: arch/x86/Kconfig ++## ++CONFIG_X86_MCELOG_LEGACY=y ++ + ## file: block/Kconfig + ## + CONFIG_BLOCK=y diff --git a/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-nocerts.patch b/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-nocerts.patch new file mode 100644 index 000000000000..7f5c79b0d802 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-nocerts.patch @@ -0,0 +1,12 @@ +diff -urN debian/config/config debian.fixed/config/config +--- debian/config/config 2019-10-19 18:35:10.000000000 -0400 ++++ debian.fixed/config/config 2019-10-22 01:13:53.262687881 -0400 +@@ -68,7 +68,7 @@ + #. Actually a file containing X.509 certificates, not keys. + #. Whenever the filename changes, this also needs to be updated in + #. debian/featureset-*/config +-CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/debian-uefi-certs.pem" ++CONFIG_SYSTEM_TRUSTED_KEYS="" + #. Add secondary keyring with keys from UEFI db and MOK. 
+ CONFIG_SECONDARY_TRUSTED_KEYRING=y + CONFIG_SYSTEM_BLACKLIST_KEYRING=y diff --git a/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-xfs-libcrc32c-fix.patch b/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-xfs-libcrc32c-fix.patch new file mode 100644 index 000000000000..da144c5e9b74 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.4.19/debian-sources-5.4.19-xfs-libcrc32c-fix.patch @@ -0,0 +1,27 @@ +diff -Nuar debian/config/config debian-fix/config/config +--- debian/config/config 2014-04-29 01:57:15.000000000 +0000 ++++ debian-fix/config/config 2014-05-16 15:02:38.622819509 +0000 +@@ -1,4 +1,4 @@ +-## ++### + ## file: arch/Kconfig + ## + CONFIG_KPROBES=y +@@ -4710,7 +4710,7 @@ + ## + ## file: fs/xfs/Kconfig + ## +-CONFIG_XFS_FS=m ++CONFIG_XFS_FS=y + CONFIG_XFS_QUOTA=y + CONFIG_XFS_POSIX_ACL=y + CONFIG_XFS_RT=y +@@ -4909,7 +4909,7 @@ + CONFIG_CRC32=y + # CONFIG_CRC32_SELFTEST is not set + CONFIG_CRC7=m +-CONFIG_LIBCRC32C=m ++CONFIG_LIBCRC32C=y + # CONFIG_CRC8 is not set + # CONFIG_RANDOM32_SELFTEST is not set + CONFIG_CORDIC=m diff --git a/sys-kernel/debian-sources/files/5.4.19/export_kernel_fpu_functions_5_3.patch b/sys-kernel/debian-sources/files/5.4.19/export_kernel_fpu_functions_5_3.patch new file mode 100644 index 000000000000..af71d043e612 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.4.19/export_kernel_fpu_functions_5_3.patch @@ -0,0 +1,43 @@ +From 1e010beda2896bdf3082fb37a3e49f8ce20e04d8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= <joerg@thalheim.io> +Date: Thu, 2 May 2019 05:28:08 +0100 +Subject: [PATCH] x86/fpu: Export kernel_fpu_{begin,end}() with + EXPORT_SYMBOL_GPL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We need these symbols in zfs as the fpu implementation breaks userspace: + +https://github.com/zfsonlinux/zfs/issues/9346 +Signed-off-by: Jörg Thalheim <joerg@thalheim.io> +--- + arch/x86/kernel/fpu/core.c | 4 ++-- + 1 file changed, 2 
insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c +index 12c70840980e..352538b3bb5d 100644 +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -102,7 +102,7 @@ void kernel_fpu_begin(void) + } + __cpu_invalidate_fpregs_state(); + } +-EXPORT_SYMBOL_GPL(kernel_fpu_begin); ++EXPORT_SYMBOL(kernel_fpu_begin); + + void kernel_fpu_end(void) + { +@@ -111,7 +111,7 @@ void kernel_fpu_end(void) + this_cpu_write(in_kernel_fpu, false); + preempt_enable(); + } +-EXPORT_SYMBOL_GPL(kernel_fpu_end); ++EXPORT_SYMBOL(kernel_fpu_end); + + /* + * Save the FPU state (mark it for reload if necessary): +-- +2.23.0 + + diff --git a/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-fix-bluetooth-polling.patch b/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-fix-bluetooth-polling.patch new file mode 100644 index 000000000000..b7e7ddb275e5 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-fix-bluetooth-polling.patch @@ -0,0 +1,45 @@ +Update the default BLE connection parameters. + +Commit c49a8682fc5d298d44e8d911f4fa14690ea9485e introduced a bounds +check on connection interval update requests, but the default min/max +values were left at 24-40 (30-50ms) which caused problems for devices +that want to negotiate connection intervals outside of those bounds. + +Setting the default min/max connection interval to the full allowable +range in the bluetooth specification restores the default Linux behavior +of allowing remote devices to negotiate their desired connection +interval, while still permitting the system administrator to later +narrow the range. + +The default supervision timeout must also be modified to accommodate +the max connection interval increase. The new default value meets the +requirements of the bluetooth specification and the conditions in +the hci_check_conn_params function. 
+ +The downside to modifying the default supervision timeout is that +it will take longer (about 10 seconds) to detect a link loss condition. + +Fixes c49a8682fc5d: (validate BLE connection interval updates) + +Signed-off-by: Carey Sonsino <csonsino@xxxxxxxxx> + +--- + +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index 04bc79359a17..895d17ec9291 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -3181,10 +3181,10 @@ struct hci_dev *hci_alloc_dev(void) + hdev->le_adv_max_interval = 0x0800; + hdev->le_scan_interval = 0x0060; + hdev->le_scan_window = 0x0030; +- hdev->le_conn_min_interval = 0x0018; +- hdev->le_conn_max_interval = 0x0028; ++ hdev->le_conn_min_interval = 0x0006; ++ hdev->le_conn_max_interval = 0x0c80; + hdev->le_conn_latency = 0x0000; +- hdev->le_supv_timeout = 0x002a; ++ hdev->le_supv_timeout = 0x03ea; + hdev->le_def_tx_len = 0x001b; + hdev->le_def_tx_time = 0x0148; + hdev->le_max_tx_len = 0x001b; diff --git a/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-ikconfig.patch b/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-ikconfig.patch new file mode 100644 index 000000000000..7fd83ab8ddd4 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-ikconfig.patch @@ -0,0 +1,13 @@ +diff -Nuar debian/config/config debian-fixed/config/config +--- debian/config/config 2018-12-16 17:37:36.000000000 +0000 ++++ debian-fixed/config/config 2018-12-21 19:38:42.417410783 +0000 +@@ -5970,7 +5970,8 @@ + CONFIG_TASK_XACCT=y + CONFIG_TASK_IO_ACCOUNTING=y + CONFIG_CPU_ISOLATION=y +-# CONFIG_IKCONFIG is not set ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y + CONFIG_LOG_BUF_SHIFT=17 + CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 + CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 diff --git a/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-mcelog.patch b/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-mcelog.patch new file mode 100644 index 000000000000..dd219db89ab4 --- /dev/null 
+++ b/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-mcelog.patch @@ -0,0 +1,14 @@ +diff -Nuar debian/config/config debian-fixed/config/config +--- debian/config/config 2017-10-28 22:51:59.000000000 +0300 ++++ debian-fixed/config/config 2017-11-13 06:47:04.809885320 +0200 +@@ -14,6 +14,10 @@ + CONFIG_STRICT_KERNEL_RWX=y + + ## ++## file: arch/x86/Kconfig ++## ++CONFIG_X86_MCELOG_LEGACY=y ++ + ## file: block/Kconfig + ## + CONFIG_BLOCK=y diff --git a/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-nocerts.patch b/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-nocerts.patch new file mode 100644 index 000000000000..7f5c79b0d802 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-nocerts.patch @@ -0,0 +1,12 @@ +diff -urN debian/config/config debian.fixed/config/config +--- debian/config/config 2019-10-19 18:35:10.000000000 -0400 ++++ debian.fixed/config/config 2019-10-22 01:13:53.262687881 -0400 +@@ -68,7 +68,7 @@ + #. Actually a file containing X.509 certificates, not keys. + #. Whenever the filename changes, this also needs to be updated in + #. debian/featureset-*/config +-CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/debian-uefi-certs.pem" ++CONFIG_SYSTEM_TRUSTED_KEYS="" + #. Add secondary keyring with keys from UEFI db and MOK. 
+ CONFIG_SECONDARY_TRUSTED_KEYRING=y + CONFIG_SYSTEM_BLACKLIST_KEYRING=y diff --git a/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-xfs-libcrc32c-fix.patch b/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-xfs-libcrc32c-fix.patch new file mode 100644 index 000000000000..da144c5e9b74 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.5.17/debian-sources-5.5.17-xfs-libcrc32c-fix.patch @@ -0,0 +1,27 @@ +diff -Nuar debian/config/config debian-fix/config/config +--- debian/config/config 2014-04-29 01:57:15.000000000 +0000 ++++ debian-fix/config/config 2014-05-16 15:02:38.622819509 +0000 +@@ -1,4 +1,4 @@ +-## ++### + ## file: arch/Kconfig + ## + CONFIG_KPROBES=y +@@ -4710,7 +4710,7 @@ + ## + ## file: fs/xfs/Kconfig + ## +-CONFIG_XFS_FS=m ++CONFIG_XFS_FS=y + CONFIG_XFS_QUOTA=y + CONFIG_XFS_POSIX_ACL=y + CONFIG_XFS_RT=y +@@ -4909,7 +4909,7 @@ + CONFIG_CRC32=y + # CONFIG_CRC32_SELFTEST is not set + CONFIG_CRC7=m +-CONFIG_LIBCRC32C=m ++CONFIG_LIBCRC32C=y + # CONFIG_CRC8 is not set + # CONFIG_RANDOM32_SELFTEST is not set + CONFIG_CORDIC=m diff --git a/sys-kernel/debian-sources/files/5.5.17/export_kernel_fpu_functions_5_3.patch b/sys-kernel/debian-sources/files/5.5.17/export_kernel_fpu_functions_5_3.patch new file mode 100644 index 000000000000..af71d043e612 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.5.17/export_kernel_fpu_functions_5_3.patch @@ -0,0 +1,43 @@ +From 1e010beda2896bdf3082fb37a3e49f8ce20e04d8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= <joerg@thalheim.io> +Date: Thu, 2 May 2019 05:28:08 +0100 +Subject: [PATCH] x86/fpu: Export kernel_fpu_{begin,end}() with + EXPORT_SYMBOL_GPL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We need these symbols in zfs as the fpu implementation breaks userspace: + +https://github.com/zfsonlinux/zfs/issues/9346 +Signed-off-by: Jörg Thalheim <joerg@thalheim.io> +--- + arch/x86/kernel/fpu/core.c | 4 ++-- + 1 file changed, 2 
insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c +index 12c70840980e..352538b3bb5d 100644 +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -102,7 +102,7 @@ void kernel_fpu_begin(void) + } + __cpu_invalidate_fpregs_state(); + } +-EXPORT_SYMBOL_GPL(kernel_fpu_begin); ++EXPORT_SYMBOL(kernel_fpu_begin); + + void kernel_fpu_end(void) + { +@@ -111,7 +111,7 @@ void kernel_fpu_end(void) + this_cpu_write(in_kernel_fpu, false); + preempt_enable(); + } +-EXPORT_SYMBOL_GPL(kernel_fpu_end); ++EXPORT_SYMBOL(kernel_fpu_end); + + /* + * Save the FPU state (mark it for reload if necessary): +-- +2.23.0 + + diff --git a/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-ikconfig.patch b/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-ikconfig.patch new file mode 100644 index 000000000000..7fd83ab8ddd4 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-ikconfig.patch @@ -0,0 +1,13 @@ +diff -Nuar debian/config/config debian-fixed/config/config +--- debian/config/config 2018-12-16 17:37:36.000000000 +0000 ++++ debian-fixed/config/config 2018-12-21 19:38:42.417410783 +0000 +@@ -5970,7 +5970,8 @@ + CONFIG_TASK_XACCT=y + CONFIG_TASK_IO_ACCOUNTING=y + CONFIG_CPU_ISOLATION=y +-# CONFIG_IKCONFIG is not set ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y + CONFIG_LOG_BUF_SHIFT=17 + CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 + CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 diff --git a/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-mcelog.patch b/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-mcelog.patch new file mode 100644 index 000000000000..dd219db89ab4 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-mcelog.patch @@ -0,0 +1,14 @@ +diff -Nuar debian/config/config debian-fixed/config/config +--- debian/config/config 2017-10-28 22:51:59.000000000 +0300 ++++ debian-fixed/config/config 2017-11-13 06:47:04.809885320 +0200 +@@ -14,6 
+14,10 @@ + CONFIG_STRICT_KERNEL_RWX=y + + ## ++## file: arch/x86/Kconfig ++## ++CONFIG_X86_MCELOG_LEGACY=y ++ + ## file: block/Kconfig + ## + CONFIG_BLOCK=y diff --git a/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-nocerts.patch b/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-nocerts.patch new file mode 100644 index 000000000000..7f5c79b0d802 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-nocerts.patch @@ -0,0 +1,12 @@ +diff -urN debian/config/config debian.fixed/config/config +--- debian/config/config 2019-10-19 18:35:10.000000000 -0400 ++++ debian.fixed/config/config 2019-10-22 01:13:53.262687881 -0400 +@@ -68,7 +68,7 @@ + #. Actually a file containing X.509 certificates, not keys. + #. Whenever the filename changes, this also needs to be updated in + #. debian/featureset-*/config +-CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/debian-uefi-certs.pem" ++CONFIG_SYSTEM_TRUSTED_KEYS="" + #. Add secondary keyring with keys from UEFI db and MOK. 
+ CONFIG_SECONDARY_TRUSTED_KEYRING=y + CONFIG_SYSTEM_BLACKLIST_KEYRING=y diff --git a/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-xfs-libcrc32c-fix.patch b/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-xfs-libcrc32c-fix.patch new file mode 100644 index 000000000000..da144c5e9b74 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.6.14/debian-sources-5.6.14-xfs-libcrc32c-fix.patch @@ -0,0 +1,27 @@ +diff -Nuar debian/config/config debian-fix/config/config +--- debian/config/config 2014-04-29 01:57:15.000000000 +0000 ++++ debian-fix/config/config 2014-05-16 15:02:38.622819509 +0000 +@@ -1,4 +1,4 @@ +-## ++### + ## file: arch/Kconfig + ## + CONFIG_KPROBES=y +@@ -4710,7 +4710,7 @@ + ## + ## file: fs/xfs/Kconfig + ## +-CONFIG_XFS_FS=m ++CONFIG_XFS_FS=y + CONFIG_XFS_QUOTA=y + CONFIG_XFS_POSIX_ACL=y + CONFIG_XFS_RT=y +@@ -4909,7 +4909,7 @@ + CONFIG_CRC32=y + # CONFIG_CRC32_SELFTEST is not set + CONFIG_CRC7=m +-CONFIG_LIBCRC32C=m ++CONFIG_LIBCRC32C=y + # CONFIG_CRC8 is not set + # CONFIG_RANDOM32_SELFTEST is not set + CONFIG_CORDIC=m diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0001-ctf-generate-CTF-information-for-the-kernel.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0001-ctf-generate-CTF-information-for-the-kernel.patch deleted file mode 100644 index d34ad4abd622..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0001-ctf-generate-CTF-information-for-the-kernel.patch +++ /dev/null @@ -1,7304 +0,0 @@ -From 9700123a320c952c1a71b58addf27f2702878790 Mon Sep 17 00:00:00 2001 -From: Nick Alcock <nick.alcock@oracle.com> -Date: Wed, 14 Nov 2018 19:42:21 +0000 -Subject: [PATCH 01/20] ctf: generate CTF information for the kernel - -This introduces a new tool, dwarf2ctf, which runs whenever 'make ctf' is -run, extracting information on the kernel's types and global variables -from the DWARF-format debug information in the kernel build tree, -deduplicating it, and emitting it in Sun's 
Compact Type Format into a -mmappable type archive named vmlinux.ctfa, which is installed at 'make -install' time into /lib/modules/$(uname -r)/kernel/. Out-of-tree -modules cannot participate in this mechanism since the file is already -written: CTF information for such modules is instead linked into such -modules at build time as new sections named .SUNW_ctf (as a result, most -of the build-time machinery for this is in scripts/Makefile.modpost). -Care should be taken not to strip such sections into debug RPMs (they -are small enough that this should not be a problem). - -Within the ctfa file, the type information is divided into a shared -repository, containing all types used by more than one module, CTF for -the core kernel, and separate CTF for each module built, whether or not -this module has been compiled in or not: if a file *could* be built as a -module, it will be considered to be a module from the perspective of CTF -file emission (and kallmodsyms: see the next commit). This ensures that -external consumers such as DTrace always find types for a given module -in the same place, regardless of the local kernel configuration, as long -as that module is present at all, assisting in portability of D scripts -between installations. The ctf_ar tool in libdtrace-ctf can be used to -inspect ctfa files, and the ctf_dump tool can be used to look at the ctf -files they contain. - -This process needs a pair of new files, objects.builtin (which lists all -object files that are unconditionally built into the kernel and cannot -be built as modules) and modules_thick.builtin, which maps from the thin -archives that make up built-in modules to their constituent object -files. Taken together, these files let dwarf2ctf determine whether a -given object file linked into vmlinux.o is part of a module, and if so, -which one. 
- -There is a single manually-maintained blacklist of structure members -dwarf2ctf cannot handle in scripts/dwarf2ctf/member.blacklist: this is -used to identify structure members which have different definitions in -different object files even though they are defined in the same location -in the same source file, usually due to preprocessor magic. (Currently, -the only item in this list is present for example purposes only, since -the file in question was recently removed from the kernel: dwarf2ctf can -these days identify most members needing blacklisting automatically, and -will fail with an error if it needs more help. It is quite possible that -dwarf2ctf will fail on make allyesconfig kernel configurations and other -extreme cases: I hope to track all such bugs down in time.) - -The documentation for dwarf2ctf is currently somewhat outdated: an -update is planned. It remains largely accurate except for some details -of the deduplication pass. - -This introduces new kernel build-time dependencies on elfutils, zlib, -glib, and the new libdtrace-ctf package (shared with DTrace userspace). -No new runtime dependencies are introduced. - -v5.6: retain the tristate machinery and scripts/Makefile.modbuiltin, - since there appears to be no other way to get modules_thick.builtin - (or anything like it) generated. 
- -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> ---- - .gitignore | 3 + - Documentation/dontdiff | 1 + - Documentation/dwarf2ctf | 1054 ++++++ - Documentation/kbuild/kconfig.rst | 5 + - Documentation/process/changes.rst | 23 +- - Makefile | 71 +- - lib/Kconfig | 12 + - scripts/Kbuild.include | 6 + - scripts/Makefile | 1 + - scripts/Makefile.modbuiltin | 60 + - scripts/Makefile.modfinal | 141 +- - scripts/dwarf2ctf/.gitignore | 1 + - scripts/dwarf2ctf/Makefile | 11 + - scripts/dwarf2ctf/dwarf2ctf.c | 4961 ++++++++++++++++++++++++++++ - scripts/dwarf2ctf/eu_simple.c | 2 + - scripts/dwarf2ctf/member.blacklist | 1 + - scripts/eu_simple.c | 356 ++ - scripts/eu_simple.h | 91 + - scripts/kconfig/confdata.c | 41 +- - scripts/move-if-change | 8 + - scripts/package/mkspec | 12 + - 21 files changed, 6850 insertions(+), 11 deletions(-) - create mode 100644 Documentation/dwarf2ctf - create mode 100644 scripts/Makefile.modbuiltin - create mode 100644 scripts/dwarf2ctf/.gitignore - create mode 100644 scripts/dwarf2ctf/Makefile - create mode 100644 scripts/dwarf2ctf/dwarf2ctf.c - create mode 100644 scripts/dwarf2ctf/eu_simple.c - create mode 100644 scripts/dwarf2ctf/member.blacklist - create mode 100644 scripts/eu_simple.c - create mode 100644 scripts/eu_simple.h - create mode 100755 scripts/move-if-change - -diff --git a/.gitignore b/.gitignore -index 72ef86a5570d..33bd38f6cdb0 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -46,6 +46,8 @@ - Module.symvers - modules.builtin - modules.order -+modules_thick.builtin -+objects.builtin - - # - # Top-level generic files -@@ -55,6 +57,7 @@ modules.order - /linux - /vmlinux - /vmlinux.32 -+/vmlinux.ctfa - /vmlinux-gdb.py - /vmlinuz - /System.map -diff --git a/Documentation/dontdiff b/Documentation/dontdiff -index 72fc2e9e2b63..54fe19318ff3 100644 ---- a/Documentation/dontdiff -+++ b/Documentation/dontdiff -@@ -180,6 
+180,7 @@ modpost - modules.builtin - modules.builtin.modinfo - modules.nsdeps -+modules_thick.builtin - modules.order - modversions.h* - nconf -diff --git a/Documentation/dwarf2ctf b/Documentation/dwarf2ctf -new file mode 100644 -index 000000000000..5cc445fbd227 ---- /dev/null -+++ b/Documentation/dwarf2ctf -@@ -0,0 +1,1054 @@ -+dwarf2ctf, a type encoder for the Linux kernel -+========= -+ -+Many kernel-level debugging and tracing systems need access to the kernel's type -+information. Since C doesn't support any form of introspection, the data must -+be extracted in some other way: here, we extract it from the DWARF debugging -+information generated by the compiler. Unfortunately, this information is very -+voluminous (just the type information alone adds up to a couple of hundred -+megabytes in a 'make allyesconfig' kernel): even if users are happy to spend the -+disk space, the time and memory required to read much of this information in is -+likely to be prohibitive. -+ -+This problem is not new -- back in 2004, Sun had the same problem when -+attempting to give DTrace a view of the type information in the Solaris kernel. -+Their solution was the Compact ANSI-C Type Format (CTF), a highly compacted -+representation of C types suitable for debuggers and tracers. They combined -+this with a highly efficient tool for converting DWARF2 types to CTF, and hacks -+in the Solaris kernel causing the kernel itself to emit CTF data for its own -+types. -+ -+Unfortunately while this tool may be highly efficient it is not adequate for the -+Linux kernel. It treats every ELF object as an independent entity with an -+independent set of types -- perfectly all right for the Solaris kernel with a -+few hundred modules maximum, but very much not for Linux, where distro kernels -+often compile in thousands of modules. 
Ideally, we would like to treat all -+kernel modules, built-in or not, the same way, sharing and deduplicating all -+globally-visible types across the entire set of visible modules and recording -+each precisely once. -+ -+We also want to collect descriptions of global variables and emit descriptions -+of their name->type mapping as well, since the kernel has no easily accessible -+ELF section we can extract this information from at runtime (kernel modules must -+be accessible at runtime for modern Linux systems to work, but the kernel itself -+could have come from over the network or off a USB key or from a non-mounted -+partition or an EFI boot partition or who knows where, and could have any name -+even if it is accessible: so tracing tools should not rely on being able to look -+inside the kernel image). -+ -+We do all this with dwarf2ctf, a CTF generation tool that reads in DWARF from a -+set of object files (usually, every object file in the kernel and all modules) -+and fills a directory with compressed files containing CTF representations of -+the types in those object files: the kernel build system regenerates these as -+necessary and links them directly into kernel modules. -+ -+Caveats: It is somewhat specific to the form of DWARF output emitted by GCC, and -+doesn't yet support DWARF-4 type signatures or compressed DWARF at all. -+ -+We'll look at each part of this system in turn, from the top down, starting with -+using the kernel type information dwarf2ctf produces in other programs. -+ -+ -+Using dwarf2ctf output -+---------------------- -+ -+Using this data is fairly simple. 
Once you've read the CTF sections from the -+kernel modules and inflated them (or ignored them if they are empty or, as noted -+below, one byte long), you simply need to look at the ctf_parent_name() for -+each module, and if it is set to "ctf", call ctf_import() to set the parent of -+this module to the CTF data you have read from the .ctf.shared_ctf section in -+the ctf.ko kernel module. The core kernel's types are stored in the -+.ctf.vmlinux section in the same kernel module, and all built-in kernel modules -+have their types in .ctf.$module_name. Non-built-in kernel modules just have a -+.ctf section containing their types, which again might need their parent set to -+"shared_ctf". (Out-of-tree kernel modules will have no such parent.) -+ -+Once you've set up the parenthood relationships you can call ctf_close() on the -+shared type repository and forget about it entirely: it will be refcounted and -+destroyed when all its children are closed. -+ -+ -+You should end up with a family of CTF files, one per kernel module built-in or -+not and one for the core kernel, freely usable for whatever purpose you need. -+ -+ -+Invocation and build-system connections -+---------- -+ -+dwarf2ctf's command-line syntax emphasises simplicity over compactness. Linux -+has nearly-infinitely-long command lines these days, so we can take advantage of -+this. -+ -+Two syntaxes are supported. The first shares types across multiple modules and -+the core kernel; the second is used for out-of-tree module building, and avoids -+either sharing anything at all across modules or depending on the set of shared -+types defined for the core kernel. -+ -+ -+dwarf2ctf outputdir objects.builtin modules.builtin dedup.blacklist \ -+ vmlinux.o module.o ... -+dwarf2ctf outputdir -e module.o ... -+ -+where: -+ -+ - 'outputdir' is the possibly-relative path to a directory in which the -+ generated CTF files get placed. 
-+ - 'objects.builtin' is the name of the file containing the object files that -+ correspond to always-built-in kernel code (that cannot be built as modules). -+ - 'modules.builtin' is the name of the file containing the names of -+ kernel modules presently built in to the kernel. -+ - 'dedup.blacklist' is a blacklist of modules that should never participate -+ in deduplication: see 'Duplicate type detection' below. -+ - the .o filenames are the names of object files comprising the kernel and/or -+ modules: you can feed in whole modules at once (before linking with .mod.o). -+ This list is often very, very long (I have seen command-lines in excess of -+ 60Kb). -+ -+dwarf2ctf's output consists of a series of gzip-compressed .ctf.new files in the -+outputdir, which the makefile compares with and if necessary moves over the top -+of .ctf files with the same basename, so as to avoid relinking things if -+dwarf2ctf has written out content identical to what it wrote last time it ran. -+These fall into several classes, partitioned according to the contents of -+objects.builtin and modules.builtin: -+ -+ - shared_ctf.builtin.ctf: The shared type repository. Types shared by more -+ than one of the files below go here (readable with -+ libdtrace-ctf). See 'Using dwarf2ctf output' above regarding use of this -+ data. -+ - vmlinux.builtin.ctf: Types in the core kernel, that cannot be built in to -+ modules, go here. -+ - *.builtin.ctf: One of these is generated for the types in each module that -+ is presently built in to the kernel. -+ - *.mod.ctf: One of these is generated for each .ko. -+ -+All the files in the first three classes are linked into the ctf.ko module under -+various names, an empty module containing nothing but CTF data. -+ -+ -+A lengthy section of Makefile.modpost, and a short section of the toplevel -+Makefile, is dedicated to creating these files, and to linking them into the -+kernel modules. 
The dependency graph related to dwarf2ctf output is quite -+complex: modules and objects (ld -r'ed *.o files) are processed by dwarf2ctf to -+produce a number of files in the .ctf directory, and the final modules depend on -+the relevant ctf files. The .mod.ctf's go into the .ko's with the same stem -+name, but ctf.ko receives content from all the CTF files corresponding to -+built-in modules, and until dwarf2ctf runs and creates those files we cannot -+tell what those CTF files will be, though we do have a wildcard that matches -+them all. -+ -+GNU Make's 'secondary expansion' feature comes to the rescue here: we can -+compute a list of expected CTF filenames at runtime, given the names of the -+modules we are linking in. For the builtin modules, we cheat and touch a stamp -+file after moving any .ctf.new files back over a .ctf file, then depend on that -+to see if ctf.ko needs to be relinked. -+ -+The actual incorporation of the CTF data into the kernel modules happens before -+module signing (if signing is active), by calling objcopy --add-section on the -+module in question. This too has some knotty corners. -+ -+First of all, the module linkage process normally links a module using all the -+prerequisites of the module's target -- but we have designated all the CTF files -+as prerequisites of the module's target, and we don't want to link them directly -+in using ld(1), since they aren't object files. So we have to filter them out in -+the link line. -+ -+Secondly, those modules which have no CTF files should acquire empty CTF -+sections to indicate their lack of unique types -- but objcopy in binutils 2.20 -+and below silently exits if asked to --add-section an empty file. So we use dd -+to generate a file with a one-byte null in it instead, and teach the users of -+CTF sections to treat a one-byte-long 'CTF' section as if it were empty. 
-+ -+ -+Overview of dwarf2ctf operation -+-------- -+ -+There are four phases to dwarf2ctf operation: initialization, duplicate type -+detection, CTF construction, and writeout. Some of these phases can repeat. -+All but the last phase consists purely of sucking data from object files into -+GHashTables in memory. (The last two phases could potentially be combined, -+shrinking the size of one hash and saving memory, but the hash that is shrunk is -+by no means the largest one, so the extra complexity is probably not worth it.) -+ -+dwarf2ctf uses several other libraries to do this: -+ -+ - elfutils, used for DWARF parsing. We could potentially write our own -+ DWARF parser, but elfutils works and is tested. -+ -+ - glib, used for the GHashTable. The rest of the kernel uses roll-your-own -+ hash tables, but dwarf2ctf makes heavy demands of its hashtables: they must -+ be expanding hashes capable of efficiently storing hundreds of thousands of -+ items, with amortized log(N) lookup time, and they must support deletion -+ (though it need not be particularly efficient deletion). This rules out -+ simple fixed-size bucket hashes like the ones used in other parts of the -+ kernel build system: GHashTable is already implemented, and works. -+ -+ - zlib, used to compress the CTF information. -+ -+ - libdtrace-ctf, which both reads and writes the CTF data. This is a port of -+ the Solaris CTF library, GPLed and with additional support for the storage -+ of name->type mappings (meant to represent variables) akin to its existing -+ ELF symbol->type mappings. -+ -+dwarf2ctf has a good few important data structures, described at the top of -+scripts/dwarf2ctf/dwarf2ctf.c. -+ -+dwarf2ctf has its own trace facility, implemented via the dw_ctf_trace() macro -+and enabled by compiling with -DDEBUG and setting DWARF2CTF_TRACE in the -+environment. 
(The first step is required because some very numerous data -+structures are greatly expanded when debugging is turned on, which would waste -+memory if it were done all the time). This produces a huge volume of trace -+output, several gigabytes when run over an allyesconfig kernel. -+ -+ -+Unless you're interested in how dwarf2ctf works internally, you can stop reading -+here. If you are interested, now is a good time to read the comments above -+main() in scripts/dwarf2ctf/dwarf2ctf.c, which briefly describe dwarf2ctf's data -+structures and functions. -+ -+ -+Flow of Control -+--------------- -+ -+The /* C comments */ point to other sections of this document. -+ -+Functions named in the /* Utilities */ section of dwarf2ctf.c are not mentioned -+here for simplicity's sake. -+ -+[C]: Callback -+[R]: recursive -+[1]: Numbers: Mutually-recursive loop -+|: Several functions which all call the same functions -+->: Call from array of callbacks (filter_ctf_*() omitted as uninteresting) -+ -+main() -+ /* See 'Initialization' */ -+ init_assembly_tab() -+ init_builtin() -+ init_dedup_blacklist() -+ init_member_blacklist() -+ run() -+ init_tu_to_modules() -+ init_ctf_table() -+ -+ /* Duplicate detection */ -+ -+ scan_duplicates() -+ process_file() /* Toplevel DWARF walkers */ -+[C] detect_duplicates_init() -+[R] process_tu_func() -+[C] assembly_filter_tab[] -+[C] detect_duplicates() -+[ 1] mark_shared() -+[R] type_id() /* Type IDs */ -+[C1] mark_shared() -+[R] mark_seen_contained() -+[C] detect_duplicates_done() -+ -+ process_file() -+[C] detect_duplicates_init() -+[R] process_tu_func() -+[C] assembly_filter_tab[] -+[C] detect_duplicates_alias_fixup() -+[R] type_id() -+[C] is_named_struct_union_enum() -+[R] type_id() -+[C] detect_duplicates_alias_fixup_internal() -+ mark_shared() (see above) -+[C] detect_duplicates_done() -+ -+ /* CTF construction */ -+ -+ process_file() -+[R] process_tu_func() -+[C] assembly_filter_tab[] -+[C] construct_ctf() -+[ 2] construct_ctf_id() 
-+[R3] die_to_ctf() -+ assembly_tab[] -+[C] -> assemble_ctf_base() -+ -> assemble_ctf_pointer() -+ | assemble_ctf_array() -+ | assemble_ctf_array_dimension() -+ | assemble_ctf_typedef() -+ | assemble_ctf_cvr_qual() -+ | assemble_ctf_variable() -+ lookup_ctf_type() -+[ 2] construct_ctf_id() -+ -> assemble_ctf_enumeration() -+ -> assemble_ctf_enumerator() -+ -> assemble_ctf_struct_union() -+ -> assemble_ctf_su_member() -+[ 3] die_to_ctf() -+[ 2] construct_ctf_id() -+ -+ write_types() -+ -+Initialization -+-------------- -+ -+ init_assembly_tab() -+ init_builtin() -+ init_dedup_blacklist() -+ run() -+ init_tu_to_modules() -+ init_ctf_table() -+ -+This happens at the top of main() and run(), and in various functions named -+init_*(). Of these, init_assembly_tab() and init_builtin() serve only to turn -+various static arrays and files mentioned on the command line into more useful -+internal representations (e.g. the assembly filter array of structures is turned -+into a pair of arrays indexed by DWARF tag), and the blacklisting functions are -+described in the section on duplicate type detection below. -+ -+init_ctf_table(), called both at initialization time and later during CTF -+assembly when new CTF files are found to be needed, creates a new CTF file in -+memory and either marks it as a child of the shared type repository, or (if it -+*is* the shared type repository, or deduplication is off and there is only one -+CTF file being processed and no shared type repository at all) creates a few -+types in it which CTF has representations of but DWARF does not: a void type, -+and a generic catchall pointer-to-function-returning-int. -+ -+That leaves init_tu_to_modules(). 
This walks over all the top-level -+compile_unit DIEs in the DWARF debugging information in every object file -+mentioned in the list of modules and built-in modules, constructing a mapping -+from translation unit name back to the name of the kernel module it comes from, -+even if that module is built in to the kernel. This is normally the same as the -+filename (sans extension), but for built-in kernel modules, the name comes from -+the modules.builtin file's entry for the translation unit instead, so that the -+output can land in a .builtin.ctf file rather than being jammed into -+vmlinux.builtin.ctf with the core kernel's types. -+ -+This means that dwarf2ctf can operate in terms of the kernel module a type is -+contained within rather than having to think about the mapping between object -+file name, translation unit name and module name all the time. -+ -+ -+Toplevel DWARF walkers -+---------------------- -+ -+ process_file() -+[C] (per-TU initialization callback) -+[R] process_tu_func() -+[C] assembly_filter_tab[] -+[C] (per-DIE callback) -+[C] (per-TU cleanup callback) -+ -+All routines in dwarf2ctf other than initialization and writeout are DWARF -+walkers: i.e., they walk over all DWARF DIEs in all object files specified on -+the command line and do something with every DIE. This job is done by -+process_file() and its helper process_tu_func(), which not only digs out the -+corresponding (built-in or non-built-in) module name corresponding to each -+object file, but also detects and skips translation units it has handled before -+(in case they are incrementally linked into multiple object files) and allows -+callbacks to be invoked at the start and end of each translation unit. 
-+ -+Even though dwarf2ctf only cares about top-level types, in some situations DWARF -+can emit top-level types with references to a non-top-level type: if all -+occurrences of the top-level type are an opaque structure, and the only -+non-opaque definition is inside a function, references in the same translation -+unit as the non-opaque definition will point to the definition inside the -+function (and references outside the translation unit will not point at any -+definition). Thus, if we want to catch all nuances of globally-visible types, -+we have to scan types inside functions and lexical blocks inside functions too. -+ -+To avoid generating a vast number of unnecessary type definitions, the 'assembly -+table' which describes how to construct a CTF type given a DWARF DIE also -+contains a description of a set of filters which are passed the current DIE and -+its parent: if they return false, the DIE is skipped and never passed to the -+callback function. We also avoid calling the callback for any DWARF DIE whose -+tag doesn't appear in the assembly table at all: there's no point doing -+duplicate detection or anything else for a DWARF DIE we won't be generating CTF -+from. There are currently two filters defined: filter_ctf_file_scope(), which -+is called for every DWARF DIE whose tag is one we never expect to see a -+reference to if it is inside a function (except if they relate to a structure or -+union, as above), and filter_ctf_uninteresting(), which is called for variables -+to see if they are worthy of recording (top-level named variables with external -+linkage not part of the internal workings of macros only). -+ -+ -+Type IDs -+-------- -+ -+[R] type_id() -+[C] (optional per-type callback) -+ -+The only thing dwarf2ctf does which the Sun tool does not is the detection of -+duplicate and shared types, both within individual kernel modules and across -+modules. 
Our ultimate goal is that a type that appears in the source code once -+appears in the CTF output once as well. This goal has mostly been attained, -+except for out-of-tree modules, where cross-module type sharing must be disabled -+to avoid requiring rebuilds of the module whenever the core kernel is rebuilt. -+ -+The core of this is the concept of a *type ID* and the function type_id() which -+computes it. A type ID is an identifier for a type which precisely represents -+that type and only that type. Doing this for types in different headers or at -+different scopes with the same name without needing to encode knowledge of C -+scoping rules into dwarf2ctf is an interesting proposition: we can use the line -+number and filename info provided by DWARF in most user-specified types to help. -+ -+A type ID is a recursively-constructed string of the following form (fixed -+elements represented by {}, optional elements by []): -+ -+//[filename]//[line number]//{type string} -+ -+Types are *based* upon other types iff they have a DW_AT_type attribute pointing -+to some other type. All types based upon other types have a type ID that is the -+type ID of the type upon which they are based, with additional information -+specific to this type appended to it. The filename and line number is only -+added for those types which are not based upon other types and which have a -+filename and line number in the DWARF (lots don't, e.g. base types): the -+filename is canonicalized with realpath(), though since this is quite slow and -+type_id() is called a lot, the mapping from DWARF filename to realpath() result -+is cached. Types that have no filename or line number start with '////'. 
-+ -+We use // to separate the filename and line number elements because this is the -+shortest string other than NUL that cannot appear in a canonicalized POSIX -+pathname (ignoring Pyramid, Cygwin and other strange systems that actually -+return // in the result of realpath(): Linux doesn't use it and that's all that -+matters. Should it start to use it, we can switch delimiter to ///.) -+ -+Function pointers are not represented (or, rather, are all mapped to the same -+type ID, the generic catchall function-pointer type mentioned above); array -+dimensions are represented by [index-type dimension], or [] for flexible array -+members. Structure members are not represented, since they are not types, but -+the types of their members *are* represented, as are nested structures (the line -+number and filename serving, as ever, to disambiguate them from other structures -+with the same name declared nested inside different structures). -+ -+The following are some examples of valid type IDs (assuming the kernel source -+tree is, implausibly, located at /k/, just off the root directory: comments on -+individual types done /* like C */; the last example is broken across lines for -+formatting's sake): -+ -+//fp//* # a pointer to a function, any function -+////long int -+////char [] -+////unsigned int typedef __kernel_uid_t typedef __kernel_uid32_t -+//fp//* typedef __signalfn_t * typedef __sighandler_t -+////struct nsproxy /* an opaque type */ -+////struct nsproxy * # /* pointer to it */ -+////long unsigned int typedef u64 volatile -+////long unsigned int volatile const * const -+////long unsigned int typedef sector_t [////long unsigned int 511] -+/k/include/linux/types.h//222//struct list_head -+/k/include/linux/types.h//222//struct list_head * -+/k/include/linux/types.h//222//struct list_head [////long unsigned int 5] -+/k/include/linux/types.h//217//struct /* no struct tag */ -+/k/include/linux/types.h//217//struct typedef atomic64_t 
-+/k/include/linux/mm_types.h//34//struct page * typedef pgtable_t -+/k/fs/eventpoll.c//122//struct nested_calls -+/k/include/linux/sysctl.h//1016//struct ctl_table typedef ctl_table -+ [////long unsigned int 4] var inotify_table /* A global variable */ -+ -+This scheme means that cv-quals and other modifiers applied to other types are -+always merged: if there are a dozen typedefs for a single type 'foo' with the -+same name declared in the same place, they all end up with the same type ID and -+are only emitted into the CTF once. -+ -+The type_id() function can also accept a callback, which is called as the -+recursion unwinds, from base type up to derived type: so it might be called for -+"////unsigned int", then for "////unsigned int typedef __kernel_uid_t", and so -+on up to the DIE that was originally passed in. Because type_id() returns a -+dynamically-allocated string, calls to type_id() made purely for the sake of -+invoking a callback are normally of the peculiar form "free(type_id(...))". -+ -+type_id() is a very hot spot, so syscall results are cached in it (such as -+realpath(), as mentioned above), and when string appending is done, it is done -+all at once where possible, via str_appendn(), which calls realloc() only once -+no matter the number of strings being appended. -+ -+Lots of core data structures in dwarf2ctf consist of hashes mapping type IDs to -+something else (predominantly CTF file/ID pairs and module names). It would -+be possible to map from hashes of type IDs, saving some memory, but this would -+impair debugging so is not yet implemented. (If it is implemented, it should -+probably be implemented only when DEBUG is not defined.) 
-+ -+ -+Duplicate detection -+------------------- -+ -+ scan_duplicates() -+ process_file() /* Toplevel DWARF walkers */ -+[C] detect_duplicates_init() -+[R] process_tu_func() -+[C] assembly_filter_tab[] -+[C] detect_duplicates() -+[ 1] mark_shared() -+[R] type_id() /* Type IDs */ -+[C1] mark_shared() -+[ 4] type_id() -+[ 4] detect_duplicates_typeid() -+[ 4] detect_duplicates() -+[R] mark_seen_contained() -+ member_blacklisted() -+[C] detect_duplicates_done() -+ -+ process_file() -+[C] detect_duplicates_init() -+[R] process_tu_func() -+[C] assembly_filter_tab[] -+[C] detect_duplicates_alias_fixup() -+[R] type_id() -+[C] is_named_struct_union_enum() -+[R] type_id() -+[C] detect_duplicates_alias_fixup_internal() -+ mark_shared() (see above) -+[C] detect_duplicates_done() -+ -+The job of the duplicate detection pass is to fill out the id_to_module hash, -+which maps type IDs to the module they appear in, with the two special cases -+that types that appear only in the core kernel are said to appear in the module -+'vmlinux', and types that appear in more than one module (or in a module and in -+the core kernel) are said to appear in the module 'shared_ctf', the shared type -+repository. This is quite a tricky multi-pass process, because we must ensure -+that the shared type repository is self-contained: all types in the repository -+must not reference any types outside the repository. -+ -+Detecting duplicates itself is easy: we consider two types duplicates if they -+have the same type ID: if they both reside in the same module, the resulting -+type resides in that module too. However, detecting shared types is harder. 
-+We consider that a type belongs in the shared module if any of these conditions -+is true: -+ -+ - the type appears multiple times in different modules -+ - a type for which this type is a base type is shared -+ - the type is referenced by a structure or union member, and the structure -+ or union is shared -+ - the type is a non-opaque type with an opaque variant ('struct foo'), or -+ vice versa, and either of these variants is shared: these two types will -+ get different type IDs, so explicit checking is necessary -+ -+Note that we do *not* consider a type to belong in the shared module merely if -+*it* has a base type which is shared: indeed, this is the common case for -+unshared types (even unshared structures tend to have fields of shared types -+like int). -+ -+It should be fairly easy to see that sharedness is a contagious property: -+e.g. if you mark a structure as shared, and one of its members is an -+otherwise-unshared opaque pointer to a structure, you have to mark that as -+shared: this causes the non-opaque definition of the structure, and all *its* -+members, to be shared, and so on. So since dwarf2ctf does not track the members -+of structures itself (not until the CTF generation phase, anyway), this means -+walking over the DWARF DIEs multiple times, checking for sharedness over and -+over until we are done. -+ -+We partition the problem into two parts, both of which are carried out by -+process_file() callback functions: detect_duplicates() and -+detect_duplicates_alias_fixup(). -+ -+ -+detect_duplicates() is called first, once for every DIE in the kernel (via -+process_file()). This identifies types that are duplicated but not shared, and -+identifies shared types without consideration of opaque struct/union aliasing. 
-+It also flags types that have been seen only once as 'seen': this is checked -+much later on by the CTF construction phase, since construction of CTF for -+any type which has not been inspected by the deduplicator is a sign of a bug in -+the deduplicator. -+ -+This has several subtleties. -+ -+If we are running for an out-of-tree module, we must still identify types as -+duplicated within the module, but must never mark them as shared: out-of-tree -+modules cannot contribute to the shared type repository nor even use types in -+it, since they are rebuilt independently from the kernel proper and thus cannot -+depend on a type currently in the repository remaining there (e.g. perhaps it -+has only two users, both in modules, and the kernel is rebuilt to not build one -+of those modules anymore: this should not require rebuilding of any out-of-tree -+modules). -+ -+If we mark a structure or union type as seen, we must mark aggregate types that -+appear directly within that type's DIE as seen as well. This is done by the -+recursive function mark_seen_contained(). You might wonder what the point of it -+is: such types surely cannot appear anywhere else, and any duplication will -+precisely match the duplication of the containing type. The answer is that they -+can still be referenced as the type of structure members of their containing -+structure, e.g. in -+ -+struct foo { -+ struct bar { -+ } *baz; -+ struct bar wombat[16]; -+}; -+ -+Here, a reference to 'struct bar' appears in 'struct foo', and CTF is -+constructed for it, even though it is not a top-level DIE. In GCC 4.8+, a -+reference to the 16-element array-of-struct-bar can also appear in 'struct foo': -+in fact almost anything can appear in there if used nowhere else in the -+translation unit, even base types. 
So we look for the appearance of anything -+which we can assemble into CTF (anything in the assembly_tab) other than -+members, since members cannot be used as the type of anything else, and mark -+them all as seen in this module. (Nearly everything in a structure or union is -+a member, so this ends up skipping almost but not quite everything.) -+ -+ -+If we find that a type has appeared more than once in different kernel modules -+(or in a module and in the core kernel), we must mark it as shared. This is -+done via mark_shared(), which is both a function that can be directly called -+(e.g. from detect_duplicates()) and a type_id() callback. If it is called -+directly, it immediately reinvokes itself as a type_id() callback, which calls -+it for the base type of the type in question and then for all qualifiers up the -+type ID stack, marking them all as shared if they weren't already. -+ -+If a structure or union is marked as shared, the types of its members are also -+marked as shared via a recursive call (even if they have already been so marked: -+just because this structure is of a type we've already seen, in a location we've -+already seen, doesn't mean that someone might not have legitimately used -+#defines to add extra members to the end of it, and we need to mark them as -+shared too). We track structures that have been seen in this translation unit -+and avoid recursing into them, to avoid an infinite loop in cases like this: -+ -+struct one; -+struct two { -+ struct one *foo; -+}; -+ -+struct one { -+ struct two *foo; -+} -+ -+The types being pointers does not help here -- the marking of 'struct one *' as -+shared will automatically mark 'struct one' as shared too, because otherwise we -+might have a structure in the shared type repository whose members' types -+could not be found there. -+ -+ -+The second pass is the 'alias fixup' pass, implemented by -+detect_duplicates_alias_fixup(). 
This pass serves to detect unshared opaque -+types whose non-opaque equivalents are shared, and vice versa. It is executed -+repeatedly until no types have been marked as shared for an entire iteration, -+but is considerably faster per iteration than the first pass, which often -+consumes more than half of dwarf2ctf's total runtime. We work in one direction -+only, looking for non-opaque structures, unions or enums which have structure -+tags. (Structures without tags cannot have opaque variants, and structures -+which are opaque will have non-opaque cousins somewhere, or can be emitted to -+the CTF as an opaque structure harmlessly since they truly have no members and -+are probably manipulated only via casts.) -+ -+We identify structures, unions or enums with tags via the type_id() callback -+is_named_struct_union_enum(), but cannot determine if something is an opaque -+structure at this stage. Instead, we do that after the callback, checking to -+see if the first four characters of the type ID are "////": this relies on the -+fact that GCC never gives opaque structures line numbers in DWARF. We do the -+actual checking and marking of each non-opaque structure using -+detect_duplicates_alias_fixup_internal(), which is yet another type_id() -+callback. -+ -+This function directly synthesises the name which this structure's opaque cousin -+would have, if it existed, by stripping off the line number and filename and -+replacing them with '////', and sees if one of these types has been marked -+as shared while the other has not. If the opaque type is shared, the non-opaque -+variant can be marked shared using the same recursive mark_shared() function as -+before (thus marking the types of all its members and types it depends upon as -+shared too). If it is the opaque type that needs marking shared, this will not -+work, since mark_shared() takes a DWARF DIE, and we don't have one for the -+opaque type, just a faked-up type ID.
However, since an opaque type doesn't -+have any members that need recursively tracing, we don't need access to its -+DWARF DIE to figure them out, and can just mark it as shared directly, via an -+insert into the id_to_module hash. -+ -+Since this function is a type_id() callback, it is called not just for -+structures but for types based on them (e.g. in type_id form, "struct foo const -+* [43] volatile *"); at every level of this declarator stack, a shared opaque -+base type will contaminate its non-opaque cousins with sharedness, and vice -+versa. This handles situations in which, say, the opaque version of "struct foo -+const * [43]" was used by more than one module and was marked as shared: the -+marking process will have marked the opaque versions of "struct foo const * -+[43]", "struct foo const *", "struct foo const" and "struct foo" as shared, but -+will not have touched any non-opaque versions of these types which may exist -+until this routine runs. It also handles typedefs to structures with no need -+for any extra code. -+ -+This whole alias fixup process needs to be repeated, because whenever a -+non-opaque type is marked as shared and its members' types traced and marked -+shared, *those* may themselves be structure types with corresponding opaque or -+non-opaque variants, and when they are opaque types the non-opaque variant that -+alias fixup works from may already have passed under the DWARF walker's gaze: so -+another pass over the kernel's DWARF is necessary to be sure we catch it. -+mark_shared() thus sets a flag that scan_duplicates() recognizes and uses to -+trigger another run through the alias fixup pass. -+ -+ -+There are a very few modules that this algorithm doesn't work for. One example -+is snd-ens1371, which reads, in toto -+ -+#define CHIP1371 -+#include "ens1370.c" -+ -+and ens1370.c (itself a distinct kernel module) then defines a 'struct ensoniq' -+whose members vary depending on whether CHIP1371 is defined.
Obviously, it is -+impossible to share any such types between kernel modules even though their -+names are the same and they are defined in the same place in the same source -+file in both cases. But this sort of trickery is very rare, so we simply -+implement a 'deduplication blacklist' of modules which will not introduce new -+types into the shared CTF repository, and which do not participate in alias fixup -+detection either. Detecting these cases in order to blacklist them is harder: -+no automated system has yet been implemented, although instances where #defines -+of this nature introduce new types that are then used by later members will -+cause assertion failures inside dwarf2ctf which might be a clue. So it is -+possible that some examples have been missed. (The blacklist only applies to -+cases where structure members change within a single kernel build, so cases -+where structures have members whose presence depends on CONFIG_* values are -+quite all right, as are cases where #defines are introduced by one translation unit -+that #includes another that then goes on to define whole new structures: it is -+only cases where modules #define something that changes the definition of -+individual possibly-shared types that will need blacklisting.) -+ -+ -+There are a few even worse cases where a single structure is defined with -+different members in different translation units within a single module. In -+this case we can do nothing at all, since our output representation describes -+only a single type per module: we implement a 'member blacklist' which bans -+emission of affected members entirely, leaving a description of a structure with -+an undescribed hole in it.
-+ -+ -+CTF construction -+---------------- -+ -+ process_file() -+[R] process_tu_func() -+[C] assembly_filter_tab[] -+[C] construct_ctf() -+[ 2] construct_ctf_id() -+[R3] die_to_ctf() -+ assembly_tab[] -+[C] -> assemble_ctf_base() -+ -> assemble_ctf_pointer() -+ | assemble_ctf_array() -+ | assemble_ctf_array_dimension() -+ | assemble_ctf_typedef() -+ | assemble_ctf_cvr_qual() -+ | assemble_ctf_variable() -+ lookup_ctf_type() -+[ 2] construct_ctf_id() -+ -> assemble_ctf_enumeration() -+ -> assemble_ctf_enumerator() -+ -> assemble_ctf_struct_union() -+ -> assemble_ctf_su_member() -+ member_blacklisted() -+[ 3] die_to_ctf() -+[ 2] construct_ctf_id() -+[C] cleanup_sou_member_count() -+ -+The next stage after the detection of duplicate and cross-module shared types is -+to generate CTF. We generate all CTF at once before emitting it: this is -+potentially somewhat wasteful of memory, but in practice has not proved to be a -+problem: substantially less memory is used than is used by other parts of the -+kernel build, unless -DDEBUG is enabled. Its job is to look through the -+kernel's type DWARF (via process_file(), as usual) and create CTF for every -+file-or-global-scope type and every externally-visible variable in the CTF file -+in which the duplicate detection pass has said that type should appear. -+(Variables are treated exactly like types: it just so happens that they are -+never shared because no type or variable can depend upon them, so they always go -+directly into the appropriate module and never into the shared type repository.) -+ -+At this stage, the 'CTF files' are not actually files but rather ctf_file_t -+structures maintained by libdtrace-ctf and tracked in the per_module hash, along -+with other information which varies by module name. 
We track every single -+individual type in the CTF file in the id_to_type hash, which maps type IDs to -+pairs of (CTF file ID, ctf type ID): this lets us use the type IDs described -+above when considering cross-references within CTF files (e.g. from one CTF type -+to a type it depends upon). -+ -+The most important functions in this phase are: -+ -+ - construct_ctf_id(), the top-level process_file() callback which is given a -+ DWARF DIE, looks in module_to_ctf_file for the CTF file where this type -+ should land (creating it if necessary), makes sure a type with this type ID -+ has not already been created there, calls die_to_ctf() to create the CTF, -+ notes where it was created in id_to_type, and handles errors. -+ -+ - die_to_ctf(), a recursive function which calls the assembly function for the -+ DIE it is given and all its immediate children, with special-case handling -+ for tagged structures and unions. If you want to create a type but not note -+ where it was created for future lookups by lookup_ctf_type(), this is what to -+ call. (This is only done currently for unnamed structures/unions.) -+ -+ - lookup_ctf_type(), which is called by CTF assembly functions for those -+ types that depend upon other types: it calls construct_ctf_id() again -+ to construct the type, and double-checks that all such types appear -+ either in the module we are constructing types for, or in the shared CTF -+ module. CTF represents all function pointers the same way, and has a -+ special type ID for 'void', so we special-case both of these cases. -+ -+These functions are mostly straightforward (though highly recursive, with all -+three plus CTF construction functions participating in loop 2 above, -+die_to_ctf() calling itself directly, and even one situation, the -+already-mentioned unnamed structures/unions, in which die_to_ctf() is directly -+called back by a CTF construction function, in loop 3 above.) -+ -+ -+There are a few subtleties, though. Firstly, error handling. 
We consider that -+errors that will lead to unusable CTF are fatal: mostly, these are errors where -+a bug in the deduplicator has failed to trace types correctly and has left at -+least one type in the shared module depending on a type in a non-shared module, -+or has failed to mark a type as shared at all. In all these cases, you'll -+eventually get an error from lookup_ctf_type() of the general form -+ -+blah.c:413:foo_t: Internal error: lookup of flob found in different file. -+ -+The first two parts of this error are the translation unit and line number the -+type being assembled (usually a structure or union) was found in: foo_t is the -+name of the type being assembled. The type of the structure being looked up -+appears nowhere, because we don't know it, but the name of the member is given -+("flob" above, or "(unnamed)" if we don't know it). If you want more -+information, you can pass -DDEBUG to the compilation of dwarf2ctf.c in -+scripts/dwarf2ctf/Makefile, and rerun dwarf2ctf, and you'll get the module and -+filename in which both the originating and the target types appear. In order to -+actually track down the bug you'll probably have to run dwarf2ctf with -+DWARF2CTF_TRACE set in the environment, and look at the place where the target -+type was deduplicated, and try to figure out why the deduplicator didn't trace -+the reference to the type in foo_t correctly. -+ -+There is another kind of error, though: a failure to assemble a single type, -+perhaps because DWARF was emitted that we don't know how to understand (this is -+particularly likely in structure assembly, where we are highly dependent on the -+form of the DWARF that GCC happens to emit for DW_AT_member_location). We pass -+an 'enum skip_type' around, which has three possible values, one of which is -+SKIP_ABORT. Before each type is assembled, we call ctf_snapshot() to take a -+snapshot of the variable-plus-type set in the CTF file we're working over. 
If a -+SKIP_ABORT propagates up to construct_ctf_id(), we call ctf_rollback(), which -+throws away every type constructed since the last ctf_snapshot() -- i.e., the -+specific erroneous type we've just been working on. (We might have emitted some -+parts of it and then failed, so we should try to clean up). -+ -+A SKIP_ABORT is not fatal unless DEBUG is defined: its only effect is to omit -+one single type from the resulting CTF, which is probably still usable. -+ -+libdtrace-ctf causes additional problems here. It can only see the types we -+added once the notably expensive function ctf_update() is called. This takes -+the in-memory structures and serializes them (all of them, every time). This -+only affects libctf when structure and union members are added: libctf needs to -+know the sizes and alignments of the types of those members, which might quite -+possibly just have been added, e.g. if this structure contains a pointer to its -+own structure tag. So, when we insert a member in assemble_ctf_su_member(), we -+note a bad type-ID error and do a ctf_update() on the file we're working over -+and try again: even then that can fail if the type was added to the shared -+repository, so we do a ctf_update() on *that* and try again, and only if that -+fails do we declare a SKIP_ABORT error. (We check the shared repository last -+because it is very large, so takes longer to serialize than other CTF files do). -+ -+The need to keep the number of calls to ctf_update() down means we must avoid -+all access to the CTF types we are assembling if we can possibly get at the same -+data another way. Hence the member_counts hash, a member of the per_module -+state, which tracks the number of members in structures with a given C-style -+name and their CTF IDs. 
This structure allows us to handle the (valid) C idiom -+of redeclaring the same structure with a different number of members, merging -+the definitions across translation units and discarding them (iff the structure -+was unshared) when we transition into a new module, without ever having to -+consult the CTF to see how many members we put into it. (We have to use the -+C-style name here, because by definition the type IDs of such redeclared -+structures will be different, since a type ID contains a line number and -+translation unit name.) -+ -+ -+There's more error-handling complexity inside die_to_ctf(), where errors from -+libdtrace-ctf are actually reported (there may be multiple of them for a single -+type, e.g. if we are assembling a structure and several members somehow refer to -+a type we do not know about). -+ -+die_to_ctf() itself has the sort of parameter list that can make people swear -+off C for life. It is largely explained in the description of ctf_assembly_fun. -+Most parts of it are hardly used in the function itself, just passed down to CTF -+assembly functions. -+ -+Finally, we must note the override flag. Both die_to_ctf() and -+construct_ctf_id() return a CTF ID. This is thrown away by the DWARF walking -+code (the function construct_ctf() exists just for that purpose), but when -+called by lookup_ctf_type(), this CTF ID is taken to be the single ID of the CTF -+type that's just been assembled. Normally this is the same as the CTF ID -+returned by the CTF assembly function for the top-level DWARF DIE, but there are -+a few structures for which we want to return the result of some other CTF -+assembly function. -+ -+The only currently-existing example is array dimensions, which DWARF represents -+as a typed array DIE whose child is a dimension, but which CTF represents as an -+array-with-dimensions that you can't change afterwards. 
We can't assemble an -+'array' at the top level because we don't know how big it is, but we have to -+track the type recorded there somehow. We handle this by having -+assemble_ctf_array(), the assembly function for the top-level DW_TAG_array_type -+DIE, simply look up the type of the array's members and return its ID as if it -+had just constructed it, after which assemble_ctf_array_dimension(), the -+assembly function for DW_TAG_subrange_type, actually constructs the array, -+wrapping it around the CTF 'ID' 'assembled' by the parent and setting the -+override flag to make sure that this is what is really recorded. -+ -+ -+CTF construction functions -+-------------------------- -+ -+Each CTF construction function takes a single DWARF DIE and turns it into CTF, -+somehow. They are laid out in the assembly table described in 'toplevel DWARF -+walkers' above. They all start the same way, with a series of CTF_DW_ENFORCE or -+CTF_DW_ENFORCE_NOT assertions. These guard against corrupted DWARF missing some -+of the attributes we need, or DWARF containing attributes which indicate that we -+can't handle the content (e.g. DW_AT_signature or DW_AT_specification on -+structures, which would both indicate this is DWARF 4, which we can't handle -+yet.) -+ -+We'll go through these functions one by one, pointing out anything that -+maintainers should be aware of. -+ -+ -+assemble_ctf_base() assembles all integral base types (DW_TAG_base_type) and -+transforms them into the corresponding CTF type. The functions we need to call -+for this in the CTF API all have the same type signature but have different -+names; CTF also distinguishes between the various differently-sized -+floating-point types, so we must figure out from the type size which type a -+given DWARF base type is referring to. 
We map from DWARF encoding to a triple -+of (CTF addition function, CTF integral type, type size) where the latter is -+optional and depends on the size of the DWARF type we are encoding and the size -+of various floating-point types on the current system. (This does mean that -+cross-compilation using dwarf2ctf is likely to fail fairly often: we need -+machinery to determine the sizeof() types on the target system before that can -+function.) -+ -+This sizeof()-based search procedure is why we do not currently support -+DW_AT_bit_size for base types: we could easily support it for sizes that are multiples of 8, -+but GCC happens to emit DW_AT_byte_size in this case. In C DW_AT_bit_size is -+likely to be emitted only for bitfields in structures anyway, not for base -+types. -+ -+ -+assemble_ctf_pointer() and assemble_ctf_typedef() are trivial: look up the -+associated type with lookup_ctf_type() and assemble the appropriate thing. -+assemble_ctf_cvr_qual() is almost as trivial, but has to figure out which of -+const, volatile or restrict it was called for and call the corresponding CTF API -+function. assemble_ctf_enumeration() and assemble_ctf_enumerator() are quite -+simple too. -+ -+ -+assemble_ctf_variable() has a couple of extra complexities: we unconditionally -+set the skip parameter to SKIP_SKIP, suppressing recursion into containing DIEs, -+since we already know we won't care about any of them. Also, while the -+deduplication pass unifies opaque and non-opaque structures into the same type, -+it never makes sure that variables declared in the same header by translation -+units which have opaque versus non-opaque structures in scope are deduplicated. -+e.g.
you could well end up with these two type IDs, depending on whether -+<linux/pid_namespace.h> was included before <linux/pid.h> in a given translation -+unit: -+ -+////struct pid_namespace var init_pid_ns -+/path/to/kernel/include/linux/pid_namespace.h//19//struct pid_namespace var init_pid_ns -+ -+These variables both refer to the same type, but deduplicating them would -+require an additional deduplication pass. Since variables are always terminal -+and nothing can refer to them, nothing will ever look up any of those type IDs -+(since the only thing that looks up type IDs is code that is searching for types -+that other types depend on). So we don't care about this duplication, and -+running an additional deduplication pass to eliminate it would slow down -+dwarf2ctf to no good end. It's better just to ignore duplicate errors from -+ctf_add_variable(). -+ -+ -+assemble_ctf_array() and assemble_ctf_array_dimension() we talked about -+above. One last subtlety remains, which is that figuring out the actual -+dimensionality of an array is complicated enough that it has been hived off into -+a private_subrange_dimension() function, called both from here and from -+type_id(). Arrays with neither a DW_AT_upper_bound nor a DW_AT_count, and -+arrays without an indexing type, are best considered flexible arrays; arrays -+whose upper bound or count is not unsigned or signed integral data are also -+flexible (perhaps they're using a full-blown location list, but we can't encode -+that in CTF so we treat it as flexible); and if an upper bound is used, we want -+to add one to its value before treating it as a count of elements. -+ -+ -+This leaves structure/union assembly, both of which are assembled by the same -+pair of functions, assemble_ctf_struct_union() for the type itself and -+assemble_ctf_su_member() for the individual members.
As with -+assemble_ctf_cvr_qual(), we have to look at the tag to figure out which CTF -+function to use to do the assembly, but we have an extra constraint: it is -+perfectly idiomatic C to declare a structure repeatedly with a different number -+of members every time. This is perfectly permissible as long as the leading -+portions of all declarations match. We do not verify this (we hope that the -+compiler will diagnose it, which it will unless the conflicting declarations -+cross modules), though perhaps we should: we simply look up the structure in the -+CTF and the DWARF and skip assembly of the structure members via SKIP_SKIP if -+the already-assembled structure has at least as many members as the current one. -+ -+assemble_ctf_su_member() is by far the most complex of the assembly functions. -+It has to handle members that already exist, members that need assembly, members -+that correspond to unnamed structure members, numerous different ways of -+representing structure offsets and members with no offset at all. -+ -+The offset computation is quite laborious and by no means complete: a complete -+implementation would require an interpreter for DWARF location lists, which is -+total overkill given that in DWARF2 GCC emits a totally stereotyped location -+list, and in DWARF3+ we don't need location list parsing at all. CTF wants an -+offset in bits. -+ -+We have five cases: -+ - for DW_AT_data_bit_offset, we just use the offset unchanged. -+ -+ - for DW_AT_data_member_location with an integral form (data2, data4, data8, -+ udata, or sdata) we just look it up and multiply it by eight, adding the -+ parent's DW_AT_bit_offset to handle structures nested inside other -+ structures. -+ -+ - for DW_AT_data_member_location with a block form, we make sure that the list -+ is of one particular simple form (DW_OP_plus_uconst and a constant value in -+ bytes), and abort assembly otherwise. 
The only case I know of where this -+ test will trip is C++ virtual bases: if people are using C++ code with -+ virtual bases inside the kernel they deserve sympathy, but probably not -+ support in the code. CTF can't represent C++ types in any case. -+ -+ - for expression location lists, or anything else that we don't understand, we -+ simply die (we could simply skip the type, but this seems serious enough that -+ dying is warranted). -+ -+ - with none of these present, we have no offset: the member is at the same -+ location as the start of the structure. -+ -+But where is the 'start of the structure'? That depends on whether this is an -+unnamed struct/union member (usually a union). If it is, we want to fold all -+its members directly into the parent structure, with their offsets increased by -+the offset of the unnamed member as a whole. This is done by directly calling -+die_to_ctf() with the first child of the anonymous member's type and with all -+other parameters set as if the parent DIE was the current structure, thus -+fooling die_to_ctf() into believing that these members are members of the -+current structure, not of the anonymous one. The offset-increasing magic is -+done via the parent_bias parameter to die_to_ctf() and all the CTF construction -+functions: it is ignored by all of them except for assemble_ctf_su_member() -+itself, which adds the parent bias onto the normally-computed offset, and is -+otherwise passed down unchanged to all children. 
This means that even this -+terribly contrived case works: -+ -+struct horror { -+ int spacer; -+ union { -+ struct { -+ int spacer; -+ struct { -+ int foo; -+ int bar; -+ } b; -+ } a; -+ }; -+}; -+ -+In this situation, horror.a.b.bar may have: -+ -+ - a nonzero parent_bias due to the offset of the anonymous union in 'struct -+ horror' -+ - a nonzero offset due to the offset of 'bar' in its containing structure -+ - if DW_AT_data_member_location with integral form is used, a nonzero -+ DW_AT_bit_offset of 'b' in 'a' -+ -+If this is not an anonymous union, we are dealing with only one member: we look -+up its type and add it reasonably conventionally via ctf_add_member_offset(). -+Even here there are subtleties: we use construct_ctf_id() directly rather than -+via lookup_ctf_type() so we can get a better error message on failure, and we -+ignore any duplicate-member errors because this is probably a sign that this -+structure has already been encountered and we are working through another -+instance of it with more members. -+ -+ -+Writeout -+-------- -+ -+ write_types() -+ -+This couldn't really be simpler, as the trivial call graph shows. We create an -+output directory with the requested name, then work over the entire -+module_to_ctf_file hash, writing out every CTF file into a new suitably-named -+file via zlib's compressed file I/O functions. -diff --git a/Documentation/kbuild/kconfig.rst b/Documentation/kbuild/kconfig.rst -index dce6801d66c9..a9a855f894b3 100644 ---- a/Documentation/kbuild/kconfig.rst -+++ b/Documentation/kbuild/kconfig.rst -@@ -154,6 +154,11 @@ KCONFIG_AUTOCONFIG - This environment variable can be set to specify the path & name of the - "auto.conf" file. Its default value is "include/config/auto.conf". - -+KCONFIG_TRISTATE -+---------------- -+This environment variable can be set to specify the path & name of the -+"tristate.conf" file. Its default value is "include/config/tristate.conf". 
-+ - KCONFIG_AUTOHEADER - ------------------ - This environment variable can be set to specify the path & name of the -diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst -index e47863575917..bf32c66f3ba9 100644 ---- a/Documentation/process/changes.rst -+++ b/Documentation/process/changes.rst -@@ -55,9 +55,14 @@ iptables 1.4.2 iptables -V - openssl & libcrypto 1.0.0 openssl version - bc 1.06.95 bc --version - Sphinx\ [#f1]_ 1.3 sphinx-build --version -+elfutils\ [#f2]_ 0.156 eu-readelf --version -+pkg-config\ [#f2]_ 0.16 pkg-config --version -+glib\ [#f2]_ 2.x pkg-config --exists glib-2.0 && echo present -+libdtrace-ctf\ [#f2]_ 1.1 - ====================== =============== ======================================== - - .. [#f1] Sphinx is needed only to build the Kernel documentation -+.. [#f2] This is needed at build-time when CTF or DTrace are enabled - - Kernel compilation - ****************** -@@ -84,7 +89,8 @@ pkg-config - The build system, as of 4.18, requires pkg-config to check for installed - kconfig tools and to determine flags settings for use in - 'make {g,x}config'. Previously pkg-config was being used but not --verified or documented. -+verified or documented. dwarf2ctf also relies on it during 'make ctf' and -+while building out-of-tree modules with CONFIG_CTF enabled. - - Flex - ---- -@@ -356,6 +362,21 @@ OpenSSL - - - <https://www.openssl.org/> - -+elfutils -+-------- -+ -+- <https://fedorahosted.org/elfutils/> -+ -+glib 2.x -+-------- -+ -+- <http://www.gtk.org/> -+ -+libdtrace-ctf -+------------- -+ -+- <https://oss.oracle.com/git/?p=libdtrace-ctf.git> -+ - System utilities - **************** - -diff --git a/Makefile b/Makefile -index 713f93cceffe..814d9903bd3e 100644 ---- a/Makefile -+++ b/Makefile -@@ -674,7 +674,7 @@ $(KCONFIG_CONFIG): - # - # This exploits the 'multi-target pattern rule' trick. - # The syncconfig should be executed only once to make all the targets. 
--%/auto.conf %/auto.conf.cmd: $(KCONFIG_CONFIG) -+%/auto.conf %/auto.conf.cmd %/tristate.conf: $(KCONFIG_CONFIG) - $(Q)$(MAKE) -f $(srctree)/Makefile syncconfig - else # !may-sync-config - # External modules and some install targets need include/generated/autoconf.h -@@ -1082,7 +1082,7 @@ cmd_link-vmlinux = \ - $(CONFIG_SHELL) $< $(LD) $(KBUILD_LDFLAGS) $(LDFLAGS_vmlinux) ; \ - $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) - --vmlinux: scripts/link-vmlinux.sh autoksyms_recursive $(vmlinux-deps) FORCE -+vmlinux: scripts/link-vmlinux.sh autoksyms_recursive $(vmlinux-deps) modules_thick.builtin FORCE - +$(call if_changed,link-vmlinux) - - targets := vmlinux -@@ -1294,6 +1294,45 @@ modules: $(if $(KBUILD_BUILTIN),vmlinux) modules.order - modules.order: descend - $(Q)$(AWK) '!x[$$0]++' $(addsuffix /$@, $(build-dirs)) > $@ - -+ifneq (CONFIG_CTF@,'@') -+ -+# We need to force everything to be built, since we need the .o files below. -+KBUILD_BUILTIN := 1 -+ -+# This contains all the object files that are built directly into the -+# kernel (including built-in modules), for consumption by dwarf2ctf in -+# Makefile.modpost. -+# This is made doubly annoying by the presence of '.o' files which are actually -+# thin ar archives, and the need to support file(1) versions too old to -+# recognize them as archives at all. (So we assume that everything that is not -+# an ELF object is an archive.) 
-+ifeq ($(SRCARCH),x86) -+objects.builtin: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),bzImage) FORCE -+else -+objects.builtin: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) FORCE -+endif -+ @echo $(KBUILD_VMLINUX_OBJS) | \ -+ tr " " "\n" | grep "\.o$$" | xargs -r file | \ -+ grep ELF | cut -d: -f1 > objects.builtin -+ @for archive in $$(echo $(KBUILD_VMLINUX_OBJS) |\ -+ tr " " "\n" | xargs -r file | grep -v ELF | cut -d: -f1); do \ -+ $(AR) t "$$archive" >> objects.builtin; \ -+ done -+ -+ctf: vmlinux.ctfa -+PHONY += ctf -+ -+# Making CTF needs the builtin files unless out-of-tree. -+ifeq ($(KBUILD_EXTMOD),) -+vmlinux.ctfa: modules_thick.builtin objects.builtin -+endif -+vmlinux.ctfa: -+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modfinal vmlinux.ctfa -+else -+PHONY += objects.builtin -+objects.builtin: -+endif -+ - # Target to prepare building external modules - PHONY += modules_prepare - modules_prepare: prepare -@@ -1315,6 +1354,9 @@ _modinst_: - @sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order - @cp -f modules.builtin $(MODLIB)/ - @cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/ -+ @if [ -f $(objtree)/vmlinux.ctfa ] ; then \ -+ cp -f $(objtree)/vmlinux.ctfa $(MODLIB)/kernel ; \ -+ fi - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst - - # This depmod is only for convenience to give the initial -@@ -1346,6 +1388,23 @@ modules modules_install: - - endif # CONFIG_MODULES - -+# modules_thick.builtin maps from kernel modules (or rather the object file -+# names they would have had had they not been built in) to their constituent -+# object files: dwarf2ctf uses this to determine which modules any given object -+# file is part of. (We cannot eliminate the slight redundancy here without -+# double-expansion.) 
-+ -+modthickbuiltin-dirs := $(addprefix _modthickbuiltin_, $(build-dirs)) -+ -+modules_thick.builtin: $(modthickbuiltin-dirs) -+ $(Q)$(AWK) '!x[$$0]++' $(addsuffix /$@, $(build-dirs)) > $@ -+ -+PHONY += $(modthickbuiltin-dirs) -+# tristate.conf is not included from this Makefile. Add it as a prerequisite -+# here to make it self-healing in case somebody accidentally removes it. -+$(modthickbuiltin-dirs): include/config/tristate.conf -+ $(Q)$(MAKE) $(modbuiltin)=$(patsubst _modthickbuiltin_%,%,$@) builtin-file=modules_thick.builtin -+ - ### - # Cleaning is done on three levels. - # make clean Delete most generated files -@@ -1354,8 +1413,8 @@ endif # CONFIG_MODULES - # make distclean Remove editor backup files, patch leftover files and the like - - # Directories & files removed with 'make clean' --CLEAN_DIRS += include/ksym --CLEAN_FILES += modules.builtin modules.builtin.modinfo modules.nsdeps -+CLEAN_DIRS += include/ksym .ctf -+CLEAN_FILES += modules.builtin modules.builtin.modinfo objects.builtin modules.nsdeps .ctf.filelist .ctf.filelist.raw - - # Directories & files removed with 'make mrproper' - MRPROPER_DIRS += include/config include/generated \ -@@ -1456,6 +1515,8 @@ help: - @echo ' (requires a recent binutils and recent build (System.map))' - @echo ' dir/file.ko - Build module including final link' - @echo ' modules_prepare - Set up for building external modules' -+ @echo ' ctf - Generate CTF type information for DTrace, installed by ' -+ @echo ' make modules_install' - @echo ' tags/TAGS - Generate tags file for editors' - @echo ' cscope - Generate cscope index' - @echo ' gtags - Generate GNU GLOBAL index' -@@ -1712,7 +1773,7 @@ clean: $(clean-dirs) - -o -name '*.symtypes' -o -name 'modules.order' \ - -o -name '.tmp_*.o.*' \ - -o -name '*.c.[012]*.*' \ -- -o -name '*.ll' \ -+ -o -name '*.ll' -o -name '*.ctfa' \ - -o -name '*.gcno' \) -type f -print | xargs rm -f - - # Generate tags for editors -diff --git a/lib/Kconfig b/lib/Kconfig -index 
bc7e56370129..1397f69eaa8d 100644 ---- a/lib/Kconfig -+++ b/lib/Kconfig -@@ -572,6 +572,18 @@ config DIMLIB - # - config LIBFDT - bool -+# -+# CTF support is select'ed if needed -+# -+config CTF -+ bool "Compact Type Format generation" -+ default n -+ select STRIP_ASM_SYMS -+ depends on DEBUG_INFO && !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT && !DEBUG_INFO_DWARF4 && DTRACE -+ help -+ Emit a compact, compressed description of the kernel's datatypes and -+ global variables into the vmlinux.ctfa archive (for in-tree modules) -+ or into .ctf sections in kernel modules (for out-of-tree modules). - - config OID_REGISTRY - tristate -diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include -index 6cabf20ce66a..d7287773b455 100644 ---- a/scripts/Kbuild.include -+++ b/scripts/Kbuild.include -@@ -162,6 +162,12 @@ ld-ifversion = $(shell [ $(ld-version) $(1) $(2) ] && echo $(3) || echo $(4)) - # $(Q)$(MAKE) $(build)=dir - build := -f $(srctree)/scripts/Makefile.build obj - -+### -+# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.modbuiltin obj= -+# Usage: -+# $(Q)$(MAKE) $(modbuiltin)=dir -+modbuiltin := -f $(srctree)/scripts/Makefile.modbuiltin obj -+ - ### - # Shorthand for $(Q)$(MAKE) -f scripts/Makefile.dtbinst obj= - # Usage: -diff --git a/scripts/Makefile b/scripts/Makefile -index 5e75802b1a44..8f6353508366 100644 ---- a/scripts/Makefile -+++ b/scripts/Makefile -@@ -37,6 +37,7 @@ hostprogs += unifdef - - subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins - subdir-$(CONFIG_MODVERSIONS) += genksyms -+subdir-$(CONFIG_CTF) += dwarf2ctf - subdir-$(CONFIG_SECURITY_SELINUX) += selinux - - # Let clean descend into subdirs -diff --git a/scripts/Makefile.modbuiltin b/scripts/Makefile.modbuiltin -new file mode 100644 -index 000000000000..f2c085e8640f ---- /dev/null -+++ b/scripts/Makefile.modbuiltin -@@ -0,0 +1,60 @@ -+# SPDX-License-Identifier: GPL-2.0 -+# ========================================================================== -+# Generating modules_thick.builtin -+# 
========================================================================== -+ -+src := $(obj) -+ -+PHONY := __modbuiltin -+__modbuiltin: -+ -+include include/config/auto.conf -+# tristate.conf sets tristate variables to uppercase 'Y' or 'M' -+# That way, we get the list of built-in modules in obj-Y -+include include/config/tristate.conf -+ -+include scripts/Kbuild.include -+ -+ifdef building_out_of_srctree -+# Create output directory if not already present -+_dummy := $(shell [ -d $(obj) ] || mkdir -p $(obj)) -+endif -+ -+# The filename Kbuild has precedence over Makefile -+kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) -+kbuild-file := $(if $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Kbuild,$(kbuild-dir)/Makefile) -+include $(kbuild-file) -+ -+include scripts/Makefile.lib -+__subdir-Y := $(patsubst %/,%,$(filter %/, $(obj-Y))) -+subdir-Y += $(__subdir-Y) -+subdir-ym := $(sort $(subdir-y) $(subdir-Y) $(subdir-m)) -+subdir-ym := $(addprefix $(obj)/,$(subdir-ym)) -+pathobj-Y := $(addprefix $(obj)/,$(obj-Y)) -+ -+modthickbuiltin-subdirs := $(patsubst %,%/modules_thick.builtin, $(subdir-ym)) -+modthickbuiltin-target := $(obj)/modules_thick.builtin -+ -+__modbuiltin: $(obj)/$(builtin-file) $(subdir-ym) -+ @: -+ -+$(modthickbuiltin-target): $(subdir-ym) FORCE -+ $(Q) $(foreach mod-o, $(filter %.o,$(obj-Y)),\ -+ printf "%s:" $(addprefix $(obj)/,$(mod-o)) >> $@; \ -+ printf " %s" $(sort $(strip $(addprefix $(obj)/,$($(mod-o:.o=-objs)) \ -+ $($(mod-o:.o=-y)) $($(mod-o:.o=-Y))))) >> $@; \ -+ printf "\n" >> $@; ) \ -+ cat /dev/null $(modthickbuiltin-subdirs) >> $@; -+ -+PHONY += FORCE -+ -+FORCE: -+ -+# Descending -+# --------------------------------------------------------------------------- -+ -+PHONY += $(subdir-ym) -+$(subdir-ym): -+ $(Q)$(MAKE) $(modbuiltin)=$@ builtin-file=$(builtin-file) -+ -+.PHONY: $(PHONY) -diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal -index 411c1e600e7d..abe9dcc72bc0 100644 ---- a/scripts/Makefile.modfinal 
-+++ b/scripts/Makefile.modfinal -@@ -1,11 +1,21 @@ - # SPDX-License-Identifier: GPL-2.0-only - # =========================================================================== --# Module final link -+# Module final link and CTF generation - # =========================================================================== -+# 1) compile all <module>.mod.c files -+# 2) for external modules, generate CTF for the module (there is an extra, -+# externally-invoked target that does this for the entire kernel but does -+# not invoke the rst of the module-building process) -+# 3) final link of the module to a <module.ko> file -+ -+# We need secondary expansion for 'module-ctfs-modular-prereq', below. -+ -+.SECONDEXPANSION: - - PHONY := __modfinal - __modfinal: - -+include include/config/auto.conf - include $(srctree)/scripts/Kbuild.include - - # for c_flags -@@ -27,6 +37,128 @@ quiet_cmd_cc_o_c = CC [M] $@ - %.mod.o: %.mod.c FORCE - $(call if_changed_dep,cc_o_c) - -+# Generate CTF for the entire kernel, or for the module alone if this is a -+# build of an external module. -+ -+# These are overridden below for standalone modules only. -+module-ctfs-modular-prereq = -+module-ctfs-modular = -+module-ctf-flags = -+cmd_touch_ctf = -+ctf-dir = ///.nonexistent -+cmd-touch-ctf = @: -+ -+ifdef CONFIG_CTF -+ -+# This is quite tricky. If called for non-external-modules, dwarf2ctf needs to -+# be told about all the built-in objects as well as all the external modules -- -+# but Makefile.modpost only knows about the latter. So the toplevel makefile -+# emits the names of the built-in objects into a temporary file, which is -+# then catted and its contents used as prerequisites by this rule. -+# -+# We write the names of the object files to be scanned for CTF content into a -+# file, then use that, to avoid hitting command-line length limits. 
-+ -+ifeq ($(KBUILD_EXTMOD),) -+ctf-dir-mk := -+quiet_cmd_ctf = CTFA -+ cmd_ctf = scripts/dwarf2ctf/dwarf2ctf vmlinux.ctfa $(srctree) objects.builtin modules_thick.builtin $(srctree)/scripts/dwarf2ctf/member.blacklist $(ctf-filelist) -+ctf-builtins := objects.builtin -+ctf-builtins-prereq := $(ctf-builtins) -+ ctf-modules := $(shell find . -name '*.ko' -print) -+ctf-filelist := .ctf.filelist -+ctf-filelist-raw := .ctf.filelist.raw -+ctf-stamp := -+ -+else -+ctf-dir := $(KBUILD_EXTMOD)/.ctf -+ctf-dir-mk := $(ctf-dir) -+quiet_cmd_ctf = CTF -+ cmd_ctf = scripts/dwarf2ctf/dwarf2ctf $(ctf-dir) -e $(ctf-filelist) -+ctf-builtins := ////.no-builtins -+ctf-builtins-prereq := -+ctf-modules := $(modules:.ko=.o) -+ctf-filelist := $(ctf-dir)/$(notdir $(M)-extmod).ctf.filelist -+ctf-filelist-raw := $(ctf-dir)/$(notdir $(M)-extmod).ctf.filelist.raw -+ctf-stamp = $(ctf-dir)/$(notdir $(M)-extmod).stamp -+ -+# All the modules' CTF depends on the stamp file. -+ -+all-module-ctfs = $(addprefix $(ctf-dir)/,$(notdir $(modules:.ko=.mod.ctf))) -+$(all-module-ctfs): $(ctf-stamp) -+ -+endif -+ -+# Split a list up like shell xargs does. 
-+define xargs = -+$(1) $(wordlist 1,1024,$(2)) -+$(if $(word 1025,$(2)),$(call xargs,$(1),$(wordlist 1025,$(words $(2)),$(2)))) -+endef -+ -+$(ctf-filelist-raw): $(ctf-builtins-prereq) $(ctf-modules) -+ @rm -f $(ctf-filelist-raw); -+ @if [[ -n "$(ctf-dir-mk)" ]]; then \ -+ mkdir -p "$(ctf-dir-mk)"; \ -+ fi -+ $(call xargs,@printf "%s\n" >> $(ctf-filelist-raw),$^) -+ @touch $(ctf-filelist-raw) -+ -+$(ctf-filelist): $(ctf-filelist-raw) -+ @rm -f $(ctf-filelist); -+ @cat $(ctf-filelist-raw) | while read -r obj; do \ -+ case $$obj in \ -+ $(ctf-builtins)) cat $$obj >> $(ctf-filelist);; \ -+ *.a) ar t $$obj > $(ctf-filelist);; \ -+ *.builtin) cat $$obj >> $(ctf-filelist);; \ -+ *) echo "$$obj" >> $(ctf-filelist);; \ -+ esac; \ -+ done -+ @touch $(ctf-filelist) -+ -+ifeq ($(KBUILD_EXTMOD),) -+# The CTF depends on the output CTF file list, and that depends -+# on the .ko files for the modules. -+vmlinux.ctfa: $(ctf-filelist) -+ $(call if_changed,ctf) -+else -+ -+# The CTF depends on the output CTF file list, and that depends -+# on the .o files for the modules -+$(ctf-stamp): $(ctf-filelist) -+ $(call if_changed,ctf) -+ @shopt -s nullglob; \ -+ for name in $(ctf-dir)/*.ctf.new; do \ -+ $(srctree)/scripts/move-if-change $$name $${name%.new}; \ -+ done; \ -+ touch $(ctf-stamp) -+ -+# Expands to the names of the CTF files to be incorporated into this module. -+# The former is used in prerequisite lists, thanks to secondary expansion. -+ -+module-ctfs-modular-prereq = $$(addprefix $(ctf-dir)/,$$(notdir $$*.mod.ctf)) -+module-ctfs-modular = $(addprefix $(ctf-dir)/,$(notdir $*.mod.ctf)) -+ -+# Expands to the name of a CTF file, given a target of a module name given to -+# one of the link rules below. -+ -+ctf-module-name = $(addprefix $(ctf-dir)/,$(notdir $(basename $@)).mod.ctf) -+ -+# An objcopy --add-section argument to add the CTF section to a standalone -+# module. 
-+ -+module-ctf-flags = --add-section .ctf=$(ctf-module-name) -+ -+# We have to put content in our dummy no-CTF files because --add-section -+# in binutils 2.20 silently fails if asked to add an empty file as a section. -+ -+cmd_touch_ctf = @for name in $(filter $(ctf-dir)/%,$(module-ctfs-modular)); do \ -+ test -f $$name || dd if=/dev/zero of=$$name bs=1 count=1 2>/dev/null; \ -+ done -+ -+endif # KBUILD_EXTMOD -+ -+endif # !CONFIG_CTF -+ - ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) - - quiet_cmd_ld_ko_o = LD [M] $@ -@@ -34,10 +166,13 @@ quiet_cmd_ld_ko_o = LD [M] $@ - $(LD) -r $(KBUILD_LDFLAGS) \ - $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ - $(addprefix -T , $(KBUILD_LDS_MODULE)) \ -- -o $@ $(filter %.o, $^); \ -+ $(LDFLAGS_$(modname)) -o $@.tmp \ -+ -o $@.tmp $(patsubst $(ctf-dir)/%,,$(filter %.o, $^)) && \ -+ $(OBJCOPY) $(module-ctf-flags) $@.tmp $@ && rm -f $@.tmp ; \ - $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) - --$(modules): %.ko: %.o %.mod.o $(KBUILD_LDS_MODULE) FORCE -+$(modules): %.ko: %.o %.mod.o $(KBUILD_LDS_MODULE) $(module-ctfs-modular-prereq) FORCE -+ $(call cmd_touch_ctf) - +$(call if_changed,ld_ko_o) - - targets += $(modules) $(modules:.ko=.mod.o) -diff --git a/scripts/dwarf2ctf/.gitignore b/scripts/dwarf2ctf/.gitignore -new file mode 100644 -index 000000000000..e37b47cf3028 ---- /dev/null -+++ b/scripts/dwarf2ctf/.gitignore -@@ -0,0 +1 @@ -+dwarf2ctf -diff --git a/scripts/dwarf2ctf/Makefile b/scripts/dwarf2ctf/Makefile -new file mode 100644 -index 000000000000..d908050a271f ---- /dev/null -+++ b/scripts/dwarf2ctf/Makefile -@@ -0,0 +1,11 @@ -+ifdef CONFIG_CTF -+hostprogs-y := dwarf2ctf -+always := $(hostprogs-y) -+ -+dwarf2ctf-objs := dwarf2ctf.o eu_simple.o -+ -+HOSTCFLAGS_eu_simple.o := -I$(srctree)/scripts -+HOSTCFLAGS_dwarf2ctf.o := $(shell pkg-config --cflags glib-2.0) -I$(srctree)/scripts -+ -+HOSTLDLIBS_dwarf2ctf := -ldtrace-ctf -lelf -ldw $(shell pkg-config --libs glib-2.0) -lz -+endif 
-diff --git a/scripts/dwarf2ctf/dwarf2ctf.c b/scripts/dwarf2ctf/dwarf2ctf.c -new file mode 100644 -index 000000000000..cd523c67014e ---- /dev/null -+++ b/scripts/dwarf2ctf/dwarf2ctf.c -@@ -0,0 +1,4961 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * dwarf2ctf.c: Read in DWARF[23] debugging information from some set of ELF -+ * files, and generate CTF in correspondingly-named files, or in a single -+ * representation meant for mmapping. -+ * -+ * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ */ -+ -+#define _GNU_SOURCE 1 -+#include <stdio.h> -+#include <stdlib.h> -+#include <string.h> -+#include <errno.h> -+#include <stddef.h> -+#include <sys/stat.h> -+#include <sys/types.h> -+#include <fcntl.h> -+#include <limits.h> -+#include <endian.h> -+#include <unistd.h> -+ -+#include <libelf.h> -+#include <dwarf.h> -+#include <elfutils/libdwfl.h> -+#include <elfutils/libdw.h> -+#include <elfutils/version.h> -+#include <sys/ctf_api.h> -+#include <glib.h> -+ -+#include <eu_simple.h> -+ -+#ifndef PATH_MAX -+#define PATH_MAX 1024 -+#endif -+ -+#define __unused__ __attribute__((__unused__)) -+ -+/* -+ * If non-NULL, tracing is on. -+ */ -+static const char *trace; -+ -+/* -+ * Trace something. -+ */ -+#ifdef DEBUG -+#define dw_ctf_trace(format, ...) do { \ -+ if (trace) \ -+ fprintf(stderr, (format), ## __VA_ARGS__); \ -+} while (0) -+#else -+#define dw_ctf_trace(format, ...) -+#endif -+ -+/* -+ * Run dwarf2ctf over a single object file or set thereof. -+ * -+ * output_dir is the directory into which the CTF goes, if 'standalone', or the -+ * CTF archive file name otherwise. -+ */ -+static void run(char *output, int standalone); -+ -+/* -+ * Whether we are deduplicating. 
We do not deduplicate if run over external -+ * modules. -+ */ -+static int deduplicating; -+ -+/* -+ * A fully descriptive CTF type ID: both file and type ID in one place. -+ */ -+struct ctf_full_id { -+ ctf_file_t *ctf_file; -+ ctf_id_t ctf_id; -+#ifdef DEBUG -+ char module_name[PATH_MAX]; -+ char file_name[PATH_MAX]; -+#endif -+}; -+ -+/* -+ * A hash mapping 'atoms' (almost entirely type IDs) to nothing. -+ */ -+static GHashTable *atoms; -+ -+/* -+ * A mapping from the type ID of a DIE (see type_id()) to ctf_full_id_t's -+ * describing the type with that ID. The type ID is an atom. -+ * -+ * This is used to look up types regardless of which CTF file they may reside -+ * in. Not the same as a DWARF4 type signature because we must encode scope -+ * information which DWARF4 can encode in its DIE refs. -+ * -+ * (TODO: store a hash of the ID rather than the ID itself, to save memory. -+ * Makes debugging slightly harder though.) -+ */ -+static GHashTable *id_to_type; -+ -+/* -+ * A mapping from the type ID of a DIE (an atom) to the name of the module (and -+ * thus CTF table) incorporating that type. (Modules in this context, and -+ * throughout dwarf2ctf, are DTrace modules: a name without suffix or path.) -+ * -+ * This is used to merge types identical across modules (e.g. those in global -+ * header files). -+ */ -+static GHashTable *id_to_module; -+ -+/* -+ * Module-specific state. The module named 'vmlinux' is that corresponding to -+ * the types in always-built-in translation units; the module named 'shared_ctf' -+ * (not appearing in this mapping) is that corresponding to types shared between -+ * more than one module (even between two currently-built-in modules: we do not -+ * distinguish at this level between built-in modules and non-built-in modules.) -+ */ -+static GHashTable *per_module; -+ -+/* -+ * The data structure that per_module maps module names to. -+ */ -+struct per_module { -+ /* -+ * The CTF file containing the types in this module. 
-+ */ -+ ctf_file_t *ctf_file; -+ -+ /* -+ * A hash from a "CTF-form" structure name (in the form 's/u NAME') to -+ * a struct ctf_memb_count (see below). -+ */ -+ GHashTable *member_counts; -+}; -+ -+/* -+ * A count associating a type ID relating to a structure or union with a count -+ * of members in that structure. -+ */ -+struct ctf_memb_count { -+ ctf_id_t ctf_id; -+ size_t count; -+}; -+ -+/* -+ * A mapping from the absolute pathname of a TU to a hashtable mapping -+ * DIE offsets of child DIEs to DIE offsets of parents. Populated on first -+ * iteration. Contains only those DIEs that we know are necessary for other -+ * functions' use of this structure, to keep memory usage down. -+ */ -+static GHashTable *fn_to_die_to_parent; -+ -+/* -+ * Get a ctf_file out of the per_module hash for a given module. -+ */ -+static ctf_file_t *lookup_ctf_file(const char *module_name); -+ -+/* -+ * The names of the object files to run over. Except in -e mode, this comes -+ * straight from the module filelist passed in. -+ */ -+static char **object_names; -+static size_t object_names_cnt; -+ -+/* -+ * Populate the object_names list from the module filelist. -+ */ -+static void init_object_names(const char *object_names_file); -+ -+/* -+ * Populate and object_to_module from the objects.builtin and modules.builtin -+ * file. -+ */ -+static void init_builtin(const char *builtin_objects_file, -+ const char *builtin_module_file); -+ -+/* -+ * The member blacklist bans fields with specific names in specifically named -+ * structures, declared in specific source files, from being emitted. The -+ * mapping is from absolute source file name:structure.member to NULL (this is -+ * safe because type names cannot contain a colon, and structure names cannot -+ * contain a period). -+ */ -+static GHashTable *member_blacklist; -+ -+/* -+ * Populate the member blacklist from the member_blacklist file. 
-+ */ -+static void init_member_blacklist(const char *member_blacklist_file, -+ const char *srcdir); -+ -+/* -+ * Return 1 if a given DWARF DIE, which must be a DW_TAG_member, appears in the -+ * member blacklist. -+ */ -+static int member_blacklisted(Dwarf_Die *die, Dwarf_Die *parent_die); -+ -+/* -+ * The variable blacklist, like the others, is an automatically-maintained -+ * blacklist giving variables in specific modules which should not be emitted. -+ * (These are variables whose names are ambiguous within a module, and may -+ * appear multiple times in /proc/kallmodsyms, identical but for address and -+ * thus indistinguishable.) -+ * -+ * The mapping is from module`variable to NULL (safe because variable names -+ * cannot begin with a backtick, and even if they could DTrace's notation could -+ * not reference such variables). -+ */ -+static GHashTable *variable_blacklist; -+ -+/* -+ * A mapping from object file name to the name of the module that translation -+ * unit is part of. -+ * -+ * Actual, real, on-disk .ko modules do not appear here, because the translation -+ * is trivial for them. -+ */ -+static GHashTable *object_to_module; -+ -+/* -+ * Initialize a CTF type table, and possibly fill it with those special types -+ * that appear in CTF but not in DWARF (such as 'void'). (This filling happens -+ * only for the type table named "shared_ctf", unless deduplication is turned -+ * off.) -+ * -+ * If this is a local type table, and deduplication is active, make the global -+ * type table its parent. -+ */ -+static void init_ctf_table(const char *module_name); -+ -+/* -+ * A few useful singleton CTF type IDs in the global type table: a void pointer -+ * and a function pointer. Constructed by init_ctf_table(). -+ */ -+static ctf_id_t ctf_void_type; -+static ctf_id_t ctf_funcptr_type; -+ -+/* -+ * Initialize the child->parent DIE mapping for a single file. 
-+ */ -+static void init_parent_die(const char *file_name, Dwfl *dwfl); -+ -+/* -+ * Initialize one layer of a child->parent mapping. -+ */ -+static int init_parent_die_internal(const char *file_name, -+ GHashTable *offs, Dwarf_Die *parent, -+ int depth, int found_subprogram); -+ -+/* -+ * Override the presence and value of FORM_u/sdata attributes on DWARF DIEs, -+ * either adding to it, or replacing it. -+ * -+ * (Used so that a caller of construct_ctf_id() that wants a type to be created -+ * can override aspects of that type.) -+ * -+ * The 'chain', if set, causes the various private_*() functions that handle -+ * overrides to look back along the chain to find a suitable attribute. The -+ * chain must be set on the last element in the array. The search for -+ * attributes terminates at the first match. -+ * -+ * Note: this is not a particularly generic implementation: a better approach -+ * would be to keep walking the chain on DIE_OVERRIDE_ADD, and keep adding until -+ * we are done: but we have only one user of ADD, and it implements the addition -+ * itself because it is adding to a value from a different DIE: so this added -+ * generality is not needed yet. -+ */ -+struct die_override { -+ int tag; -+ int attribute; -+ enum { DIE_OVERRIDE_REPLACE, DIE_OVERRIDE_ADD } op; -+ Dwarf_Sword value; -+ struct die_override *chain; -+}; -+ -+/* -+ * Compute the type ID of a DWARF DIE (with possibly-overridden attributes) and -+ * return it in a new dynamically-allocated string. -+ * -+ * Optionally, call a callback with the computed ID once we know it (this is a -+ * recursive process, so the callback can be called multiple times as the ID -+ * is built up). -+ * -+ * An ID of NULL indicates that this DIE has no ID and need not be considered. 
-+ */ -+static char *type_id(Dwarf_Die *die, struct die_override *overrides, -+ void (*fun)(Dwarf_Die *die, -+ const char *id, -+ struct die_override *overrides, -+ void *data), -+ void *data) __attribute__((__warn_unused_result__)); -+ -+/* -+ * Internal: allows flags to be passed to affect one (and only one) type ID -+ * recursion, without affecting other type_id()s launched from the 'fun'. -+ */ -+static char *type_id_internal(Dwarf_Die *die, -+ struct die_override *overrides, -+ void (*fun)(Dwarf_Die *die, -+ const char *id, -+ struct die_override *overrides, -+ void *data), -+ void *data, -+ int flags); -+ -+/* -+ * Internal: generate the type ID for a type DIE. -+ * -+ * If there are no overrides, look for a bit_size and bit_offset and pass them -+ * down as well. -+ */ -+static char *type_id_type_die(Dwarf_Die *die, -+ Dwarf_Die *type_die, -+ struct die_override *overrides, -+ void (*fun)(Dwarf_Die *die, -+ const char *id, -+ struct die_override *overrides, -+ void *data), -+ void *data); -+ -+/* -+ * Convert 'long unsigned int' to 'sizetype'. Internal use within type_id(). -+ */ -+#define TI_COLLAPSE_SIZETYPE 0x1 -+ -+/* -+ * Process a file, calling the dwarf_process function for every type found -+ * therein (even types in functions). Optionally call tu_init() at the start of -+ * each translation unit, and tu_done() at the end. -+ */ -+static void process_file(const char *file_name, -+ void (*dwarf_process)(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ void *data), -+ void (*tu_init)(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *tu_die, -+ void *data), -+ void (*tu_done)(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *tu_die, -+ void *data), -+ void *data); -+ -+/* -+ * process_file() helper, walking over the top level and picking up types -+ * therein. 
-+ */ -+static void process_tu_func(const char *module_name, -+ const char *file_name, -+ Dwarf *dwarf, -+ Dwarf_Die *parent_die, -+ Dwarf_Die *die, -+ void (*dwarf_process)(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ void *data), -+ void *data); -+ -+/* -+ * Records the type ID of interesting types, the files they are contained in, -+ * and their DWARF offset, so they can be found rapidly. -+ * -+ * Used to avoid rescanning files that can contain no duplicates. -+ */ -+struct dedup_id_file { -+ char *file_name; -+ char *id; -+ Dwarf_Off dieoff; -+}; -+ -+/* -+ * The structure used as the data argument for dedup() and -+ * dedup_alias_fixup(). -+ * -+ * structs_seen tracks the IDs of structures marked as duplicates within a given -+ * translation unit, in order that recursion terminates if two such structures -+ * have pointers to each other. -+ * -+ * vars_seen tracks variables seen in this module, mapping from unadorned name -+ * to a non-NULL pointer (for static, non-'external') or NULL (for non-static or -+ * 'extern'). If a static variable coexists with any other variable with the -+ * same name, static or not, the variable is blacklisted. (Non-static -+ * coexistence is fine, because they are just different references to the same -+ * variable). Note that management of this variable is a little annoying -+ * because it varies by module, not by TU, so we can't use tu_init/tu_done to -+ * manage its lifetime. -+ * -+ * named_structs tracks type IDs and contained modules for every type that may -+ * contain undetected duplicates and thus may require rescanning. -+ * -+ * dwfl and dwfl_file_name identify the opened DWARF file (if any) during the -+ * second duplicates detection pass. -+ * -+ * repeat_detection is set by each phase if it considers that another round of -+ * alias fixup detection is needed. 
-+ */ -+struct dedup_state { -+ const char *file_name; -+ const char *module_name; -+ GHashTable *structs_seen; -+ GList *named_structs; -+ GHashTable *vars_seen; -+ char *dwfl_file_name; -+ Dwarf *dwarf; -+ Dwfl *dwfl; -+ int repeat_detection; -+}; -+ -+/* -+ * Scan and identify duplicates across the entire set of object files. -+ */ -+static void scan_dups(void); -+ -+/* -+ * Recursively detect duplicate types and types referenced by them, and -+ * determine which CTF file they should be located in, and request a -+ * dedup_alias_fixup() pass if any structures are shared. -+ * Determine the mapping from translation unit name to module name. -+ */ -+static void dedup(const char *module_name, const char *file_name, -+ Dwarf_Die *die, Dwarf_Die *parent_die, void *data); -+ -+/* -+ * Do the underlying marking of a DIE as shared, iff need be. (No variable -+ * blacklisting, non-opaque structure checks, or anything else needed only by -+ * top-level DIEs.) -+ * -+ * This function may be called multiple times for overridden DIEs that are -+ * dependent types of bitfields. -+ */ -+static void dedup_mark_inner_die(const char *module_name, Dwarf_Die *die, -+ const char *id, -+ struct die_override *overrides, -+ void *data); -+ -+/* -+ * Note in the dedup_id_file list that we will rescan a DIE in a later duplicate -+ * detection pass. -+ * -+ * A type_id() callback. -+ */ -+static void dedup_will_rescan(Dwarf_Die *die, const char *id, -+ struct die_override *overrides, void *data); -+ -+/* -+ * Note the variable referenced by this DIE in vars_seen: blacklist it if an -+ * entry for this variable already exists in vars_seen and this instance is -+ * static, or if a static entry already exists in vars_seen, whether this -+ * instance is static or not. 
-+ */ -+static void dedup_blacklist_var_dups(Dwarf_Die *die, -+ struct dedup_state *state); -+ -+/* -+ * Detect duplicates and mark seen types for a given type, via a type_id() -+ * callback: used to detect dependent types (particularly those at child-DIE -+ * level) as duplicates. -+ */ -+static void dedup_typeid(Dwarf_Die *die, const char *id, -+ struct die_override *overrides, void *data); -+ -+/* -+ * Mark any aggregates contained within a particular type DIE as seen. This is -+ * needed since even nameless aggregates contained within other aggregates can -+ * be used as the type of members of the outer aggregate (though they cannot -+ * possibly be found in a module different from that of their containing -+ * aggregate, any more than a structure member can). -+ */ -+static void mark_seen_contained(Dwarf_Die *die, const char *module_name, -+ struct die_override *overrides, void *data); -+ -+/* -+ * Determine if some type (whose ultimate base type is a non-opaque structure, -+ * alias, or enum) has an opaque equivalent which is shared, and mark it and -+ * all its bases as shared too if so. -+ * -+ * A list_filter() filter function. -+ */ -+static int dedup_alias_fixup(void *id_file_data, void *data); -+ -+/* -+ * Mark a basic type shared by name and intern it in all relevant hashes. (Used -+ * for marking basic types we don't have a DIE for.) -+ */ -+static void mark_shared_by_name(ctf_file_t *ctf, ctf_id_t ctf_id, -+ const char *name); -+ -+/* -+ * Determine if a type is a named struct, union, or enum. -+ * -+ * A type_id() callback. -+ */ -+static void is_named_struct_union_enum(Dwarf_Die *die, const char *unused, -+ struct die_override *overrides, -+ void *data); -+ -+/* -+ * Set up state for dedup(). A tu_init() callback. -+ */ -+static void dedup_tu_init(const char *module_name, const char *file_name, -+ Dwarf_Die *tu_die, void *data); -+ -+/* -+ * Free state for dedup(). A tu_done() callback.
-+ */ -+static void dedup_tu_done(const char *module_name, const char *file_name, -+ Dwarf_Die *tu_die, void *data); -+ -+/* -+ * Free DWARF state for dedup(). -+ */ -+static void dedup_dwarf_free(struct dedup_state *state); -+ -+/* -+ * Determine if a type is duplicated and needs sharing. -+ */ -+enum needs_sharing { NS_NOT_SHARED, NS_NO_MARKING, NS_NEEDS_SHARING }; -+static enum needs_sharing type_needs_sharing(const char *module_name, -+ const char *id); -+ -+/* -+ * Mark a type (optionally, with an already-known ID) as duplicated and located -+ * in the shared CTF table. -+ * -+ * A type_id() callback (though also called directly). -+ */ -+static void mark_shared(Dwarf_Die *die, const char *id, -+ struct die_override *overrides, void *data); -+ -+/* -+ * Construct CTF out of each type. -+ */ -+static void construct_ctf(const char *module_name, const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ void *unused __unused__); -+ -+/* -+ * Write out the CTF files from the per_module->ctf_file into files in the -+ * output directory (if standalone), or into the output file (otherwise). -+ */ -+static void write_types(char *output, int standalone); -+ -+/* -+ * Construct CTF out of each type and return that type's ID and file. -+ */ -+static struct ctf_full_id *construct_ctf_id(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ struct die_override *overrides); -+ -+/* -+ * Things to do after a CTF recursion step. -+ */ -+enum skip_type { SKIP_CONTINUE = 0, SKIP_SKIP, SKIP_ABORT }; -+ -+/* -+ * Recurse over a given DWARF DIE and its children and construct CTF out of it. -+ * -+ * Most parameters are shared with the ctf_assembly_fun: see the comment below.
-+ */ -+static ctf_id_t die_to_ctf(const char *module_name, const char *file_name, -+ Dwarf_Die *die, Dwarf_Die *parent_die, -+ ctf_file_t *ctf, ctf_id_t parent_ctf_id, -+ struct die_override *overrides, int top_level_type, -+ int backwards, enum skip_type *skip, int *replace, -+ const char *id); -+ -+/* -+ * Return the next DIE, if that DIE needs to be emitted before this one. -+ */ -+static Dwarf_Die *die_emit_next_backwards(Dwarf_Die *next, Dwarf_Die *die, -+ struct die_override *overrides); -+ -+/* -+ * Look up a type through its reference: return its ctf_id_t, or -+ * recursively construct it if need be. -+ * -+ * Must be called on a DIE with a type attribute. -+ */ -+static ctf_id_t lookup_ctf_type(const char *module_name, const char *file_name, -+ Dwarf_Die *die, ctf_file_t *ctf, -+ struct die_override *overrides, -+ const char *locerrstr); -+ -+/* -+ * Assemble a given DIE and its children into CTF in some fashion, returning the -+ * ID of the top-level piece of generated CTF (only relevant for aggregates). -+ * -+ * The parent_ctf_id is the ID of the CTF entity that was or is being generated -+ * from the enclosing DWARF DIE, or 0 if population succeeded but did not yield -+ * a type ID (e.g. for variable assembly), or -1 on error. The parent_die is -+ * the parent of the current DWARF DIE, and is always populated (even if just -+ * with the CU's DIE). The parent_ctf_id is always in the same CTF file as the -+ * ctf_id, just as the parent DWARF DIE is always in the same DWARF CU: this is -+ * lexical scope, not dynamic, so referenced types themselves located at the top -+ * level have the CU as their parent. -+ * -+ * Returning an error value (see below) indicates that no CTF was generated from -+ * this DWARF DIE. -+ * -+ * Setting skip to SKIP_ABORT indicates that the translation of this entity -+ * failed, and the entire top-level type of which it is a part should be -+ * skipped. 
Setting it to SKIP_SKIP indicates that this entity does not need to -+ * be translated (perhaps because it already exists), so recursion into -+ * sub-entities can be skipped, but translation of the containing type should -+ * continue. Setting it to SKIP_CONTINUE indicates no error. -+ * -+ * Setting 'replace' to 1 in a child DIE indicates that this type should -+ * entirely *replace* its parent's type (generally because it has wrapped it up -+ * in something). This replacement takes immediate effect for later children of -+ * the same DIE. -+ * -+ * die_to_ctf() calls these functions repeatedly for every child of the -+ * requested DIE: the CTF ID eventually returned is whatever ID is returned by -+ * the last such function, and parent_ctf_id is repeatedly replaced with the ID -+ * returned by the last assembly function. Thus, assembly functions that -+ * augment an already-present ctf_id should return parent_ctf_id: assembly -+ * functions that wrap it in a new ctf_id referring to the parent_ctf_id should -+ * return the new ID. (Assembly functions should never entirely disregard the -+ * parent_ctf_id.) -+ */ -+typedef ctf_id_t (*ctf_assembly_fun)(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ ctf_file_t *ctf, -+ ctf_id_t parent_ctf_id, -+ const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, -+ enum skip_type *skip, -+ int *replace); -+ -+#define ASSEMBLY_FUN(name) \ -+ static ctf_id_t assemble_ctf_##name(const char *module_name, \ -+ const char *file_name, \ -+ Dwarf_Die *die, \ -+ Dwarf_Die *parent_die, \ -+ ctf_file_t *ctf, \ -+ ctf_id_t parent_ctf_id, \ -+ const char *locerrstr, \ -+ struct die_override *overrides, \ -+ int top_level_type, \ -+ enum skip_type *skip, \ -+ int *replace) -+ -+/* -+ * Defined assembly functions.
-+ */ -+ASSEMBLY_FUN(base); -+ASSEMBLY_FUN(array); -+ASSEMBLY_FUN(array_dimension); -+ASSEMBLY_FUN(cvr_qual); -+ASSEMBLY_FUN(enumeration); -+ASSEMBLY_FUN(enumerator); -+ASSEMBLY_FUN(pointer); -+ASSEMBLY_FUN(struct_union); -+ASSEMBLY_FUN(su_member); -+ASSEMBLY_FUN(typedef); -+ASSEMBLY_FUN(variable); -+ -+/* -+ * An assembly filter is an optional function called with the DIE and parent DIE -+ * of a top-level type alone, before calling down into the process_file() -+ * processing function: it can be used to rapidly determine that this DIE is not -+ * worth processing. (It should return 0 in this case, and nonzero otherwise.) -+ */ -+typedef int (*ctf_assembly_filter_fun)(const char *file_name, -+ Dwarf *dwarf, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die); -+ -+/* -+ * A CTF assembly filter function which excludes all types not at the global -+ * scope (i.e. whose immediate parent is not a CU DIE) and which does not have a -+ * structure or union as its ultimate dependent type. (All structures and -+ * unions and everything dependent on them must be recorded, even inside -+ * functions, because GCC may emit references to the opaque variants of those -+ * types from file scope.) -+ */ -+static int filter_ctf_file_scope(const char *file_name, -+ Dwarf *dwarf, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die); -+ -+/* -+ * A CTF assembly filter function which excludes all names not at the global -+ * scope, all static symbols, and all names whose names are unlikely to be -+ * interesting. (DTrace userspace contains a similar list, but the two lists -+ * need not be in sync.) -+ */ -+static int filter_ctf_uninteresting(const char *file_name, -+ Dwarf *dwarf, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die); -+ -+/* -+ * Error return values from CTF assembly functions. These differ only in that -+ * die_to_ctf() reports the ctf_errmsg() if CTF_NO_ERROR_REPORTED is returned, -+ * but says nothing in the CTF_ERROR_REPORTED case. 
-+ */ -+#define CTF_NO_ERROR_REPORTED CTF_ERR -+#define CTF_ERROR_REPORTED (-2L) -+ -+/* -+ * The total number of type errors encountered. -+ */ -+static long num_errors; -+ -+/* -+ * A mapping from DW_TAG_* to functions which assemble this DW_TAG_* and -+ * possibly its children into the passed CTF. This table is not used -+ * directly, but rather assembled into a lookup table. -+ */ -+static struct assembly_tab_t -+{ -+ int tag; -+ ctf_assembly_filter_fun filter; -+ ctf_assembly_fun fun; -+} assembly_tab_init[] = -+{{ DW_TAG_base_type, filter_ctf_file_scope, assemble_ctf_base }, -+ { DW_TAG_array_type, filter_ctf_file_scope, assemble_ctf_array }, -+ { DW_TAG_subrange_type, NULL, assemble_ctf_array_dimension }, -+ { DW_TAG_const_type, filter_ctf_file_scope, assemble_ctf_cvr_qual }, -+ { DW_TAG_restrict_type, filter_ctf_file_scope, assemble_ctf_cvr_qual }, -+ { DW_TAG_enumeration_type, NULL, assemble_ctf_enumeration }, -+ { DW_TAG_enumerator, NULL, assemble_ctf_enumerator }, -+ { DW_TAG_pointer_type, filter_ctf_file_scope, assemble_ctf_pointer }, -+ { DW_TAG_structure_type, NULL, assemble_ctf_struct_union }, -+ { DW_TAG_union_type, NULL, assemble_ctf_struct_union }, -+ { DW_TAG_member, NULL, assemble_ctf_su_member }, -+ { DW_TAG_typedef, NULL, assemble_ctf_typedef }, -+ { DW_TAG_variable, filter_ctf_uninteresting, assemble_ctf_variable }, -+ { DW_TAG_volatile_type, filter_ctf_file_scope, assemble_ctf_cvr_qual }, -+ { 0, NULL }}; -+ -+/* -+ * The CTF assembly and filter lookup tables, in constructed form. -+ */ -+static ctf_assembly_fun *assembly_tab; -+static ctf_assembly_filter_fun *assembly_filter_tab; -+static size_t assembly_len; -+ -+/* -+ * Populate the assembly_tab and assembly_filter_tab from the assembly_tab_init. -+ */ -+static void init_assembly_tab(void); -+ -+/* -+ * A mapping from sizeof() to CTF type encoding. 
-+ */ -+struct type_encoding_tab { -+ size_t size; -+ int ctf_encoding; -+}; -+ -+/* -+ * Given a type encoding table, and a size, return the CTF encoding for that -+ * type, or 0 if none. -+ */ -+static int find_ctf_encoding(struct type_encoding_tab *type_tab, size_t size); -+ -+/* -+ * Count the number of members of a DWARF aggregate. -+ */ -+static long count_dwarf_members(Dwarf_Die *die); -+ -+/* -+ * Given a DIE that may contain a type attribute, look up the target of that -+ * attribute and return it, or NULL if none. -+ */ -+static Dwarf_Die *private_dwarf_type(Dwarf_Die *die, Dwarf_Die *target_die); -+ -+/* -+ * Check for existence of an attribute in a DIE, chasing through -+ * DW_AT_specification if need be. -+ */ -+static inline int private_dwarf_hasattr(Dwarf_Die *die, -+ unsigned int search_name); -+ -+/* -+ * Return a DIE attribute, chasing through DW_AT_specification if need be. -+ */ -+static inline Dwarf_Attribute *private_dwarf_attr(Dwarf_Die *die, -+ unsigned int search_name, -+ Dwarf_Attribute *result); -+ -+/* -+ * Given a DIE that contains a udata attribute, look up that attribute and -+ * return its value (optionally overridden or modified by the die_overrides). -+ */ -+static inline Dwarf_Word private_dwarf_udata(Dwarf_Die *die, int attribute, -+ struct die_override *overrides); -+ -+/* -+ * Given a DIE, return its byte size, if known and interpretable, or -1 -+ * otherwise. -+ */ -+static inline long long private_dwarf_size(Dwarf_Die *die); -+ -+/* -+ * Find an override in an override list. -+ */ -+static struct die_override * -+private_find_override(Dwarf_Die *die, -+ int attribute, -+ struct die_override *overrides); -+ -+/* -+ * Determine the dimensions of an array subrange, or 0 if variable. -+ */ -+static Dwarf_Word private_subrange_dimensions(Dwarf_Die *die); -+ -+/* -+ * A string appender working on dynamic strings. 
-+ */ -+static char *str_append(char *s, const char *append) -+ __attribute__((__warn_unused_result__)); -+ -+/* -+ * A vararg string appender. -+ */ -+static char *str_appendn(char *s, ...) -+ __attribute__((__warn_unused_result__, sentinel)); -+ -+/* -+ * An error-checking strdup(). -+ */ -+static char *xstrdup(const char *s) __attribute__((__nonnull__, -+ __warn_unused_result__, -+ __malloc__)); -+ -+/* -+ * Filter a GList, calling a predicate on it and removing all elements for which -+ * the predicate returns true, calling the free_func on them if set. -+ */ -+typedef int (*filter_pred_fun) (void *element, void *data); -+static GList *list_filter(GList *list, filter_pred_fun fun, -+ GDestroyNotify free_func, void *data); -+ -+/* -+ * Intern an atom in the atoms table and return it, or free it and return the -+ * existing atom if one is already interned. (Despite the type signature, this -+ * return value is constant and should not be freed.) -+ */ -+static void *intern(char *atom); -+ -+/* -+ * Figure out the (pathless, suffixless) module name for a given module file (.o -+ * or .ko), and return it in a new dynamically allocated string. -+ * -+ * Takes the object_to_module mapping into account. -+ */ -+static char *fn_to_module(const char *file_name); -+ -+/* -+ * Determine, and cache, absolute filenames. -+ */ -+static const char *abs_file_name(const char *file_name); -+ -+/* -+ * Determine absolute filenames relative to some other directory: do not cache -+ * them. It is the caller's responsibility to free them. -+ */ -+static char *rel_abs_file_name(const char *file_name, const char *relative_to); -+ -+/* -+ * Free a per_module's contents. -+ */ -+static void private_per_module_free(void *per_module); -+ -+/* -+ * Free a dedup_id_file's contents. -+ */ -+static void free_dups_id_file(void *id_file); -+ -+/* -+ * Free a fn_to_die_to_parent subhash. 
-+ */ -+static void private_fn_die_parent_free(void *ptr); -+ -+/* -+ * dwarf_dieoffset() with a return type better for printf(). -+ */ -+#define DIEOFFSET(die) (unsigned long) dwarf_dieoffset((die)) -+ -+/* -+ * A line-shortener with a kernel-familiar name for fprintfing to stderr. -+ */ -+#define pr_err(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__); -+ -+/* Initialization. */ -+ -+int main(int argc, char *argv[]) -+{ -+ char *output; -+ -+ trace = getenv("DWARF2CTF_TRACE"); -+ -+ if ((argc != 4 && argc != 7) || -+ (argc == 4 && strcmp(argv[2], "-e") != 0)) { -+ pr_err("Syntax: dwarf2ctf output-file srcdir objects.builtin\n"); -+ pr_err(" modules.builtin member.blacklist filelist\n"); -+ pr_err(" or dwarf2ctf output-dir -e filelist\n" -+ "for external module use\n"); -+ exit(1); -+ } -+ -+ output = argv[1]; -+ -+ elf_version(EV_CURRENT); -+ -+ if (elf_errno()) { -+ pr_err("Version synchronization fault: %s\n", -+ elf_errmsg(elf_errno())); -+ exit(1); -+ } -+ -+ init_assembly_tab(); -+ object_to_module = g_hash_table_new_full(g_str_hash, g_str_equal, -+ free, free); -+ -+ /* -+ * When not building an external module, we run over all the arguments -+ * at once, deduplicating them. In external-module mode, we act as if -+ * independently invoked with every argument. 
-+ */ -+ if (strcmp(argv[2], "-e") != 0) { -+ const char *srcdir; -+ char *builtin_objects_file; -+ char *builtin_module_file; -+ char *member_blacklist_file; -+ -+ srcdir = argv[2]; -+ builtin_objects_file = argv[3]; -+ builtin_module_file = argv[4]; -+ member_blacklist_file = argv[5]; -+ deduplicating = 1; -+ -+ init_builtin(builtin_objects_file, builtin_module_file); -+ init_member_blacklist(member_blacklist_file, srcdir); -+ init_object_names(argv[6]); -+ -+ run(output, 0); -+ } else { -+ char *single_object_name; -+ char **all_object_names; -+ size_t all_object_names_cnt; -+ size_t i; -+ -+ deduplicating = 0; -+ init_object_names(argv[3]); -+ -+ /* -+ * Repeatedly populate object_names with one object name, and -+ * call run() with that. -+ */ -+ all_object_names = object_names; -+ all_object_names_cnt = object_names_cnt; -+ object_names = &single_object_name; -+ object_names_cnt = 1; -+ -+ for (i = 0; i < all_object_names_cnt; i++) { -+ single_object_name = all_object_names[i]; -+ -+ run(output, 1); -+ } -+ } -+ -+ g_hash_table_destroy(object_to_module); -+ -+ if (num_errors > 0) -+ pr_err("%li CTF construction errors.\n", num_errors); -+ -+ return 0; -+} -+ -+/* -+ * Run dwarf2ctf over a single object file or set thereof. -+ * -+ * output is the directory into which the CTF goes, if 'standalone', or the -+ * CTF archive file name otherwise. -+ */ -+static void run(char *output, int standalone) -+{ -+ size_t i; -+ -+ /* -+ * Create all the hashes, assemble the translation unit->module list for -+ * builtin modules, and create the shared CTF file if deduplicating. 
-+ */ -+ -+ atoms = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL); -+ id_to_type = g_hash_table_new_full(g_str_hash, g_str_equal, -+ NULL, free); -+ id_to_module = g_hash_table_new_full(g_str_hash, g_str_equal, -+ NULL, free); -+ per_module = g_hash_table_new_full(g_str_hash, g_str_equal, free, -+ private_per_module_free); -+ variable_blacklist = g_hash_table_new_full(g_str_hash, g_str_equal, -+ free, free); -+ fn_to_die_to_parent = g_hash_table_new_full(g_str_hash, -+ g_str_equal, free, -+ private_fn_die_parent_free); -+ -+ dw_ctf_trace("Initializing...\n"); -+ -+ if (deduplicating) -+ init_ctf_table("shared_ctf"); -+ -+ scan_dups(); -+ -+ /* -+ * Now construct CTF out of the types. -+ */ -+ dw_ctf_trace("CTF construction.\n"); -+ for (i = 0; i < object_names_cnt; i++) -+ process_file(object_names[i], construct_ctf, NULL, NULL, NULL); -+ -+ /* -+ * Finally, emit the types into their .ctf files, and generate the -+ * necessary linker scripts. -+ */ -+ dw_ctf_trace("Writeout.\n"); -+ write_types(output, standalone); -+ -+ g_hash_table_destroy(id_to_type); -+ g_hash_table_destroy(id_to_module); -+ g_hash_table_destroy(per_module); -+ g_hash_table_destroy(variable_blacklist); -+ g_hash_table_destroy(fn_to_die_to_parent); -+ g_hash_table_destroy(atoms); -+} -+ -+/* -+ * Populate the object_names list from the module filelist. -+ */ -+static void init_object_names(const char *object_names_file) -+{ -+ FILE *f; -+ char *line = NULL; -+ size_t line_size = 0; -+ -+ f = fopen(object_names_file, "r"); -+ if (f == NULL) { -+ pr_err("Cannot open object names file %s: %s\n", -+ object_names_file, strerror(errno)); -+ exit(1); -+ } -+ -+ /* -+ * This needs no massaging other than linefeed removal, just reading and -+ * stashing. 
-+ */ -+ -+ while (getline(&line, &line_size, f) >= 0) { -+ size_t len = strlen(line); -+ -+ if (len == 0) -+ continue; -+ -+ if (line[len-1] == '\n') -+ line[len-1] = '\0'; -+ -+ object_names = realloc(object_names, -+ ++object_names_cnt * -+ sizeof(char *)); -+ -+ if (object_names == NULL) { -+ pr_err("Out of memory reading %s\n", object_names_file); -+ exit(1); -+ } -+ -+ object_names[object_names_cnt-1] = xstrdup(line); -+ } -+ free(line); -+ -+ if (ferror(f)) { -+ pr_err("Error reading from %s: %s\n", object_names_file, -+ strerror(errno)); -+ exit(1); -+ } -+ -+ fclose(f); -+} -+ -+/* -+ * Populate object_to_module from the objects.builtin and modules.builtin file. -+ */ -+static void init_builtin(const char *builtin_objects_file, -+ const char *builtin_module_file) -+{ -+ FILE *f; -+ struct modules_thick_iter *i; -+ char *line = NULL; -+ size_t line_size = 0; -+ char *module_name = NULL; -+ char **paths; -+ -+ /* -+ * Iterate over all modules in modules_thick.builtin and add each to -+ * object_to_module. -+ */ -+ i = modules_thick_iter_new(builtin_module_file); -+ if (i == NULL) { -+ pr_err("Cannot iterate over builtin module file.\n"); -+ exit(1); -+ } -+ -+ while ((paths = modules_thick_iter_next(i, &module_name)) != NULL) { -+ size_t j; -+ -+ for (j = 0; paths[j] != NULL; j++) { -+ dw_ctf_trace("noting built-in module mapping %s -> %s\n", -+ module_name, paths[j]); -+ g_hash_table_replace(object_to_module, -+ strdup(paths[j]), -+ xstrdup(module_name)); -+ } -+ free(paths); -+ } -+ free(module_name); -+ modules_thick_iter_free(i); -+ -+ f = fopen(builtin_objects_file, "r"); -+ if (f == NULL) { -+ pr_err("Cannot open builtin objects file %s: %s\n", -+ builtin_objects_file, strerror(errno)); -+ exit(1); -+ } -+ -+ /* -+ * Those entries in builtin.objects that are not already known are -+ * unconditionally-built-in object files. 
-+ */ -+ while (getline(&line, &line_size, f) >= 0) { -+ size_t len = strlen(line); -+ -+ if (len == 0) -+ continue; -+ -+ if (line[len-1] == '\n') -+ line[len-1] = '\0'; -+ -+ if (!g_hash_table_lookup(object_to_module, line)) -+ g_hash_table_replace(object_to_module, xstrdup(line), -+ xstrdup("vmlinux")); -+ } -+ -+ if (ferror(f)) { -+ pr_err("Error reading from %s: %s\n", builtin_objects_file, -+ strerror(errno)); -+ exit(1); -+ } -+ -+ free(line); -+ fclose(f); -+} -+ -+/* -+ * Translate the assembly lookup table into the assembly_tab and -+ * assembly_filter_tab arrays. -+ */ -+static void init_assembly_tab(void) -+{ -+ struct assembly_tab_t *walk; -+ -+ for (walk = assembly_tab_init; walk->fun != NULL; walk++) { -+ if (assembly_len < walk->tag) -+ assembly_len = walk->tag; -+ } -+ -+ assembly_tab = calloc(sizeof(ctf_assembly_fun *), assembly_len + 1); -+ assembly_filter_tab = calloc(sizeof(ctf_assembly_filter_fun *), -+ assembly_len + 1); -+ if ((assembly_tab == NULL) || (assembly_filter_tab == NULL)) { -+ pr_err("Out of memory allocating assembly table\n"); -+ exit(1); -+ } -+ -+ for (walk = assembly_tab_init; walk->fun != NULL; walk++) { -+ assembly_tab[walk->tag] = walk->fun; -+ assembly_filter_tab[walk->tag] = walk->filter; -+ } -+} -+ -+/* -+ * Populate the member blacklist from the member_blacklist file. -+ */ -+static void init_member_blacklist(const char *member_blacklist_file, -+ const char *srcdir) -+{ -+ FILE *f; -+ char *line = NULL; -+ size_t line_num = 0; -+ size_t line_size = 0; -+ -+ /* -+ * Not having a member blacklist is not an error. 
-+ */ -+ f = fopen(member_blacklist_file, "r"); -+ if (f == NULL) -+ return; -+ -+ member_blacklist = g_hash_table_new(g_str_hash, g_str_equal); -+ -+ while (getline(&line, &line_size, f) >= 0) { -+ size_t len = strlen(line); -+ char *last_colon; -+ const char *last_dot; -+ char *absolutized; -+ -+ line_num++; -+ -+ if (len == 0) -+ continue; -+ -+ if (line[len-1] == '\n') -+ line[len-1] = '\0'; -+ -+ last_colon = strrchr(line, ':'); -+ last_dot = strrchr(last_colon + 1, '.'); -+ if (!last_colon || !last_dot) { -+ pr_err("Syntax error on line %li of %s.\n" -+ "Syntax: filename:structure.member.\n", -+ line_num, member_blacklist_file); -+ continue; -+ } -+ -+ *last_colon = '\0'; -+ last_colon++; -+ absolutized = rel_abs_file_name(line, srcdir); -+ absolutized = str_appendn(absolutized, ":", last_colon, NULL); -+ -+ g_hash_table_insert(member_blacklist, absolutized, NULL); -+ } -+ free(line); -+ -+ if (ferror(f)) { -+ pr_err("Error reading from %s: %s\n", member_blacklist_file, -+ strerror(errno)); -+ exit(1); -+ } -+ -+ fclose(f); -+} -+ -+/* -+ * Return 1 if a given DWARF DIE, which must be a DW_TAG_member, appears in the -+ * member blacklist. -+ */ -+static int member_blacklisted(Dwarf_Die *die, Dwarf_Die *parent_die) -+{ -+ const char *fname = dwarf_decl_file(die); -+ char *id; -+ int blacklisted = 0; -+ -+ /* -+ * If there is no member blacklist, do nothing. -+ */ -+ if (!member_blacklist) -+ return 0; -+ -+ /* -+ * Unnamed structure and union members cannot be blacklisted, for now. -+ */ -+ if ((dwarf_diename(parent_die) == NULL) || -+ (dwarf_diename(die) == NULL)) -+ return 0; -+ -+ /* -+ * The compiler can define its own structures, which appear in no -+ * decl_file. -+ * -+ * We can't blacklist them with this mechanism, so skip them. 
-+ */ -+ if (__builtin_expect(fname == NULL, 0)) -+ return 0; -+ -+ fname = abs_file_name(fname); -+ -+ if (dwarf_tag(die) != DW_TAG_member || -+ (dwarf_tag(parent_die) != DW_TAG_structure_type && -+ dwarf_tag(parent_die) != DW_TAG_union_type)) { -+ pr_err("Warning: member_blacklisted() called on " -+ "%s:%s.%s at offset %li, which is not a structure member.\n", -+ fname, dwarf_diename(parent_die), dwarf_diename(die), -+ DIEOFFSET(die)); -+ return 0; -+ } -+ -+ id = xstrdup(fname); -+ id = str_appendn(id, ":", dwarf_diename(parent_die), ".", -+ dwarf_diename(die), NULL); -+ -+ if (g_hash_table_lookup_extended(member_blacklist, id, NULL, NULL)) -+ blacklisted = 1; -+ -+ free(id); -+ return blacklisted; -+} -+ -+/* -+ * Initialize a CTF type table, and possibly fill it with those special types -+ * that appear in CTF but not in DWARF (such as 'void'). (This filling happens -+ * only for the type table named "shared_ctf", unless deduplication is turned -+ * off.) -+ * -+ * If this is a local type table, and deduplication is active, make the global -+ * type table its parent. 
-+ */ -+static void init_ctf_table(const char *module_name) -+{ -+ ctf_file_t *ctf_file; -+ struct per_module *new_per_mod; -+ int ctf_err; -+ -+ ctf_file = ctf_create(&ctf_err); -+ if (ctf_file == NULL) { -+ pr_err("Cannot create CTF file: %s\n", strerror(ctf_err)); -+ exit(1); -+ } -+ new_per_mod = malloc(sizeof(struct per_module)); -+ if (new_per_mod == NULL) { -+ pr_err("Out of memory allocating per-module CTF info\n"); -+ exit(1); -+ } -+ -+ new_per_mod->ctf_file = ctf_file; -+ new_per_mod->member_counts = g_hash_table_new_full(g_str_hash, -+ g_str_equal, -+ free, free); -+ g_hash_table_replace(per_module, xstrdup(module_name), new_per_mod); -+ -+ dw_ctf_trace("Initializing module: %s\n", module_name); -+ if ((strcmp(module_name, "shared_ctf") == 0) || -+ !deduplicating) { -+ ctf_encoding_t void_encoding = { CTF_INT_SIGNED, 0, 0 }; -+ ctf_encoding_t int_encoding = { CTF_INT_SIGNED, 0, -+ sizeof(int) * 8 }; -+ ctf_id_t int_type; -+ ctf_id_t func_type; -+ ctf_funcinfo_t func_info; -+ -+ /* -+ * Global types module, or deduplication is disabled. Add a -+ * type for 'void *' to point to, and a type for the return -+ * value of pointers to functions: then add the (single, -+ * universal) pointer-to-function value. 
-+ */ -+ ctf_void_type = ctf_add_integer(ctf_file, CTF_ADD_ROOT, -+ "void", &void_encoding); -+ int_type = ctf_add_integer(ctf_file, CTF_ADD_ROOT, "int", -+ &int_encoding); -+ mark_shared_by_name(ctf_file, ctf_void_type, "void"); -+ mark_shared_by_name(ctf_file, int_type, "int"); -+ -+ func_info.ctc_return = int_type; -+ func_info.ctc_argc = 0; -+ func_info.ctc_flags = 0; -+ func_type = ctf_add_function(ctf_file, CTF_ADD_ROOT, -+ &func_info, NULL); -+ ctf_funcptr_type = ctf_add_pointer(ctf_file, CTF_ADD_ROOT, -+ func_type); -+ -+ if (ctf_update(ctf_file) < 0) { -+ pr_err("Cannot initialize shared CTF file: %s\n", -+ ctf_errmsg(ctf_errno(ctf_file))); -+ exit(1); -+ } -+ } else { -+ /* -+ * Local types module with deduplication enabled: point the -+ * parent at the global CTF file, which must exist by this -+ * point. -+ */ -+ if (ctf_import(ctf_file, lookup_ctf_file("shared_ctf")) < 0) { -+ pr_err("Cannot set parent of CTF file for module %s: %s\n", -+ module_name, ctf_errmsg(ctf_errno(ctf_file))); -+ exit(1); -+ } -+ ctf_parent_name_set(ctf_file, "shared_ctf"); -+ } -+ -+ dw_ctf_trace("Created CTF file for module %s: %p\n", -+ module_name, ctf_file); -+} -+ -+/* DWARF walkers. */ -+ -+/* -+ * Initialize the child->parent DIE mapping for a single file. -+ */ -+static void init_parent_die(const char *file_name, Dwfl *dwfl) -+{ -+ GHashTable *offs; -+ Dwarf_Die *tu_die = NULL; -+ Dwarf_Addr junk; -+ -+ offs = g_hash_table_new(g_direct_hash, g_direct_equal); -+ if (offs == NULL) { -+ pr_err("Out of memory creating DIE offset hash\n"); -+ exit(1); -+ } -+ -+ while ((tu_die = dwfl_nextcu(dwfl, tu_die, &junk)) != NULL) { -+ init_parent_die_internal(file_name, offs, tu_die, 0, 0); -+ } -+ -+ g_hash_table_insert(fn_to_die_to_parent, -+ strdup(abs_file_name(file_name)), offs); -+} -+ -+/* -+ * Initialize one layer of a child->parent mapping. 
-+ * -+ * We traverse children of top-level subprograms hunting for anything we know -+ * how to emit, and record parent->child mappings for all intermediate DIEs. -+ */ -+static int init_parent_die_internal(const char *file_name, -+ GHashTable *offs, Dwarf_Die *parent, -+ int depth, int found_subprogram) -+{ -+ Dwarf_Die child; -+ int sib_ret; -+ Dwarf_Off parent_offset; -+ const char *err; -+ int add_parent = 0; -+ -+ if (dwarf_tag(parent) == DW_TAG_subprogram) -+ found_subprogram = 1; -+ -+ switch (dwarf_child(parent, &child)) { -+ case -1: -+ err = "child DIEs"; -+ goto err; -+ case 1: /* This DIE has no children */ -+ goto out; -+ } -+ -+ parent_offset = dwarf_dieoffset(parent); -+ -+ do { -+ int add_child = 0; -+ -+ /* -+ * Add links from the parent to all children for which a -+ * recursive call says they should be added, and note that we -+ * should add links to the parent too. Always look down to -+ * depth 2, since the topmost level is always -+ * DW_TAG_compile_unit, and we are interested in -+ * DW_TAG_subprograms one level below that. -+ */ -+ if (found_subprogram || depth < 2) -+ add_child = init_parent_die_internal(file_name, offs, -+ &child, depth+1, -+ found_subprogram); -+ -+ if (add_child) { -+ g_hash_table_insert(offs, -+ GUINT_TO_POINTER(dwarf_dieoffset(&child)), -+ GUINT_TO_POINTER(parent_offset)); -+ add_parent = 1; -+ } -+ } while ((sib_ret = dwarf_siblingof (&child, &child)) == 0); -+ -+ if (sib_ret == -1) { -+ err = "sibling DIEs"; -+ goto err; -+ } -+ -+out: -+ /* -+ * Emit a link for the next level up if we're under a subprogram and -+ * either we emitted a child link or the parent is itself something we -+ * know how to emit (and thus might possibly appear in a type DIE we -+ * care about). 
-+ */ -+ return (found_subprogram && -+ (add_parent || -+ (dwarf_tag(parent) <= assembly_len && -+ assembly_tab[dwarf_tag(parent)] != NULL))); -+err: -+ pr_err("Cannot fetch %s of DIE at offset %lu in %s: %s\n", -+ err, DIEOFFSET(parent), file_name, -+ dwarf_errmsg(dwarf_errno())); -+ exit(1); -+} -+ -+/* -+ * Type ID computation. -+ * -+ * A type ID is a constant, recursively-constructed, dynamically-allocated -+ * string describing a given DWARF DIE in such a way that any DWARF file -+ * containing the same type will have the same type ID. (It even works for -+ * variables! Variables of the same name and referring to the same type have -+ * the same ID...) -+ * -+ * Optionally, call a callback with the computed ID once we know it (this is a -+ * recursive process, so the callback can be called multiple times as the ID is -+ * built up). -+ * -+ * An ID of NULL indicates that this DIE has no ID and need not be considered. -+ * -+ * It is probably an error for two DWARF DIEs representing top-level types to -+ * return the same ID, but for certain other DIEs (notably those representing -+ * the members of structures or unions), it is expected that they return the -+ * same ID as their type DIE. -+ * -+ * This function is the hottest hot spot in dwarf2ctf, so is somewhat -+ * aggressively optimized. -+ * -+ * The "overrides" allow the overriding of DWARF attributes, so that the -+ * machinery notices different DWARF from what actually appears in the -+ * debuginfo, so that the CTF that is emitted is suitably modified (and possibly -+ * duplicated). This is mostly used by type_id() to generate different IDs for -+ * dependent types of bitfields, but can be used for other purposes too, such as -+ * adjusting the offsets of types in unnamed structures, etc. Overrides are -+ * passed down if provided: overrides relating to bitfields are only applied by -+ * type_id() if no other overrides are provided. 
-+ * -+ * In general, you do not need to pass overrides down if you know you will only -+ * be called directly on top-level DIEs, but otherwise, you should do so. -+ */ -+static char *type_id(Dwarf_Die *die, -+ struct die_override *overrides, -+ void (*fun)(Dwarf_Die *die, -+ const char *id, -+ struct die_override *overrides, -+ void *data), -+ void *data) -+{ -+ return type_id_internal(die, overrides, fun, data, 0); -+} -+ -+/* -+ * Internal: generate the type ID for a type DIE. -+ * -+ * If there are no overrides, look for a bit_size and bit_offset and pass them -+ * down as well. -+ */ -+static char *type_id_type_die(Dwarf_Die *die, -+ Dwarf_Die *type_die, -+ struct die_override *overrides, -+ void (*fun)(Dwarf_Die *die, -+ const char *id, -+ struct die_override *overrides, -+ void *data), -+ void *data) -+{ -+ char *id; -+ -+ /* -+ * bit_size and bit_offset go together: we can assume that if a member -+ * has the one, it has the other. -+ */ -+ -+ if (private_dwarf_hasattr(die, DW_AT_bit_size)) { -+ Dwarf_Word size; -+ Dwarf_Word offset; -+ -+ size = private_dwarf_udata(die, DW_AT_bit_size, NULL); -+ offset = private_dwarf_udata(die, DW_AT_bit_offset, NULL); -+ struct die_override o[] = { -+ { DW_TAG_base_type, -+ DW_AT_bit_size, -+ DIE_OVERRIDE_REPLACE, -+ size, NULL }, -+ { DW_TAG_base_type, -+ DW_AT_bit_offset, -+ DIE_OVERRIDE_REPLACE, -+ offset, overrides }, -+ {0} -+ }; -+ id = type_id(type_die, o, fun, data); -+ } else -+ id = type_id(type_die, overrides, fun, data); -+ return id; -+} -+ -+/* -+ * Internal: allows flags to be passed to affect one (and only one) type ID -+ * recursion, without affecting other type_id()s launched from the 'fun'. 
-+ */ -+static char *type_id_internal(Dwarf_Die *die, -+ struct die_override *overrides, -+ void (*fun)(Dwarf_Die *die, -+ const char *id, -+ struct die_override *overrides, -+ void *data), -+ void *data, -+ int flags) -+{ -+ char *id = NULL; -+ int no_type_id = 0; -+ int decorated = 1; -+ -+ /* -+ * The ID of a null pointer is NULL. -+ */ -+ if (die == NULL) -+ return NULL; -+ -+ /* -+ * The ID of a function pointer is '//fp//', as a special case, -+ * with no location or overrides, ever. -+ */ -+ if (dwarf_tag(die) == DW_TAG_subroutine_type) { -+ id = xstrdup("//fp//"); -+ if (fun) -+ fun(die, id, NULL, data); -+ return id; -+ } -+ -+ /* -+ * If we have a type DIE, generate it first, passing any overrides down. -+ * (Base types and enumerations don't have a type DIE that CTF can -+ * encode the type of in any useful fashion.) -+ * -+ * Otherwise, note the location of this DIE, providing scoping -+ * information for all types based upon this one. Location elements are -+ * separated by //, an element impossible in a Linux path. The -+ * blacklist type prefix (if set) follows this (which is a name which, -+ * while not impossible in a Linux path, is very unlikely.) -+ * -+ * Array dimensions get none of this: they must be contained within -+ * another DIE, so will always have a location attached via that DIE, -+ * and get their type chased further down (so as to arrange that they -+ * appear inside an [].) -+ */ -+ if (dwarf_tag(die) != DW_TAG_subrange_type) { -+ if ((dwarf_tag(die) != DW_TAG_base_type) && -+ (dwarf_tag(die) != DW_TAG_enumeration_type)) { -+ Dwarf_Die type_die; -+ Dwarf_Die *diep = private_dwarf_type(die, &type_die); -+ -+ if (diep) -+ id = type_id_type_die(die, diep, overrides, -+ fun, data); -+ } -+ -+ /* -+ * Location information. We use cached realpath() results, and -+ * call str_appendn() only once, minimizing the number of -+ * strlen()s. 
-+ */ -+ if (id == NULL) { -+ const char *decl_file_name = dwarf_decl_file(die); -+ int decl_line_num; -+ const char *fname = ""; -+ char line_num[21] = ""; /* > than 2^64's digit count */ -+ -+ no_type_id = 1; -+ if (decl_file_name != NULL) -+ fname = abs_file_name(decl_file_name); -+ -+ if (dwarf_decl_line(die, &decl_line_num) >= 0) { -+ snprintf(line_num, sizeof(line_num), "%i", -+ decl_line_num); -+ } -+ id = str_appendn(id, fname, "//", line_num, "//", NULL); -+ } -+ } -+ -+ /* -+ * We implement this via a switch statement, rather than a jump table -+ * like the assembly_tab, simply because most cases are so small that -+ * splitting them into separate functions would do more harm than good -+ * to readability. -+ * -+ * WARNING: The spaces in the strings in this switch statement are not -+ * just for appearance: types with spaces in their names are impossible -+ * in C. If you move those spaces around for appearance's sake, please -+ * adjust mark_shared_by_name and dedup_alias_fixup(), which -+ * construct the IDs of basic types, structures, and unions by hand. -+ */ -+ switch (dwarf_tag(die)) { -+ case DW_TAG_base_type: { -+ Dwarf_Word bit_size = -1; -+ Dwarf_Word type_size = -1; -+ Dwarf_Word bit_offset = -1; -+ const char *diename = dwarf_diename(die); -+ -+ if ((flags & TI_COLLAPSE_SIZETYPE) && -+ (strcmp(diename, "long unsigned int") == 0)) -+ diename = "sizetype"; -+ -+ /* -+ * CTF encodes the size and bitwise-offset of bit-fields in the -+ * base type, so it must be stored once for each size, even if -+ * it only appears once for all sizes in the DWARF. 
-+ */ -+ if (private_dwarf_hasattr(die, DW_AT_bit_size) || -+ private_find_override(die, DW_AT_bit_size, -+ overrides)) -+ bit_size = private_dwarf_udata(die, DW_AT_bit_size, -+ overrides); -+ if (private_dwarf_hasattr(die, DW_AT_bit_offset) || -+ private_find_override(die, DW_AT_bit_offset, -+ overrides)) -+ bit_offset = private_dwarf_udata(die, DW_AT_bit_offset, -+ overrides); -+ -+ /* -+ * Bitfields that occupy their entire containing type are not -+ * bitfields, but just redundant DWARF. GCC emits these now and -+ * again, but the dups would trip CTF consistency checks, so -+ * must be skipped. -+ */ -+ if (bit_size > -1) { -+ /* -+ * This "may be omitted" in DWARF, but GCC doesn't: -+ * bitfields always get both. (See -+ * gcc/dwarf2out.c:gen_field_die().) -+ */ -+ type_size = private_dwarf_udata(die, DW_AT_bit_size, -+ overrides); -+ } -+ if (bit_size != type_size) { -+ char bitsize[22]; /* > 2^64's digit count */ -+ char bitoffset[22]; /* > 2^64's digit count */ -+ -+ snprintf(bitsize, sizeof(bitsize), "%li", bit_size); -+ id = str_appendn(id, diename, ":", bitsize, NULL); -+ if (bit_offset != -1) { -+ snprintf(bitoffset, sizeof(bitoffset), "%li", -+ bit_offset); -+ id = str_appendn(id, ":", bitoffset, NULL); -+ } -+ id = str_append(id, " "); -+ } else { -+ /* -+ * Ordinary (non-bit-field) base type. -+ */ -+ id = str_appendn(id, diename, " ", NULL); -+ } -+ break; -+ } -+ case DW_TAG_enumeration_type: -+ id = str_appendn(id, "enum ", dwarf_diename(die), " ", NULL); -+ break; -+ case DW_TAG_structure_type: -+ case DW_TAG_union_type: { -+ /* -+ * Incorporate the unaligned sizeof() the structure, if -+ * statically known (the offset of the last member in the DWARF) -+ * so that most structures which are redefined on the fly by -+ * preprocessor defines are disambiguated despite being defined -+ * in the same place. 
-+ * -+ * Only do this if this is a non-opaque structure/union -+ * definition: opaque definitions cannot have a size, but if -+ * they do by some mischance get one, notating it will mess up -+ * the several other places that manually construct opaque -+ * structure identifiers (and cannot incorporate a size, since -+ * they don't know it). -+ */ -+ const char *sou; -+ -+ if (strncmp(id, "////", 4) != 0) { -+ long long size; -+ char byte_size[24]; -+ -+ size = private_dwarf_size(die); -+ if (size > -1) { -+ sprintf(byte_size, "%lli", size); -+ id = str_appendn(id, byte_size, "//", NULL); -+ } -+ } -+ -+ if (dwarf_tag(die) == DW_TAG_union_type) -+ sou = "union "; -+ else -+ sou = "struct "; -+ -+ id = str_appendn(id, sou, dwarf_diename(die), " ", NULL); -+ break; -+ } -+ case DW_TAG_variable: -+ id = str_appendn(id, "var ", dwarf_diename(die), " ", NULL); -+ break; -+ case DW_TAG_typedef: -+ id = str_appendn(id, "typedef ", dwarf_diename(die), " ", NULL); -+ break; -+ case DW_TAG_const_type: -+ id = str_append(id, "const "); -+ break; -+ case DW_TAG_restrict_type: -+ id = str_append(id, "restrict "); -+ break; -+ case DW_TAG_volatile_type: -+ id = str_append(id, "volatile "); -+ break; -+ case DW_TAG_pointer_type: -+ if (no_type_id) -+ id = str_append(id, "void "); -+ id = str_append(id, "* "); -+ break; -+ -+ case DW_TAG_array_type: { -+ /* -+ * No explicit notation: all done per-dimension: so recurse to -+ * those. -+ */ -+ -+ int sib_ret; -+ int dimens = 0; -+ Dwarf_Die dim_die; -+ -+ switch (dwarf_child(die, &dim_die)) { -+ case -1: -+ pr_err("Corrupt DWARF: Cannot get array dimensions: %s\n", -+ dwarf_errmsg(dwarf_errno())); -+ exit(1); -+ case 1: /* No dimensions. 
*/ -+ id = str_append(id, "[] "); -+ break; -+ default: -+ dimens = 1; -+ } -+ -+ if (!dimens) -+ break; -+ -+ do { -+ char *sub_id = type_id_internal(&dim_die, overrides, -+ fun, data, -+ TI_COLLAPSE_SIZETYPE); -+ id = str_append(id, sub_id); -+ free(sub_id); -+ } while ((sib_ret = dwarf_siblingof(&dim_die, &dim_die)) == 0); -+ -+ if (sib_ret == -1) { -+ pr_err("Corrupt DWARF: Cannot get array dimensions: %s\n", -+ dwarf_errmsg(dwarf_errno())); -+ exit(1); -+ } -+ break; -+ } -+ case DW_TAG_subrange_type: { -+ Dwarf_Word nelems = private_subrange_dimensions(die); -+ -+ id = str_append(id, "["); -+ -+ if (nelems > 0) { -+ Dwarf_Die type_die; -+ char elems[22]; /* bigger than 2^64's digit count */ -+ char *sub_id = type_id_internal(private_dwarf_type(die, &type_die), -+ overrides, fun, data, -+ TI_COLLAPSE_SIZETYPE); -+ -+ snprintf(elems, sizeof(elems), " %li", nelems); -+ id = str_appendn(id, sub_id, elems, NULL); -+ free(sub_id); -+ } -+ id = str_append(id, "] "); -+ break; -+ } -+ default: -+ /* -+ * Some tags (e.g. structure members) get the same ID as their -+ * associated type. We don't need to call the hook function -+ * again for such tags. -+ */ -+ decorated = 0; -+ } -+ -+ if (fun && decorated) -+ fun(die, id, overrides, data); -+ -+ return id; -+} -+ -+/* -+ * Process a file, calling the dwarf_process function for every top-level type -+ * found therein. Optionally call tu_init() at the start of each translation -+ * unit, and tu_done() at the end. 
-+ */ -+static void process_file(const char *file_name, -+ void (*dwarf_process)(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ void *data), -+ void (*tu_init)(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *tu_die, -+ void *data), -+ void (*tu_done)(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *tu_die, -+ void *data), -+ void *data) -+{ -+ const char *err; -+ char *fn_module_name = fn_to_module(file_name); -+ const char *module_name = fn_module_name; -+ -+ Dwfl_Module *mod; -+ Dwfl *dwfl; -+ Dwarf *dwarf; -+ GHashTable *seen_before = g_hash_table_new_full(g_str_hash, g_str_equal, -+ free, free); -+ Dwarf_Die *tu_die = NULL; -+ Dwarf_Addr junk; -+ -+ if (seen_before == NULL) { -+ pr_err("Out of memory creating seen_before hash\n"); -+ exit(1); -+ } -+ -+ dwfl = simple_dwfl_new(file_name, &mod); -+ dwarf = dwfl_module_getdwarf(mod, &junk); -+ -+ /* -+ * On first traversal, make sure the DIE parent mapping is populated, -+ * so that filter_ctf_file_scope can use it. -+ */ -+ if (!g_hash_table_lookup_extended(fn_to_die_to_parent, -+ abs_file_name(file_name), -+ NULL, NULL)) -+ init_parent_die(file_name, dwfl); -+ -+ while ((tu_die = dwfl_nextcu(dwfl, tu_die, &junk)) != NULL) { -+ const char *tu_name; -+ -+ if (dwarf_tag(tu_die) != DW_TAG_compile_unit) { -+ err = "Malformed DWARF: non-compile_unit at top level"; -+ goto fail; -+ } -+ -+ tu_name = dwarf_diename(tu_die); -+ -+ dw_ctf_trace("Processing %s\n", tu_name); -+ -+ /* -+ * If we have seen this TU before, skip it. We assume that -+ * types in multiple identical TUs are always entirely -+ * identical. This lets us skip cases where the same object -+ * file is linked in multiple places without scanning every type -+ * in it. (Note: this may be inaccurate if a TU is built -+ * repeatedly with different #defines in force. 
I hope this -+ * cannot happen, but if it does, a workaround a-la libtool is -+ * simple: rename or symlink the TU for such repeated builds.) -+ * -+ * Otherwise, note the name of the module to which this TU maps, -+ * if it is not already known: otherwise, extract that name. -+ * -+ * This is purely an optimization: it breaks somewhat for -+ * multifile modules but this has no effect but a slight -+ * slowdown. -+ */ -+ if (g_hash_table_lookup_extended(seen_before, tu_name, -+ NULL, NULL)) -+ continue; -+ -+ g_hash_table_replace(seen_before, xstrdup(tu_name), NULL); -+ -+ /* -+ * We are only interested in top-level definitions within each -+ * TU. -+ */ -+ Dwarf_Die die; -+ -+ switch (dwarf_child(tu_die, &die)) { -+ case -1: -+ err = "fetch first child of TU"; -+ goto fail; -+ case 1: /* No DIEs at all in this TU */ -+ continue; -+ default: /* Child DIEs exist. */ -+ break; -+ } -+ -+ if (tu_init != NULL) -+ tu_init(module_name, file_name, tu_die, data); -+ -+ process_tu_func(module_name, file_name, dwarf, tu_die, &die, -+ dwarf_process, data); -+ -+ if (tu_done != NULL) -+ tu_done(module_name, file_name, tu_die, data); -+ } -+ -+ free(fn_module_name); -+ simple_dwfl_free(dwfl); -+ g_hash_table_destroy(seen_before); -+ -+ return; -+ -+ fail: -+ pr_err("Cannot %s for %s: %s\n", err, module_name, -+ dwarf_errmsg(dwarf_errno())); -+ exit(1); -+} -+ -+/* -+ * process_file() helper, walking over the top level and picking up types -+ * therein. -+ */ -+static void process_tu_func(const char *module_name, -+ const char *file_name, -+ Dwarf *dwarf, -+ Dwarf_Die *parent_die, -+ Dwarf_Die *die, -+ void (*dwarf_process)(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ void *data), -+ void *data) -+{ -+ const char *err; -+ int sib_ret; -+ -+ /* -+ * We are only interested in definitions for which we can (eventually) -+ * emit CTF: call the processing function for all such. 
-+ */ -+ do { -+ if ((dwarf_tag(die) <= assembly_len) && -+ (assembly_filter_tab[dwarf_tag(die)] == NULL || -+ assembly_filter_tab[dwarf_tag(die)](file_name, dwarf, die, -+ parent_die)) && -+ (assembly_tab[dwarf_tag(die)] != NULL)) -+ dwarf_process(module_name, file_name, die, -+ parent_die, data); -+ } while ((sib_ret = dwarf_siblingof(die, die)) == 0); -+ -+ if (sib_ret == -1) { -+ err = "fetch sibling"; -+ goto fail; -+ } -+ -+ return; -+ fail: -+ pr_err("Cannot %s for %s: %s\n", err, module_name, -+ dwarf_errmsg(dwarf_errno())); -+ exit(1); -+} -+ -+/* Duplicate detection. */ -+ -+/* -+ * Scan and identify duplicates across the entire set of object files. -+ */ -+static void scan_dups(void) -+{ -+ size_t i; -+ -+ /* -+ * First, determine which types are referenced by more than one -+ * translation unit, and construct the mapping from translation unit to -+ * non-builtin module name. -+ * -+ * The first pass detects duplicated types in need of sharing, without -+ * considering opaque/transparent structure/union aliasing. It requests -+ * an alias detection pass if any structures, or typedefs to them, are -+ * newly marked as shared. -+ * -+ * We must do this even when deduplication is disabled, because we need -+ * the TU->module-name mapping, even if in this case it is trivial. -+ */ -+ -+ struct dedup_state state = {0}; -+ -+ dw_ctf_trace("Duplicate detection: primary pass.\n"); -+ -+ /* -+ * This is merely flushed between TUs, not recreated: we create it here. -+ */ -+ state.vars_seen = g_hash_table_new_full(g_str_hash, -+ g_str_equal, -+ free, NULL); -+ -+ for (i = 0; i < object_names_cnt; i++) -+ process_file(object_names[i], dedup, -+ dedup_tu_init, dedup_tu_done, &state); -+ -+ if ((!state.repeat_detection) || !deduplicating) -+ goto out; -+ -+ do { -+ /* -+ * The second pass recognizes that opaque structures must be -+ * shared if the transparent equivalents are, and vice versa, -+ * and re-traces all transparent types that need sharing. 
-+ * -+ * It requests another alias detection pass if any non-opaque -+ * structures are newly marked as shared. -+ */ -+ dw_ctf_trace("Duplicate detection: alias fixup pass.\n"); -+ -+ state.repeat_detection = 0; -+ state.named_structs = list_filter(state.named_structs, -+ dedup_alias_fixup, -+ free_dups_id_file, &state); -+ } while (state.repeat_detection); -+ out: -+ g_hash_table_destroy(state.vars_seen); -+ dedup_dwarf_free(&state); -+ dw_ctf_trace("Duplicate detection: complete.\n"); -+ dw_ctf_trace("%llu distinct type IDs known.\n", -+ (unsigned long long) g_hash_table_size(id_to_module)); -+ dw_ctf_trace("%llu variables blacklisted for static/nonstatic conflicts.\n", -+ (unsigned long long) g_hash_table_size(variable_blacklist)); -+ g_list_free_full(state.named_structs, free_dups_id_file); -+} -+ -+/* -+ * Set up state for dedup(). A tu_init() callback. -+ */ -+static void dedup_tu_init(const char *module_name, const char *file_name, -+ Dwarf_Die *tu_die, void *data) -+{ -+ struct dedup_state *state = data; -+ struct per_module *per_mod; -+ -+ /* -+ * Make sure that even if this module has no types in it we still end up -+ * generating a CTF file. (Userspace depends on this, since a CTF file -+ * with no types in means the module is known and typeless, while no CTF -+ * file at all means the module is not known.) -+ */ -+ -+ per_mod = g_hash_table_lookup(per_module, module_name); -+ if (per_mod == NULL) { -+ init_ctf_table(module_name); -+ dw_ctf_trace("%s: initialized CTF file.\n", module_name); -+ } -+ -+ state->structs_seen = g_hash_table_new(g_str_hash, g_str_equal); -+ g_hash_table_remove_all(state->vars_seen); -+ state->module_name = module_name; -+} -+ -+/* -+ * Free state for dedup(). A tu_done() callback. 
-+ */ -+static void dedup_tu_done(const char *module_name, const char *file_name, -+ Dwarf_Die *tu_die, void *data) -+{ -+ struct dedup_state *state = data; -+ -+ /* -+ * We have to annul module_name because it is freed between object files -+ * by process_file(). Since we use that to track whether vars_seen -+ * needs reconstructing, that means we have to destroy that as well. -+ */ -+ g_hash_table_destroy(state->structs_seen); -+ state->structs_seen = NULL; -+ state->module_name = NULL; -+} -+ -+/* -+ * Free DWARF state for dedup(). -+ */ -+static void dedup_dwarf_free(struct dedup_state *state) -+{ -+ if (state->dwfl == NULL) -+ return; -+ simple_dwfl_free(state->dwfl); -+ state->dwfl = NULL; -+ state->dwarf = NULL; -+ free(state->dwfl_file_name); -+ state->dwfl_file_name = NULL; -+ if (state->structs_seen) -+ g_hash_table_destroy(state->structs_seen); -+ state->structs_seen = NULL; -+} -+ -+/* -+ * Duplicate detection. -+ * -+ * Scan for duplicate types. A duplicate type is defined as any type which -+ * appears in more than one module, or, more precisely, any type for which a -+ * type with the same ID already exists in another module. -+ * -+ * This pass also constructs the id_to_module table, so is essential even when -+ * deduplication is disabled (though then it need be run only once.) -+ */ -+ -+static void dedup(const char *module_name, const char *file_name, -+ Dwarf_Die *die, Dwarf_Die *parent_die, void *data) -+{ -+ struct dedup_state *state = data; -+ int is_sou = 0; -+ char *id = type_id(die, NULL, is_named_struct_union_enum, &is_sou); -+ -+ state->file_name = file_name; -+ /* -+ * If a DWARF-4 type signature is found, abort. While we can support -+ * DWARF-4 eventually, support in elfutils is insufficiently robust for -+ * now (elfutils 0.152). 
-+ */ -+ if (private_dwarf_hasattr(die, DW_AT_type)) { -+ Dwarf_Attribute type_attr; -+ -+ if ((private_dwarf_attr(die, DW_AT_type, &type_attr) != NULL) && -+ (dwarf_whatform(&type_attr) == DW_FORM_ref_sig8)) { -+ pr_err("Sorry, not yet implemented: %s contains DWARF-4 debugging information.\n", -+ module_name); -+ exit(1); -+ } -+ } -+ -+ /* -+ * Non-anonymous, non-opaque structure/union/enum types in -+ * non-dedup-blacklisted modules get their names and locations recorded -+ * for subsequent passes; all type_id()-descendant types are similarly -+ * noted. -+ */ -+ if (is_sou && strncmp(id, "////", strlen("////")) != 0) -+ free(type_id(die, NULL, dedup_will_rescan, state)); -+ -+ /* -+ * Handle static variable blacklisting. (We still shuffle blacklisted -+ * variables into the right place in id_to_module because we check for -+ * blacklisting at the lowest level, by which point we have already -+ * depended on id_to_module being correctly populated.) -+ * -+ * Avoid calling this for recursive dependent-type scans: variables -+ * cannot be dependent types. -+ */ -+ if (parent_die != NULL && dwarf_tag(die) == DW_TAG_variable) -+ dedup_blacklist_var_dups(die, state); -+ -+ dedup_mark_inner_die(module_name, die, id, NULL, data); -+ free(id); -+} -+ -+/* -+ * Do the underlying marking of a DIE as shared, iff need be. (No variable -+ * blacklisting, non-opaque structure checks, or anything else needed only by -+ * top-level DIEs.) -+ * -+ * This function may be called multiple times for overridden DIEs that are -+ * dependent types of bitfields. (On multiple calls for normal types, the -+ * second call will enter the NS_NO_MARKING case block and terminate recursion.) 
-+ */ -+static void dedup_mark_inner_die(const char *module_name, Dwarf_Die *die, -+ const char *id, -+ struct die_override *overrides, -+ void *data) -+{ -+ /* -+ * If we know of a single module incorporating this type, and it is not -+ * the same as the module we are currently in, then this type is -+ * duplicated across modules and belongs in the global type table. -+ * (This means that duplicated types are repeatedly so marked: this -+ * is unavoidable, because pass 3 requires re-marking structures that -+ * have already been marked, to pick up unmarked intermediate types.) -+ * -+ * We never consider types in modules on the deduplication blacklist -+ * to introduce duplicates. -+ */ -+ switch (type_needs_sharing(module_name, id)) { -+ case NS_NEEDS_SHARING: -+ mark_shared(die, NULL, overrides, data); -+ mark_seen_contained(die, "shared_ctf", overrides, data); -+ /* Fall through */ -+ case NS_NO_MARKING: -+ /* -+ * A duplicated type, but in the same module, or deduplication -+ * is disabled, so id_to_module is already correct. (When -+ * deduplication is disabled, we will be running with only one -+ * module at a time, and id_to_module will be a trivial -+ * mapping.) -+ */ -+ return; -+ case NS_NOT_SHARED: -+ break; -+ } -+ -+ /* -+ * Record that we have seen this type, and all its dependent types, in -+ * this module (or in the shared module if need be). -+ */ -+ -+ dw_ctf_trace("Marking %s as seen in %s\n", id, module_name); -+ g_hash_table_replace(id_to_module, intern(xstrdup(id)), -+ xstrdup(module_name)); -+ mark_seen_contained(die, module_name, overrides, data); -+ free(type_id(die, overrides, dedup_typeid, data)); -+} -+ -+/* -+ * Note in the dedup_id_file list that we will rescan a DIE in a later duplicate -+ * detection pass. -+ * -+ * A type_id() callback. 
-+ */ -+static void dedup_will_rescan(Dwarf_Die *die, const char *id, -+ struct die_override *overrides, void *data) -+{ -+ struct dedup_state *state = data; -+ struct dedup_id_file *id_file; -+ -+ /* -+ * We don't care about array index types, which will never be structures -+ * in C. -+ */ -+ if (id[0] == '[') -+ return; -+ -+ id_file = calloc(1, sizeof(struct dedup_id_file)); -+ if (id_file == NULL) { -+ pr_err("Out of memory allocating id_file\n"); -+ exit(1); -+ } -+ id_file->file_name = intern(xstrdup(state->file_name)); -+ id_file->id = intern(xstrdup(id)); -+ id_file->dieoff = dwarf_dieoffset(die); -+ state->named_structs = g_list_prepend(state->named_structs, id_file); -+} -+ -+/* -+ * Note the variable referenced by this DIE in vars_seen: blacklist it if an -+ * entry for this variable already exists in vars_seen and this instance is -+ * static, or if a static entry already exists in vars_seen, whether this -+ * instance is static or not. -+ */ -+static void dedup_blacklist_var_dups(Dwarf_Die *die, -+ struct dedup_state *state) -+{ -+ void *static_var; -+ int blacklist = 0; -+ -+ if (g_hash_table_lookup_extended(state->vars_seen, -+ dwarf_diename(die), -+ NULL, &static_var)) { -+ if (!private_dwarf_hasattr(die, DW_AT_external) && -+ !private_dwarf_hasattr(die, DW_AT_declaration)) -+ blacklist = 1; -+ if (static_var != NULL) -+ blacklist = 1; -+ } else -+ /* -+ * We need a non-NULL address here, but that is all we need. -+ * The address of a random variable will do. -+ */ -+ g_hash_table_insert(state->vars_seen, -+ xstrdup(dwarf_diename(die)), -+ (!private_dwarf_hasattr(die, DW_AT_external) && -+ !private_dwarf_hasattr(die, DW_AT_declaration)) ? -+ &static_var : NULL); -+ -+ if (blacklist) { -+ char *var = NULL; -+ var = str_appendn(var, state->module_name, "`", -+ dwarf_diename(die), NULL); -+ g_hash_table_replace(variable_blacklist, var, NULL); -+ } -+} -+ -+/* -+ * Free a dedup_id_file's contents. 
-+ */ -+static void free_dups_id_file(void *data) -+{ -+ struct dedup_id_file *id_file = data; -+ free(id_file); -+} -+ -+/* -+ * Determine if a type is duplicated and needs sharing. -+ */ -+static enum needs_sharing type_needs_sharing(const char *module_name, -+ const char *id) -+{ -+ const char *existing_type_module; -+ existing_type_module = g_hash_table_lookup(id_to_module, id); -+ -+ /* -+ * Types not already known about do not need sharing. -+ * -+ * Types already in the current modules and any types in external-module -+ * mode do not even need marking. -+ */ -+ if (existing_type_module == NULL) -+ return NS_NOT_SHARED; -+ -+ if ((strcmp(existing_type_module, module_name) == 0) || -+ (strcmp(existing_type_module, "shared_ctf") == 0) || -+ !deduplicating) -+ return NS_NO_MARKING; -+ -+ return NS_NEEDS_SHARING; -+} -+ -+/* -+ * Detect duplicates and mark seen types for a given type, via a type_id() -+ * callback: used to detect dependent types (particularly those at child-DIE -+ * level) as duplicates. -+ */ -+static void dedup_typeid(Dwarf_Die *die, const char *id, -+ struct die_override *overrides, void *data) -+{ -+ struct dedup_state *state = data; -+ -+ dedup_mark_inner_die(state->module_name, die, id, overrides, data); -+} -+ -+/* -+ * Mark any types contained within a particular type DIE as seen. This is -+ * needed since even nameless types contained within other aggregates can be -+ * used as the type of members in any of their enclosing aggregates (though they -+ * cannot possibly be found in a module different from that of their containing -+ * aggregate, any more than a structure member can). 
-+ */ -+static void mark_seen_contained(Dwarf_Die *die, const char *module_name, -+ struct die_override *overrides, -+ void *data) -+{ -+ const char *err; -+ Dwarf_Die child; -+ -+ if ((dwarf_tag(die) != DW_TAG_structure_type) && -+ (dwarf_tag(die) != DW_TAG_union_type)) -+ return; -+ -+ switch (dwarf_child(die, &child)) { -+ case -1: -+ err = "fetch first child of aggregate"; -+ goto fail; -+ case 1: /* No DIEs at all in this aggregate */ -+ return; -+ default: /* Child DIEs exist. */ -+ break; -+ } -+ -+ /* -+ * We iterate over all immediate children and recursively call ourselves -+ * for all those of type DW_TAG_structure_type and DW_TAG_union_type. -+ * -+ * Further, everything with an entry in assembly_tab other than -+ * non-bitfield members needs marking, since these may be declared at -+ * structure scope rather than being confined to global scope. -+ * Non-bitfield members are skipped because they cannot be used as the -+ * type of another field. These types cannot be duplicates if their -+ * containing type is not a duplicate, and typedefs cannot occur at this -+ * level so they cannot be aliased; thus we can mark them directly -+ * without going back into the top of dedup(). -+ * -+ * (Bit-field members are not skipped: they use different CTF from their -+ * non-bitfield equivalents, even though they refer to the same -+ * top-level DIE. The actual different CTF is handled by type_id() -+ * itself, but we do have to call it.) -+ */ -+ int sib_ret; -+ -+ do -+ switch (dwarf_tag(&child)) { -+ case DW_TAG_member: { -+ /* -+ * bit_size and bit_offset go together: we can assume -+ * that if a member has the one, it has the other, -+ * is a bitfield, and needs recursive marking. 
-+ */ -+ if (dwarf_tag(&child) == DW_TAG_member && -+ !private_dwarf_hasattr(&child, DW_AT_bit_size)) -+ break; -+ -+ free(type_id(&child, overrides, dedup_typeid, data)); -+ break; -+ } -+ case DW_TAG_structure_type: -+ case DW_TAG_union_type: -+ mark_seen_contained(&child, module_name, overrides, data); -+ /* fall through */ -+ default: -+ if (dwarf_tag(&child) <= assembly_len && -+ assembly_tab[dwarf_tag(&child)] != NULL) { -+ -+ char *id = type_id(&child, overrides, NULL, NULL); -+ -+ dw_ctf_trace("Marking member %s as seen in " -+ "%s\n", id, module_name); -+ g_hash_table_replace(id_to_module, intern(id), -+ xstrdup(module_name)); -+ } -+ } -+ while ((sib_ret = dwarf_siblingof(&child, &child)) == 0); -+ -+ if (sib_ret == -1) { -+ err = "iterate over members"; -+ goto fail; -+ } -+ -+ return; -+ -+ fail: -+ pr_err("Cannot %s while marking aggregates as seen: %s\n", -+ err, dwfl_errmsg(dwfl_errno())); -+ exit(1); -+} -+ -+/* -+ * Mark a type as duplicated and located in the shared CTF table. Recursive, -+ * via the type_id() callback mechanism. -+ * -+ * A type_id() callback (though also called directly). -+ */ -+static void mark_shared(Dwarf_Die *die, const char *id, -+ struct die_override *overrides, void *data) -+{ -+ struct dedup_state *state = data; -+ const char *existing_module; -+ -+ /* -+ * Non-recursive call. Trigger type_id for its recursive callback, -+ * throwing the result away. -+ */ -+ if (id == NULL) { -+ free(type_id(die, overrides, mark_shared, state)); -+ return; -+ } -+ -+ existing_module = g_hash_table_lookup(id_to_module, id); -+ -+ if ((existing_module == NULL) || -+ (strcmp(existing_module, "shared_ctf") != 0)) { -+ -+ dw_ctf_trace("Marking %s as duplicate\n", id); -+ g_hash_table_replace(id_to_module, intern(xstrdup(id)), -+ xstrdup("shared_ctf")); -+ -+ /* -+ * Newly-marked structures/unions/enums must trigger a new -+ * duplicate detection pass (even if they are opaque). 
-+ */ -+ -+ if (((dwarf_tag(die) == DW_TAG_structure_type) || -+ (dwarf_tag(die) == DW_TAG_union_type) || -+ (dwarf_tag(die) == DW_TAG_enumeration_type)) && -+ (!state->repeat_detection)) { -+ dw_ctf_trace("Requesting another duplicate detection pass.\n"); -+ state->repeat_detection = 1; -+ } -+ } -+ -+ /* -+ * If this is a structure or union, mark its members as duplicates too. -+ * -+ * Do this even if we've seen this structure before, as this instance of -+ * the structure may have more members than the last we saw. However, -+ * if we have seen this structure before *in this translation unit*, -+ * skip it, to avoid infinite recursion in mutually referential -+ * structures. -+ */ -+ if ((dwarf_tag(die) == DW_TAG_structure_type) || -+ (dwarf_tag(die) == DW_TAG_union_type)) { -+ Dwarf_Die child; -+ -+ if (g_hash_table_lookup_extended(state->structs_seen, id, -+ NULL, NULL)) -+ return; -+ g_hash_table_replace(state->structs_seen, intern(xstrdup(id)), -+ NULL); -+ -+ switch (dwarf_child(die, &child)) { -+ case -1: -+ goto fail; -+ case 1: /* No DIEs at all in this aggregate */ -+ return; -+ } -+ -+ /* -+ * We are only interested in non-blacklisted children of type -+ * DW_TAG_member. -+ */ -+ int sib_ret; -+ -+ do -+ if ((dwarf_tag(&child) == DW_TAG_member) && -+ !member_blacklisted(&child, die)) -+ free(type_id(&child, overrides, -+ mark_shared, state)); -+ while ((sib_ret = dwarf_siblingof(&child, &child)) == 0); -+ -+ if (sib_ret == -1) -+ goto fail; -+ } -+ -+ return; -+ -+ fail: -+ pr_err("Cannot mark aggregate %s members as duplicated: %s\n", -+ dwarf_diename(die), dwarf_errmsg(dwarf_errno())); -+ exit(1); -+} -+ -+/* -+ * Determine if a type is a named struct, union, or enum. -+ * -+ * A type_id() callback. 
-+ */ -+static void is_named_struct_union_enum(Dwarf_Die *die, const char *unused, -+ struct die_override *overrides, -+ void *data) -+{ -+ int *is_sou = data; -+ -+ if (((dwarf_tag(die) == DW_TAG_structure_type) || -+ (dwarf_tag(die) == DW_TAG_union_type) || -+ (dwarf_tag(die) == DW_TAG_enumeration_type)) && -+ (private_dwarf_hasattr(die, DW_AT_name))) -+ *is_sou = 1; -+} -+ -+/* -+ * Duplicate detection alias fixup pass. Once the first pass is complete, we -+ * may have marked an opaque 'struct/union/enum foo' for sharing but not caught -+ * the non-opaque instance, because no users of the non-opaque instance appeared -+ * in the DWARF after the opaque copy was detected as a duplicate. This pass -+ * detects such cases, and marks their members as duplicates too. -+ * -+ * (The inverse case of a non-opaque structure/union/enum detected as a -+ * duplicate after the last usage of its opaque alias will be caught by this -+ * trap too.) -+ * -+ * Warning: this routine directly computes type_id()s without access to the -+ * corresponding type DIE, and as such is dependent on the format of type_id()s. -+ * (This is why it must run over non-opaque structures: given a non-opaque -+ * structure, its opaque alias is easy to compute, but the converse is not -+ * true.) -+ * -+ * As a list_filter() filter function, returns nonzero if this structure will -+ * not need to be checked again (because both its opaque and transparent -+ * variants are shared). 
-+ */ -+static int dedup_alias_fixup(void *id_file_data, void *data) -+{ -+ struct dedup_id_file *id_file = id_file_data; -+ struct dedup_state *state = data; -+ -+ int transparent_shared = 0; -+ int opaque_shared = 0; -+ int made_shared = 0; -+ -+ char *opaque_id; -+ const char *line_num; -+ const char *type_size; -+ const char *type_name; -+ -+ /* -+ * Compute the opaque variant corresponding to this transparent type, -+ * and check to see if either is marked shared, then find the DIE and -+ * mark both as shared if either is. (Unfortunately this means a double -+ * recursion in such cases, but this is unavoidable.) -+ */ -+ -+ line_num = strstr(id_file->id, "//"); -+ if (!line_num) { -+ pr_err("Internal error: type ID %s is corrupt.\n", -+ id_file->id); -+ exit(1); -+ } -+ -+ type_size = strstr(line_num + 2, "//"); -+ if (!type_size) { -+ pr_err("Internal error: type ID %s is corrupt.\n", -+ id_file->id); -+ exit(1); -+ } -+ -+ type_name = strstr(type_size + 2, "//"); -+ if (!type_name) { -+ /* -+ * That's OK: the type size is optional, so what we thought was -+ * the type size is actually the type name. -+ */ -+ type_name = type_size; -+ } -+ type_name += 2; -+ -+ opaque_id = xstrdup("////"); -+ opaque_id = str_append(opaque_id, type_name); -+ -+ const char *transparent_module = g_hash_table_lookup(id_to_module, -+ id_file->id); -+ const char *opaque_module = g_hash_table_lookup(id_to_module, -+ opaque_id); -+ -+ transparent_shared = ((transparent_module != NULL) && -+ (strcmp(transparent_module, "shared_ctf") == 0)); -+ -+ opaque_shared = ((opaque_module != NULL) && -+ (strcmp(opaque_module, "shared_ctf") == 0)); -+ -+ /* -+ * Transparent type needs sharing. -+ */ -+ if (opaque_shared && !transparent_shared) { -+ Dwarf_Die die; -+ Dwfl_Module *mod; -+ Dwarf_Addr dummy; -+ -+ /* -+ * Since we are not using process_file(), we must handle -+ * translation unit switches by hand, including resetting -+ * structs_seen. 
We also need to open the DWARF file, since -+ * type_id() needs access to the DIE of this type and all its -+ * dependent types as well. -+ */ -+ -+ if (state->dwfl != NULL && -+ strcmp(state->dwfl_file_name, id_file->file_name) != 0) -+ dedup_dwarf_free(state); -+ -+ if (state->dwfl_file_name == NULL) { -+ state->dwfl = simple_dwfl_new(id_file->file_name, &mod); -+ state->dwarf = dwfl_module_getdwarf(mod, &dummy); -+ state->dwfl_file_name = xstrdup(id_file->file_name); -+ if (state->structs_seen) -+ g_hash_table_destroy(state->structs_seen); -+ state->structs_seen = g_hash_table_new(g_str_hash, -+ g_str_equal); -+ } -+ if (!dwarf_offdie(state->dwarf, id_file->dieoff, -+ &die)) { -+ pr_err("Cannot look up offset %li in %s for type with ID %s\n", -+ id_file->dieoff, id_file->file_name, id_file->id); -+ exit(1); -+ } -+ mark_shared(&die, NULL, NULL, state); -+ made_shared = 1; -+ } -+ -+ /* -+ * We don't have the opaque type's DIE, so we can't use mark_shared(): -+ * this is also good since this triggers another duplicate detection -+ * pass, and we don't want to trigger another pass merely because of a -+ * nonshared opaque type (since they don't have members that may have -+ * structure or union type themselves and thus force more unshared -+ * types to become shared). -+ * -+ * Instead, do it by hand: this is simple, as member recursion is -+ * guaranteed not to be required for an opaque type. -+ */ -+ if (transparent_shared && !opaque_shared) { -+ dw_ctf_trace("Marking %s as duplicate\n", opaque_id); -+ g_hash_table_replace(id_to_module, intern(xstrdup(opaque_id)), -+ xstrdup("shared_ctf")); -+ made_shared = 1; -+ } -+ -+ free(opaque_id); -+ -+ return made_shared || (opaque_shared && transparent_shared); -+} -+ -+/* -+ * Mark a basic type shared by name and intern it in all relevant hashes. (Used -+ * for marking basic types we don't have a DIE for.) 
-+ */ -+static void mark_shared_by_name(ctf_file_t *ctf, ctf_id_t ctf_id, -+ const char *name) -+{ -+ struct ctf_full_id static_ctf_id = { ctf, ctf_id }; -+ struct ctf_full_id *full_ctf_id; -+ char *id = NULL; -+ -+ full_ctf_id = malloc(sizeof(struct ctf_full_id)); -+ if (full_ctf_id == NULL) { -+ pr_err("%s: out of memory\n", __func__); -+ exit(1); -+ } -+ *full_ctf_id = static_ctf_id; -+ -+ id = str_appendn(id, "////", name, " ", NULL); -+#ifdef DEBUG -+ strcpy(full_ctf_id->module_name, "shared_ctf"); -+ strcpy(full_ctf_id->file_name, "<built-in type>"); -+#endif -+ g_hash_table_replace(id_to_module, intern(xstrdup(id)), xstrdup("shared_ctf")); -+ g_hash_table_replace(id_to_type, intern(id), full_ctf_id); -+} -+ -+/* -+ * Type assembly. -+ * -+ * Given a DWARF DIE corresponding to a top-level type, call the appropriate -+ * construction function, passing it the appropriate ctf_file_t, constructing it -+ * if necessary, and stashing them in the appropriate hashes. Return the -+ * ctf_file_t and ctf_id_t of this type. -+ * -+ * Indirectly recursively called for types depending on other types, and for -+ * the types of variables (which for the sake of argument we call 'types' here -+ * too, since we treat them exactly like types, and dealing with types is our -+ * most important function). In such calls, the module_name may be 'shared_ctf' -+ * if this type is in the shared CTF repository. -+ * -+ * Select properties of the DIE can be overridden via the overrides array, if -+ * needed. -+ */ -+static struct ctf_full_id *construct_ctf_id(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ struct die_override *overrides) -+{ -+ char *id = type_id(die, overrides, NULL, NULL); -+ char *ctf_module; -+ ctf_file_t *ctf; -+ ctf_snapshot_id_t snapshot; -+ -+ dw_ctf_trace(" %p: %s: looking up %s: %s\n", &id, -+ module_name ? 
module_name : "(no module)", -+ dwarf_diename(die), id); -+ /* -+ * Make sure this type does not already exist. (Recursive chasing for -+ * referenced types can lead to construct_ctf() being called on them -+ * more than once.) -+ */ -+ struct ctf_full_id *ctf_id; -+ -+ ctf_id = g_hash_table_lookup(id_to_type, id); -+ if (ctf_id != NULL) { -+ dw_ctf_trace(" %p: %p:%i found in module %s, file %s\n", &id, -+ ctf_id->ctf_file, (int) ctf_id->ctf_id, -+ ctf_id->module_name, ctf_id->file_name); -+ free(id); -+ return ctf_id; -+ } -+ -+ /* -+ * Create the CTF file for this type, if it does not exist. Verify that -+ * the duplicate-detection pass scanned this type, and that this is -+ * either the current module or the shared CTF module. -+ */ -+ -+ ctf_module = g_hash_table_lookup(id_to_module, id); -+ -+ if (ctf_module == NULL) { -+ pr_err("Internal error: within file %s, module %s, type at DIE offset %lx\n" -+ "with ID %s was not already noted by dedup().\n", -+ file_name, module_name, DIEOFFSET(die), id); -+ pr_err("dedup() is probably buggy.\n"); -+ exit(1); -+ } -+ -+ if ((strcmp(ctf_module, module_name) != 0) && -+ (strcmp(ctf_module, "shared_ctf") != 0)) { -+ pr_err("Internal error: within file %s, module %s, type at DIE offset %lx\n" -+ "with ID %s is in a different non-shared module, %s.\n", -+ file_name, module_name, DIEOFFSET(die), id, ctf_module); -+ pr_err("dedup() is probably buggy.\n"); -+ exit(1); -+ } -+ -+ ctf = lookup_ctf_file(ctf_module); -+ -+ /* -+ * Construct the CTF, then insert the top-level CTF entity into the -+ * id->type hash so that references from other types can find it, and -+ * update the CTF container. If conversion failed, roll back all -+ * changes made since the last successful call to this function. -+ * -+ * NOTE: references within DWARF to non-top-level types will currently -+ * fail, but I'm not sure if these can exist. (The type ID -+ * representation implicitly assumes that they cannot.) 
-+ */ -+ -+ snapshot = ctf_snapshot(ctf); -+ -+ enum skip_type skip = SKIP_CONTINUE; -+ -+ dw_ctf_trace("%p: into die_to_ctf() for %s\n", &id, id); -+ ctf_id_t this_ctf_id = die_to_ctf(ctf_module, file_name, die, -+ parent_die, ctf, -1, overrides, -+ 1, 0, &skip, NULL, id); -+ dw_ctf_trace("%p: out of die_to_ctf()\n", &id); -+ -+ ctf_id = malloc(sizeof(struct ctf_full_id)); -+ if (ctf_id == NULL) { -+ pr_err("Out of memory\n"); -+ exit(1); -+ } -+ -+ if (skip != SKIP_ABORT) { -+ ctf_id->ctf_file = ctf; -+ ctf_id->ctf_id = this_ctf_id; -+#ifdef DEBUG -+ strcpy(ctf_id->module_name, ctf_module); -+ strcpy(ctf_id->file_name, file_name); -+#endif -+ dw_ctf_trace(" %lx: %s: new type added, CTF ID %p:%li\n", -+ DIEOFFSET(die), id, ctf_id->ctf_file, -+ ctf_id->ctf_id); -+ -+ g_hash_table_replace(id_to_type, intern(id), ctf_id); -+ } else { -+ /* -+ * Failure. Remove the type from the id_to_type mapping, if it -+ * is there, and discard any added types from the CTF. -+ * -+ * If we have had to ctf_update() due to a new type getting -+ * used, the rollback will fail: discard instead. It might leave -+ * some spurious types hanging around but it will clean up as -+ * much as we can at this point. (This cannot happen when -+ * LIBDTRACE_CTF_OMISSIBLE_CTF_UPDATE, but it costs nothing to -+ * leave in: failure is a rare case.) -+ */ -+ -+ if (ctf_rollback(ctf, snapshot) < 0) -+ if (ctf_errno(ctf) == ECTF_OVERROLLBACK) -+ ctf_discard(ctf); -+ -+ free(ctf_id); -+ ctf_id = NULL; -+ -+ g_hash_table_remove(id_to_type, id); -+ free(id); -+ -+ dw_ctf_trace(" %p: (failure)\n", &id); -+ } -+ -+ return ctf_id; -+} -+ -+/* -+ * Given a DWARF DIE corresponding to a top-level type, or to an aggregate -+ * member, and the ctf_file_t where it is to be placed, call the appropriate -+ * construction function to place it and (for aggregates) its siblings there, -+ * recursing to handle contained aggregates. -+ * -+ * The parameters to this function are: -+ * -+ * module_name: The kernel module. 
-+ * file_name: The object file. -+ * die: The DWARF DIE. -+ * parent_die: Its parent, i.e. if a structure member, this is a structure: if -+ * top-level, this is a CU DIE. -+ * ctf: The CTF file this object should go into (possibly shared_ctf). -+ * parent_ctf_id: The CTF ID of the parent DIE, or -1 if none. -+ * struct die_override: Overrides for DWARF attributes (a NULL-terminated array, -+ * or NULL). -+ * top_level_type: 1 if this is a top-level type that can have a name and be -+ * referred to by other types. -+ * backwards: if 1, this is an internal call to process a series of bitfields -+ * with descending bit_offset and identical data_member_location. -+ * skip: The error-handling / skipping enum. -+ * replace: if 1, this type should replace its parent type entirely. -+ * id: the ID of this type. -+ * -+ * Note: id is only defined when top_level_type is 1. (We never use it -+ * in other situations, and computing it is quite expensive.) -+ */ -+static ctf_id_t die_to_ctf(const char *module_name, const char *file_name, -+ Dwarf_Die *die, Dwarf_Die *parent_die, -+ ctf_file_t *ctf, ctf_id_t parent_ctf_id, -+ struct die_override *overrides, int top_level_type, -+ int backwards, enum skip_type *skip, int *replace, -+ const char *id) -+{ -+ int sib_ret = 0; -+ ctf_id_t this_ctf_id; -+ int dummy; -+ -+ do { -+ const char *id_name; -+ const char *decl_file_name = dwarf_decl_file(die); -+ int decl_line_num; -+ int emitted_backwards = 0; -+ char locerrstr[1024]; -+ Dwarf_Die next_die; -+ -+ /* -+ * If the next DWARF DIE is at the same location as this one but -+ * with a lower bit_offset, we need to process the set of DIEs -+ * at this location in *reverse*, because DWARF has the DIEs in -+ * declaration order, while CTF wants them in in-memory order: -+ * so recurse to handle the next until we get to an element with -+ * a sibling at a different data_member_location (safe because -+ * there can't be that many of them per data_member_location), -+ * then (at the end 
of die_to_ctf()) exit the recursion and skip -+ * over the lot. -+ * -+ * We can ignore 'replace' and the return value of die_to_ctf -+ * because bitfields must be structure or union members and -+ * cannot be array dimensions. -+ */ -+ if (die_emit_next_backwards(&next_die, die, -+ overrides) != NULL) { -+ ctf_id_t dummy; -+ -+ dw_ctf_trace("Emitting %s:%s:%lx backwards\n", -+ module_name, file_name, -+ DIEOFFSET(&next_die)); -+ -+ dummy = die_to_ctf(module_name, file_name, &next_die, -+ parent_die, ctf, parent_ctf_id, -+ overrides, top_level_type, 1, skip, -+ replace, NULL); -+ if (*skip == SKIP_ABORT) -+ return dummy; -+ emitted_backwards = 1; -+ } -+ -+ /* -+ * Compute a name for our current location, for error messages. -+ * (The type representation could be used, but is likely to be -+ * hard for users to comprehend, and should we move to a hashed -+ * representation would be entirely useless for this purpose.) -+ */ -+ if ((decl_file_name == NULL) || -+ (dwarf_decl_line(die, &decl_line_num) < 0)) { -+ decl_file_name = "global"; -+ decl_line_num = 0; -+ } -+ -+ id_name = dwarf_diename(die); -+ if (id_name == NULL) -+ id_name = "(unnamed type)"; -+ -+ snprintf(locerrstr, sizeof(locerrstr), "%s:%i:%s", -+ decl_file_name, decl_line_num, id_name); -+ -+ dw_ctf_trace("Working over %s:%s:%s:%lx:%x with CTF file %p\n", -+ module_name, file_name, -+ dwarf_diename(die)==NULL?"NULL":dwarf_diename(die), -+ DIEOFFSET(die), dwarf_tag(die), ctf); -+ -+ /* -+ * Only process a given node, or its children, if we know how to -+ * do so. 
-+ */ -+ if ((dwarf_tag(die) >= assembly_len) || -+ (assembly_tab[dwarf_tag(die)] == NULL)) { -+ pr_err("%s:%i: warning: skipping identifier " -+ "%s with unknown DWARF tag %lx.\n", -+ decl_file_name, decl_line_num, id_name, -+ (unsigned long) dwarf_tag(die)); -+ return -1; -+ } -+ -+ *skip = SKIP_CONTINUE; -+ -+ this_ctf_id = assembly_tab[dwarf_tag(die)](module_name, -+ file_name, -+ die, parent_die, -+ ctf, parent_ctf_id, -+ locerrstr, -+ overrides, -+ top_level_type, -+ skip, -+ replace ? replace : -+ &dummy); -+ dw_ctf_trace("%s: out of assembly function for tag %lx with type ID %li\n", -+ locerrstr, (unsigned long) dwarf_tag(die), -+ this_ctf_id); -+ -+ if (this_ctf_id < 0) { -+ if ((this_ctf_id == CTF_NO_ERROR_REPORTED) && -+ (ctf_errno(ctf) != 0)) -+ pr_err("%s: CTF error in assembly of item with tag %i: %s\n", -+ locerrstr, dwarf_tag(die), -+ ctf_errmsg(ctf_errno(ctf))); -+ -+ num_errors++; -+#ifdef DEBUG -+ exit(1); -+#endif -+ *skip = SKIP_ABORT; -+ } -+ -+ /* -+ * Add newly-added non-skipped top-level structure or union CTF -+ * IDs to the type table at once. This allows circular type -+ * references via pointers in structure/union member DIEs to be -+ * looked up correctly. 
-+ */ -+ if (top_level_type && (*skip == SKIP_CONTINUE) && -+ ((dwarf_tag(die) == DW_TAG_structure_type) || -+ (dwarf_tag(die) == DW_TAG_union_type))) { -+ struct ctf_full_id full_ctf_id = { ctf, this_ctf_id }; -+ struct ctf_full_id *ctf_id; -+ -+#ifdef DEBUG -+ strcpy(full_ctf_id.module_name, module_name); -+ strcpy(full_ctf_id.file_name, file_name); -+#endif -+ -+ ctf_id = malloc(sizeof(struct ctf_full_id)); -+ if (ctf_id == NULL) { -+ fprintf(stderr, -+ "Out of memory allocating type ID\n"); -+ exit(1); -+ } -+ -+ dw_ctf_trace(" %s: immediate addition of %s, CTF ID " -+ "%p:%li in module %s, file %s\n", __func__, -+ id, full_ctf_id.ctf_file, full_ctf_id.ctf_id, -+ module_name, file_name); -+ *ctf_id = full_ctf_id; -+ -+ g_hash_table_replace(id_to_type, intern(xstrdup(id)), -+ ctf_id); -+ } -+ -+ /* -+ * Recurse to handle contained DIEs. -+ */ -+ -+ if ((dwarf_haschildren(die)) && (*skip == SKIP_CONTINUE)) { -+ Dwarf_Die child_die; -+ ctf_id_t new_id; -+ int replace = 0; -+ -+ if (dwarf_child(die, &child_die) < 0) { -+ pr_err("%s: Cannot recurse to DWARF DIE children: %s\n", -+ locerrstr, dwarf_errmsg(dwarf_errno())); -+ exit(1); -+ } -+ -+ new_id = die_to_ctf(module_name, file_name, &child_die, -+ die, ctf, this_ctf_id, overrides, 0, -+ 0, skip, &replace, NULL); -+ if (replace) -+ this_ctf_id = new_id; -+ } -+ -+ /* -+ * If we are walking backwards over a bunch of bitfields, this -+ * is a recursive walk, not an iterative one: return. -+ */ -+ if (backwards) -+ return this_ctf_id; -+ -+ /* -+ * We are not walking backwards, but this is the final stage of -+ * a bunch of backwards emissions: walk forwards until we hit -+ * the last one again. 
-+ */ -+ if (emitted_backwards) -+ while (die_emit_next_backwards(&next_die, die, -+ overrides) != NULL) -+ *die = next_die; -+ -+ /* -+ * Walk siblings of non-top-level types only: the sibling walk -+ * of top-level types is done by process_file(), so that -+ * construct_ctf_id() gets a chance to put each such type in the -+ * right CTF file. -+ */ -+ } while (*skip != SKIP_ABORT && !top_level_type && -+ (sib_ret = dwarf_siblingof(die, die)) == 0); -+ -+ if (sib_ret == -1) { -+ pr_err("In module %s, failure walking the sibling list: %s\n", -+ module_name, dwarf_errmsg(dwarf_errno())); -+ exit(1); -+ } -+ -+ dw_ctf_trace("New type ID: %p:%li\n", ctf, this_ctf_id); -+ return this_ctf_id; -+} -+ -+/* -+ * Calls construct_ctf_id() and throws the ID away. Used as a process_file() -+ * callback. -+ */ -+static void construct_ctf(const char *module_name, const char *file_name, -+ Dwarf_Die *die, Dwarf_Die *parent_die, -+ void *unused __unused__) -+{ -+ construct_ctf_id(module_name, file_name, die, parent_die, NULL); -+} -+ -+/* -+ * Return the next DIE, if that DIE needs to be emitted before this one. -+ */ -+static Dwarf_Die *die_emit_next_backwards(Dwarf_Die *next, Dwarf_Die *die, -+ struct die_override *overrides) -+{ -+ if (dwarf_tag(die) == DW_TAG_member && -+ dwarf_siblingof(die, next) == 0 && -+ dwarf_tag(next) == DW_TAG_member && -+ private_dwarf_hasattr(die, DW_AT_data_member_location) && -+ private_dwarf_hasattr(next, DW_AT_data_member_location) && -+ private_dwarf_udata(die, DW_AT_data_member_location, overrides) == -+ private_dwarf_udata(next, DW_AT_data_member_location, overrides) && -+ private_dwarf_hasattr(die, DW_AT_bit_offset) && -+ private_dwarf_hasattr(next, DW_AT_bit_offset) && -+ private_dwarf_udata(die, DW_AT_bit_offset, overrides) > -+ private_dwarf_udata(next, DW_AT_bit_offset, overrides)) -+ return next; -+ return NULL; -+} -+ -+/* -+ * Look up a type through its reference: return its ctf_id, or recursively -+ * construct it if need be. 
-+ */ -+static ctf_id_t lookup_ctf_type(const char *module_name, const char *file_name, -+ Dwarf_Die *die, ctf_file_t *ctf, -+ struct die_override *overrides, -+ const char *locerrstr) -+{ -+ Dwarf_Die tmp; -+ Dwarf_Die *type_die = private_dwarf_type(die, &tmp); -+ Dwarf_Die cu_die; -+ struct ctf_full_id *type_ref; -+ -+ /* -+ * Pointers to functions and void are special cases: there is only one -+ * of each of these in CTF, so we can use global singletons. -+ */ -+ -+ if (type_die == NULL) -+ return ctf_void_type; -+ -+ if (dwarf_tag(type_die) == DW_TAG_subroutine_type) -+ return ctf_funcptr_type; -+ -+ /* -+ * Look up or construct CTF for this type. -+ */ -+ -+ dwarf_diecu(type_die, &cu_die, NULL, NULL); -+ -+ dw_ctf_trace(" %s: Looking up dependent type at offset %lx for type %s at module %s, file %s\n", -+ locerrstr, DIEOFFSET(type_die), -+ dwarf_diename(die) ? dwarf_diename(die) : "NULL", -+ module_name, file_name); -+ -+ type_ref = construct_ctf_id(module_name, file_name, -+ type_die, &cu_die, overrides); -+ -+ /* -+ * Pass any error back up. -+ */ -+ if (type_ref == NULL) { -+ pr_err("%s: type lookup failed.\n", locerrstr); -+ return -1; -+ } -+ -+ if ((type_ref->ctf_file != ctf) && -+ type_ref->ctf_file != lookup_ctf_file("shared_ctf")) { -+#ifdef DEBUG -+ pr_err("%s: Internal error: lookup of %s found in different file: " -+ "%s/%s versus %s/%s.\n", locerrstr, -+ dwarf_diename(die) ? dwarf_diename(die) : "(unnamed)", -+ type_ref->module_name, type_ref->file_name, -+ module_name, file_name); -+#else -+ pr_err("%s: Internal error: lookup of %s found in different file.\n", -+ locerrstr, dwarf_diename(die) ? dwarf_diename(die) : -+ "(unnamed)"); -+#endif -+ pr_err("dedup() is probably buggy.\n"); -+ exit(1); -+ } -+ -+ return type_ref->ctf_id; -+} -+ -+/* Assembly functions. 
*/ -+ -+#define CTF_DW_ENFORCE(attribute) do \ -+ if (!private_dwarf_hasattr(die, (DW_AT_##attribute))) { \ -+ pr_err("%s: %s: %lx: skipping type, %s attribute not present.\n", \ -+ locerrstr, __func__, DIEOFFSET(die), \ -+ #attribute); \ -+ *skip = SKIP_ABORT; \ -+ return CTF_ERROR_REPORTED; \ -+ } \ -+ while (0) -+ -+#define CTF_DW_ENFORCE_NOT(attribute) do \ -+ if (private_dwarf_hasattr(die, (DW_AT_##attribute))) { \ -+ pr_err("%s: %s: %lx: skipping type, %s attribute not supported.\n", \ -+ locerrstr, __func__, DIEOFFSET(die), \ -+ #attribute); \ -+ *skip = SKIP_ABORT; \ -+ return CTF_ERROR_REPORTED; \ -+ } \ -+ while (0) -+ -+#define ROOT_TYPE(x) (x) ? CTF_ADD_ROOT : CTF_ADD_NONROOT -+ -+/* -+ * A CTF assembly filter function which excludes all types not at the global -+ * scope (i.e. whose immediate parent is not a CU DIE), and all types which -+ * reference a type which is not at the global scope (thus ruling out local type -+ * definitions for which the compiler is not consistently emitting all -+ * intermediate types at the local scope). -+ */ -+static int filter_ctf_file_scope(const char *file_name, Dwarf *dwarf, -+ Dwarf_Die *die, Dwarf_Die *parent_die) -+{ -+ Dwarf_Die type_die; -+ GHashTable *parents; -+ -+ /* -+ * A type not dependent on another is acceptable iff it is at the global -+ * scope. -+ */ -+ if (private_dwarf_type(die, &type_die) == NULL) -+ return (dwarf_tag(parent_die) == DW_TAG_compile_unit); -+ -+ /* -+ * No type we reference may have a subprogram DIE as any of its parents. 
-+ */ -+ parents = g_hash_table_lookup(fn_to_die_to_parent, -+ abs_file_name(file_name)); -+ -+ do { -+ Dwarf_Die parent = type_die; -+ Dwarf_Off parent_off = 0; -+ -+ do { -+ if (parent_off != 0 && -+ !dwarf_offdie(dwarf, parent_off, &parent)) -+ break; -+ if (dwarf_tag(&parent) == DW_TAG_subprogram) -+ return 0; -+ } while ((parent_off = GPOINTER_TO_UINT(g_hash_table_lookup(parents, -+ GUINT_TO_POINTER(dwarf_dieoffset(&parent))))) -+ != 0); -+ } while (private_dwarf_type(&type_die, &type_die) != NULL); -+ -+ return 1; -+} -+ -+/* -+ * A CTF assembly filter function which excludes all names not at the global -+ * scope, and all names whose names are unlikely to be interesting. (DTrace -+ * userspace contains a similar list, but the two lists need not be in sync.) -+ */ -+static int filter_ctf_uninteresting(const char *file_name __unused__, -+ Dwarf *dwarf __unused__, -+ Dwarf_Die *die, Dwarf_Die *parent_die) -+{ -+ const char *sym_name = dwarf_diename(die); -+ -+ /* -+ * 'Variables' with no name are not interesting. 
-+ */ -+ if (sym_name == NULL) -+ return 0; -+ -+#define strstarts(var, x) (strncmp(var, x, strlen(x)) == 0) -+ return ((dwarf_tag(parent_die) == DW_TAG_compile_unit) && -+ !((strcmp(sym_name, "__per_cpu_start") == 0) || -+ (strcmp(sym_name, "__per_cpu_end") == 0) || -+ (strcmp(sym_name, "_sdt_probes") == 0) || -+ (strstarts(sym_name, "__crc_")) || -+ (strstarts(sym_name, "__ksymtab_")) || -+ (strstarts(sym_name, "__kcrctab_")) || -+ (strstarts(sym_name, "__kstrtab_")) || -+ (strstarts(sym_name, "__param_")) || -+ (strstarts(sym_name, "__syscall_meta__")) || -+ (strstarts(sym_name, "__p_syscall_meta__")) || -+ (strstarts(sym_name, "__event_")) || -+ (strstarts(sym_name, "event_")) || -+ (strstarts(sym_name, "ftrace_event_")) || -+ (strstarts(sym_name, "types__")) || -+ (strstarts(sym_name, "args__")) || -+ (strstarts(sym_name, "__tracepoint_")) || -+ (strstarts(sym_name, "__tpstrtab_")) || -+ (strstarts(sym_name, "__tpstrtab__")) || -+ (strstarts(sym_name, "__initcall_")) || -+ (strstarts(sym_name, "__setup_")) || -+ (strstarts(sym_name, "__pci_fixup_")) || -+ (strstr(sym_name, ".") != NULL))); -+#undef strstarts -+} -+ -+/* -+ * Assemble base types. 
-+ */ -+static ctf_id_t assemble_ctf_base(const char *module_name, -+ const char *file_name, Dwarf_Die *die, -+ Dwarf_Die *parent_die, ctf_file_t *ctf, -+ ctf_id_t parent_ctf_id, const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, enum skip_type *skip, -+ int *replace) -+{ -+ typedef ctf_id_t (*ctf_add_fun)(ctf_file_t *, uint_t, -+ const char *, const ctf_encoding_t *); -+ -+ const char *name = dwarf_diename(die); -+ Dwarf_Word encoding, size; -+ ctf_add_fun ctf_add_func; -+ ctf_encoding_t ctf_encoding; -+ size_t encoding_search; -+ struct die_override *bit_size_override, *bit_offset_override; -+ -+ struct dwarf_encoding_tab { -+ Dwarf_Word encoding; -+ ctf_add_fun func; -+ uint_t encoding_fixed; -+ struct type_encoding_tab *size_lookup; -+ }; -+ -+ struct type_encoding_tab float_encoding[] = { -+ {sizeof(float), CTF_FP_SINGLE }, -+ {sizeof(double), CTF_FP_DOUBLE }, -+ {sizeof(long double), CTF_FP_LDOUBLE }, -+ {0, 0} -+ }; -+ -+ struct type_encoding_tab float_cplx_encoding[] = { -+ {sizeof(float), CTF_FP_CPLX }, -+ {sizeof(double), CTF_FP_DCPLX }, -+ {sizeof(long double), CTF_FP_LDCPLX }, -+ {0, 0} -+ }; -+ -+ struct type_encoding_tab float_imagry_encoding[] = { -+ {sizeof(float), CTF_FP_IMAGRY }, -+ {sizeof(double), CTF_FP_DIMAGRY }, -+ {sizeof(long double), CTF_FP_LDIMAGRY }, -+ {0, 0} -+ }; -+ -+ struct dwarf_encoding_tab all_encodings[] = { -+ {DW_ATE_boolean, ctf_add_integer, CTF_INT_BOOL, NULL}, -+ {DW_ATE_signed, ctf_add_integer, CTF_INT_SIGNED, NULL}, -+ {DW_ATE_signed_char, ctf_add_integer, -+ CTF_INT_SIGNED | CTF_INT_CHAR, NULL}, -+ {DW_ATE_unsigned, ctf_add_integer, 0, NULL}, -+ {DW_ATE_unsigned_char, ctf_add_integer, CTF_INT_CHAR, NULL}, -+ {DW_ATE_float, ctf_add_float, 0, float_encoding}, -+ {DW_ATE_complex_float, ctf_add_float, 0, float_cplx_encoding}, -+ {DW_ATE_imaginary_float, ctf_add_float, 0, -+ float_imagry_encoding}, -+ {0, 0, 0, 0} -+ }; -+ -+ CTF_DW_ENFORCE(name); -+ CTF_DW_ENFORCE(encoding); -+ 
CTF_DW_ENFORCE(byte_size); -+ CTF_DW_ENFORCE_NOT(endianity); -+ -+ encoding = private_dwarf_udata(die, DW_AT_encoding, overrides); -+ size = private_dwarf_udata(die, DW_AT_byte_size, overrides); -+ -+ for (encoding_search = 0; all_encodings[encoding_search].func != 0; -+ encoding_search++) { -+ if (all_encodings[encoding_search].encoding == encoding) { -+ ctf_add_func = all_encodings[encoding_search].func; -+ if (all_encodings[encoding_search].size_lookup != NULL) -+ ctf_encoding.cte_format = -+ find_ctf_encoding(all_encodings[encoding_search].size_lookup, -+ size); -+ else -+ ctf_encoding.cte_format = -+ all_encodings[encoding_search].encoding_fixed; -+ break; -+ } -+ } -+ -+ if (all_encodings[encoding_search].func == 0) { -+ pr_err("%s: skipping type, base type %li not yet implemented.\n", -+ locerrstr, (long) encoding); -+ *skip = SKIP_ABORT; -+ return CTF_ERROR_REPORTED; -+ } -+ -+ /* -+ * Handle bitfields. Only look at overrides, since bitfields can only -+ * be members of structures in C, thus derived from the referencing DIE. -+ * Bitfields are never top-level types in C, even though they are in -+ * DWARF. -+ */ -+ bit_size_override = private_find_override(die, DW_AT_bit_size, -+ overrides); -+ bit_offset_override = private_find_override(die, DW_AT_bit_offset, -+ overrides); -+ if (bit_size_override) { -+ ctf_encoding.cte_bits = bit_size_override->value; -+ top_level_type = 0; -+ } else -+ ctf_encoding.cte_bits = size * 8; -+ -+ if (bit_offset_override) { -+#if __BYTE_ORDER == __BIG_ENDIAN -+ ctf_encoding.cte_offset = bit_offset_override->value; -+#else -+ /* -+ * The figure here counts from the left to the leftmost edge of -+ * the bitfield: we want to count from the right to the -+ * rightmost edge. 
-+ */ -+ ctf_encoding.cte_offset = (size * 8) - -+ bit_offset_override->value - ctf_encoding.cte_bits; -+ dw_ctf_trace("Endianizing cte_offset from %x to %x\n", -+ (unsigned int) bit_offset_override->value, -+ ctf_encoding.cte_offset); -+#endif -+ } else -+ ctf_encoding.cte_offset = 0; -+ -+#ifdef DEBUG -+ if (bit_size_override || bit_offset_override) -+ dw_ctf_trace("Bitfield overrides: bit size %i; bit offset %i\n", -+ ctf_encoding.cte_bits, ctf_encoding.cte_offset); -+#endif -+ -+ return ctf_add_func(ctf, ROOT_TYPE(top_level_type), name, -+ &ctf_encoding); -+} -+ -+/* -+ * Assemble pointer types. -+ */ -+static ctf_id_t assemble_ctf_pointer(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, Dwarf_Die *parent_die, -+ ctf_file_t *ctf, ctf_id_t parent_ctf_id, -+ const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, -+ enum skip_type *skip, int *replace) -+{ -+ ctf_id_t type_ref; -+ -+ type_ref = lookup_ctf_type(module_name, file_name, die, ctf, -+ overrides, locerrstr); -+ if (type_ref < 0) { -+ *skip = SKIP_ABORT; -+ return CTF_ERROR_REPORTED; -+ } -+ -+ /* -+ * Pointers to functions are all the same type in CTF: don't bother -+ * adding it over again. -+ */ -+ if (type_ref == ctf_funcptr_type) -+ return type_ref; -+ -+ return ctf_add_pointer(ctf, ROOT_TYPE(top_level_type), type_ref); -+} -+ -+/* -+ * Assemble array types. This function looks up the array type, but does not do -+ * any array construction: that is left to assemble_ctf_array_dimension(). 
-+ */ -+static ctf_id_t assemble_ctf_array(const char *module_name, -+ const char *file_name, Dwarf_Die *die, -+ Dwarf_Die *parent_die, ctf_file_t *ctf, -+ ctf_id_t parent_ctf_id, -+ const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, -+ enum skip_type *skip, int *replace) -+{ -+ ctf_id_t type_ref; -+ -+ CTF_DW_ENFORCE_NOT(ordering); -+ CTF_DW_ENFORCE_NOT(bit_stride); -+ CTF_DW_ENFORCE_NOT(byte_stride); -+ -+ type_ref = lookup_ctf_type(module_name, file_name, die, ctf, -+ overrides, locerrstr); -+ if (type_ref < 0) { -+ *skip = SKIP_ABORT; -+ return CTF_ERROR_REPORTED; -+ } -+ return type_ref; -+} -+ -+/* -+ * Assemble an array dimension, wrapping an array round the parent_ctf_id and -+ * replacing it. -+ */ -+static ctf_id_t assemble_ctf_array_dimension(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ ctf_file_t *ctf, -+ ctf_id_t parent_ctf_id, -+ const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, -+ enum skip_type *skip, -+ int *replace) -+{ -+ ctf_arinfo_t arinfo; -+ -+ CTF_DW_ENFORCE_NOT(bit_size); -+ CTF_DW_ENFORCE_NOT(byte_size); -+ CTF_DW_ENFORCE_NOT(bit_stride); -+ CTF_DW_ENFORCE_NOT(byte_stride); -+ CTF_DW_ENFORCE_NOT(lower_bound); -+ CTF_DW_ENFORCE_NOT(threads_scaled); -+ -+ arinfo.ctr_contents = parent_ctf_id; -+ -+ arinfo.ctr_index = lookup_ctf_type(module_name, file_name, -+ die, ctf, overrides, locerrstr); -+ if (arinfo.ctr_index < 0) { -+ *skip = SKIP_ABORT; -+ return CTF_ERROR_REPORTED; -+ } -+ -+ arinfo.ctr_nelems = private_subrange_dimensions(die); -+ -+ /* -+ * For each array dimension, construct an appropriate array of the -+ * type-so-far, overriding the parent type. -+ */ -+ -+ *replace = 1; -+ return ctf_add_array(ctf, ROOT_TYPE(top_level_type), &arinfo); -+} -+ -+/* -+ * Assemble an enumeration. 
-+ */ -+static ctf_id_t assemble_ctf_enumeration(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ ctf_file_t *ctf, -+ ctf_id_t parent_ctf_id, -+ const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, -+ enum skip_type *skip, -+ int *replace) -+{ -+ const char *name = dwarf_diename(die); -+ -+ return ctf_add_enum(ctf, ROOT_TYPE(top_level_type), name); -+} -+ -+/* -+ * Assemble an enumeration value. -+ */ -+static ctf_id_t assemble_ctf_enumerator(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ ctf_file_t *ctf, -+ ctf_id_t parent_ctf_id, -+ const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, -+ enum skip_type *skip, -+ int *replace) -+{ -+ const char *name = dwarf_diename(die); -+ Dwarf_Word value; -+ int err; -+ -+ CTF_DW_ENFORCE(name); -+ CTF_DW_ENFORCE(const_value); -+ CTF_DW_ENFORCE_NOT(bit_stride); -+ CTF_DW_ENFORCE_NOT(byte_stride); -+ -+ value = private_dwarf_udata(die, DW_AT_const_value, overrides); -+ err = ctf_add_enumerator(ctf, parent_ctf_id, name, value); -+ -+ if (err != 0) -+ return err; -+ -+ return parent_ctf_id; -+} -+ -+/* -+ * Assemble a typedef. -+ */ -+static ctf_id_t assemble_ctf_typedef(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ ctf_file_t *ctf, -+ ctf_id_t parent_ctf_id, -+ const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, -+ enum skip_type *skip, -+ int *replace) -+{ -+ const char *name = dwarf_diename(die); -+ ctf_id_t type_ref; -+ -+ CTF_DW_ENFORCE(name); -+ -+ type_ref = lookup_ctf_type(module_name, file_name, die, ctf, -+ overrides, locerrstr); -+ if (type_ref < 0) { -+ *skip = SKIP_ABORT; -+ return CTF_ERROR_REPORTED; -+ } -+ -+ return ctf_add_typedef(ctf, ROOT_TYPE(top_level_type), name, type_ref); -+} -+ -+/* -+ * Assemble a const/volatile/restrict qualifier. 
-+ */ -+static ctf_id_t assemble_ctf_cvr_qual(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ ctf_file_t *ctf, -+ ctf_id_t parent_ctf_id, -+ const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, -+ enum skip_type *skip, -+ int *replace) -+{ -+ ctf_id_t (*ctf_cvr_fun)(ctf_file_t *fp, uint_t flag, ctf_id_t ref); -+ ctf_id_t type_ref; -+ -+ switch (dwarf_tag(die)) { -+ case DW_TAG_const_type: ctf_cvr_fun = ctf_add_const; break; -+ case DW_TAG_volatile_type: ctf_cvr_fun = ctf_add_volatile; break; -+ case DW_TAG_restrict_type: ctf_cvr_fun = ctf_add_restrict; break; -+ default: -+ pr_err("%s: internal error: assemble_ctf_cvr_qual() called with\n" -+ "non-const/volatile/restrict: %i\n", locerrstr, dwarf_tag(die)); -+ exit(1); -+ } -+ -+ type_ref = lookup_ctf_type(module_name, file_name, die, ctf, -+ overrides, locerrstr); -+ if (type_ref < 0) { -+ *skip = SKIP_ABORT; -+ return CTF_ERROR_REPORTED; -+ } -+ -+ return ctf_cvr_fun(ctf, ROOT_TYPE(top_level_type), type_ref); -+} -+ -+/* -+ * Assemble a structure or union type. This assembles only the type itself, not -+ * its constituent members: that is done by assemble_ctf_su_member(). -+ * -+ * We assume that if a structure or union type is discovered with more members -+ * than an earlier-discovered type, that it is compatible with that earlier type -+ * and a superset of it. -+ * -+ * FIXME: in debug mode we should not assume this. 
-+ */ -+static ctf_id_t assemble_ctf_struct_union(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ ctf_file_t *ctf, -+ ctf_id_t parent_ctf_id, -+ const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, -+ enum skip_type *skip, -+ int *replace) -+{ -+ ctf_id_t (*ctf_add_sou)(ctf_file_t *fp, uint_t flag, const char *name, -+ size_t size); -+ -+ const char *name = dwarf_diename(die); -+ int is_union = (dwarf_tag(die) == DW_TAG_union_type); -+ struct ctf_memb_count *member_count = NULL; -+ ctf_id_t id; -+ long long size; -+ -+ /* -+ * FIXME: these both need handling for DWARF4 support. -+ */ -+ CTF_DW_ENFORCE_NOT(specification); -+ CTF_DW_ENFORCE_NOT(signature); -+ -+ /* -+ * Figure out the size of the type (if possible) and force it into the -+ * CTF to ensure that struct/union padding is added appropriately. -+ * -+ * If we don't know it, force a size of zero, which is interpreted as -+ * being equivalent to a call to the unsized struct/union addition -+ * function, letting libdtrace-ctf figure out a likely size as best it -+ * can. -+ */ -+ size = private_dwarf_size(die); -+ if (size < 0) -+ size = 0; -+ -+ /* -+ * Possibly we should ignore this entire structure, if we already know -+ * of one with the same name and at least as many members. If we -+ * already know of one and it is shorter, we want to use its ID rather -+ * than creating a new one. -+ * -+ * Note; by this point, the deduplicator has long run: thus we know for -+ * sure what module a potentially-shared type will end up in, and -+ * there's no need to double-check the shared CTF repository for types. -+ * We also know that the module must exist in the per_module hash. -+ */ -+ -+ if (name != NULL) { -+ char *structized_name = NULL; -+ struct per_module *ctf_pm; -+ -+ structized_name = str_appendn(structized_name, -+ is_union ? 
"u " : "s ", -+ name, NULL); -+ -+ ctf_pm = g_hash_table_lookup(per_module, module_name); -+ member_count = g_hash_table_lookup(ctf_pm->member_counts, -+ structized_name); -+ -+ if (member_count) { -+ free(structized_name); -+ dw_ctf_trace("%s: already exists (with ID %li) with %zi members\n" -+ "versus current %li members\n", -+ locerrstr, member_count->ctf_id, -+ member_count->count, -+ count_dwarf_members(die)); -+ -+ if (member_count->count < count_dwarf_members(die)) -+ return member_count->ctf_id; -+ -+ *skip = SKIP_SKIP; -+ return member_count->ctf_id; -+ } -+ -+ /* -+ * Not in existence yet. Create it. -+ */ -+ member_count = malloc(sizeof(struct ctf_memb_count)); -+ if (member_count == NULL) { -+ pr_err("Out of memory allocating structure/union member count\n"); -+ exit(1); -+ } -+ member_count->count = 0; -+ g_hash_table_insert(ctf_pm->member_counts, -+ structized_name, member_count); -+ } -+ -+ dw_ctf_trace("%s: adding structure %s\n", locerrstr, name); -+ -+ if (is_union) -+ ctf_add_sou = ctf_add_union_sized; -+ else -+ ctf_add_sou = ctf_add_struct_sized; -+ -+ id = ctf_add_sou(ctf, ROOT_TYPE(top_level_type), name, size); -+ -+ if (member_count != NULL) -+ member_count->ctf_id = id; -+ -+ return id; -+} -+ -+/* -+ * Figure out the offset of this type, in bits. (This is split in two -+ * for bitfields, where the bitfield itself gets represented elsewhere, -+ * in the CTF type of the member itself.) -+ * -+ * DW_AT_data_bit_offset is the simple case. DW_AT_data_member_location -+ * is trickier, and, alas, the DWARF2 variation is the complex one. 
-+ */ -+static int ctf_su_offset(Dwarf_Die *die, const char *locerrstr, -+ struct die_override *overrides, ulong_t *offset, -+ ulong_t *bit_offset) -+{ -+ struct die_override *o; -+ -+ if (private_dwarf_hasattr(die, DW_AT_data_bit_offset)) -+ *offset = private_dwarf_udata(die, DW_AT_data_bit_offset, NULL); -+ else if (private_dwarf_hasattr(die, DW_AT_data_member_location)) { -+ Dwarf_Attribute location_attr; -+ -+ private_dwarf_attr(die, DW_AT_data_member_location, -+ &location_attr); -+ -+ switch (dwarf_whatform(&location_attr)) { -+ case DW_FORM_data1: -+ case DW_FORM_data2: -+ case DW_FORM_data4: -+ case DW_FORM_data8: -+ case DW_FORM_udata: -+ case DW_FORM_sdata: -+ { -+ /* -+ * Byte offset, with bit_offset of containing -+ * structure/union added, if present. -+ * -+ * (No overrides supported here, yet, due to lack of -+ * sdata overrides and the desire for consistency. -+ * We can add them if we start passing down -+ * DW_AT_data_member_location overrides.) -+ */ -+ if (dwarf_whatform(&location_attr) == DW_FORM_sdata) { -+ Dwarf_Sword location; -+ -+ dwarf_formsdata(&location_attr, &location); -+ *offset = location * 8; -+ } else { -+ Dwarf_Word location; -+ -+ dwarf_formudata(&location_attr, &location); -+ *offset = location * 8; -+ } -+ break; -+ } -+ case DW_FORM_block1: -+ case DW_FORM_block2: -+ case DW_FORM_block4: -+ { -+ Dwarf_Op *location; -+ size_t nlocs; -+ -+ /* -+ * DWARF 2 block-based data_member_location. This can -+ * be quite complicated in some situations (notably C++ -+ * virtual bases), but for normal structure members it -+ * is simple. FIXME for userspace tracing of C++. -+ * -+ * This is thoroughly specific to the forms of DWARF2 -+ * emitted by GCC. We don't need to feel guilty about -+ * this because elfutils does just the same thing. 
-+ */ -+ -+ if (dwarf_getlocation(&location_attr, &location, -+ &nlocs) < 0) { -+ pr_err("%s: offset not a valid location expression: %s\n", -+ locerrstr, dwarf_errmsg(dwarf_errno())); -+ return CTF_ERROR_REPORTED; -+ } -+ -+ if ((nlocs != 1) || -+ ((location[0].atom != DW_OP_plus_uconst) && -+ (location[0].atom != DW_OP_constu))) { -+ pr_err("%s: complex location lists not supported:\n" -+ "either C++ or non-GCC output: skipped\n", locerrstr); -+ return CTF_ERROR_REPORTED; -+ } -+ -+ *offset = location[0].number * 8; -+ break; -+ } -+ case DW_FORM_exprloc: -+ { -+ /* -+ * We need a full DWARF expression list interpreter to -+ * handle this. -+ */ -+ pr_err("DWARF 4 expression location lists not supported.\n"); -+ exit(1); -+ } -+ default: -+ { -+ pr_err("%s: expression location lists in form %u not supported.\n", -+ locerrstr, dwarf_whatform(&location_attr)); -+ exit(1); -+ } -+ } -+ } -+ -+ /* -+ * Handle the bit offset. -+ */ -+ if (private_dwarf_hasattr(die, DW_AT_bit_offset)) { -+ Dwarf_Attribute bit_attr; -+ Dwarf_Word bit; -+ -+ private_dwarf_attr(die, DW_AT_bit_offset, -+ &bit_attr); -+ dwarf_formudata(&bit_attr, &bit); -+ *bit_offset = bit; -+ } -+ -+ /* -+ * Handle the offset value override. It does not matter which method -+ * has been used to get the value. At this point offset is always -+ * the bit distance of the member from the structure/union start. -+ * -+ * The DW_AT_data_bit_offset override is always used to pass the offset -+ * around, so that we don't need to add special override handling for -+ * various forms of the DW_AT_data_member_location as a special case. -+ * This is safe as it is not possible to have both attributes attached -+ * to the same DIE per the DWARF4 standard, and if we have one attached -+ * as an override to a DIE that has the other, we will only ever need to -+ * use one (since no DIE can be both an unnamed struct/union and a -+ * bitfield at the same time). 
-+ */ -+ o = private_find_override(die, DW_AT_data_bit_offset, overrides); -+ if (o != NULL) { -+ if (o->op == DIE_OVERRIDE_REPLACE) -+ *offset = o->value; -+ else -+ *offset += o->value; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Assemble a structure or union member. -+ * -+ * We only assemble a member of a given name if a member by that name does not -+ * already exist, and if the member is not blacklisted. -+ */ -+static ctf_id_t assemble_ctf_su_member(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ ctf_file_t *ctf, -+ ctf_id_t parent_ctf_id, -+ const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, -+ enum skip_type *skip, -+ int *replace) -+{ -+ ulong_t offset = 0; -+ ulong_t bit_offset = 0; -+ struct ctf_full_id *new_type; -+ Dwarf_Attribute type_attr; -+ Dwarf_Die type_die; -+ Dwarf_Die cu_die; -+ int err; -+ struct ctf_memb_count *member_count; -+ const char *struct_name = dwarf_diename(parent_die); -+ -+ CTF_DW_ENFORCE(type); -+ -+ /* -+ * Increment the member count of named structures. This is the number -+ * of members in the DWARF, not in the CTF: blacklisted members are -+ * counted too. -+ */ -+ if (struct_name != NULL) { -+ int is_union = (dwarf_tag(parent_die) == DW_TAG_union_type); -+ char *structized_name = NULL; -+ struct per_module *ctf_pm; -+ -+ structized_name = str_appendn(structized_name, -+ is_union ? "u " : "s ", -+ struct_name, NULL); -+ -+ ctf_pm = g_hash_table_lookup(per_module, module_name); -+ member_count = g_hash_table_lookup(ctf_pm->member_counts, -+ structized_name); -+ member_count->count++; -+ free(structized_name); -+ } -+ -+ /* -+ * If this member is blacklisted, just skip it. 
-+ */ -+ if (member_blacklisted(die, parent_die)) { -+ dw_ctf_trace("%s: blacklisted, skipping.\n", locerrstr); -+ return parent_ctf_id; -+ } -+ -+ /* -+ * Find the associated type so we can either add a member with that type -+ * (if it is named) or add its members directly (for unnamed types, -+ * which must be unnamed structs/unions): then figure out the member's -+ * offset. -+ */ -+ private_dwarf_attr(die, DW_AT_type, &type_attr); -+ if (dwarf_formref_die(&type_attr, &type_die) == NULL) { -+ pr_err("%s: nonexistent type reference.\n" -+ "Corrupted DWARF, cannot continue.\n", locerrstr); -+ exit(1); -+ } -+ dwarf_diecu(&type_die, &cu_die, NULL, NULL); -+ -+ err = ctf_su_offset(die, locerrstr, overrides, &offset, &bit_offset); -+ if (err < 0) { -+ *skip = SKIP_ABORT; -+ return err; -+ } -+ -+ /* -+ * If this is an unnamed struct/union, call directly back to -+ * die_to_ctf() to add this struct's members to the current structure, -+ * merging it seamlessly with its parent (excepting only the member -+ * offsets). Use DW_AT_data_bit_offset because it does not require -+ * the complexity of DW_AT_data_member_location to be faked. -+ */ -+ if (!private_dwarf_hasattr(die, DW_AT_name)) { -+ Dwarf_Die child_die; -+ int dummy = 0; -+ -+ if ((dwarf_tag(&type_die) != DW_TAG_structure_type) && -+ (dwarf_tag(&type_die) != DW_TAG_union_type)) { -+ pr_err("%s:%lx: not supported: anonymous structure member\n" -+ "not a structure or union.\n", locerrstr, -+ DIEOFFSET(die)); -+ *skip = SKIP_ABORT; -+ return CTF_ERROR_REPORTED; -+ } -+ -+ /* -+ * Anonymous structure or union with no members. Silently skip. -+ */ -+ switch (dwarf_child(&type_die, &child_die)) { -+ case -1: -+ *skip = SKIP_ABORT; -+ return CTF_ERROR_REPORTED; -+ case 1: /* No DIEs at all in this aggregate */ -+ return parent_ctf_id; -+ default: /* Child DIEs exist. */ -+ break; -+ } -+ -+ /* -+ * Add override that will adjust offset of the anonymous -+ * struct/union members during inlining. 
The bit_offset is -+ * ignored here as it is not expected that a nested -+ * structure/union will start on a non-byte-aligned boundary. -+ */ -+ struct die_override o[] = {{ dwarf_tag(&child_die), -+ DW_AT_data_bit_offset, -+ DIE_OVERRIDE_ADD, -+ offset, overrides }, {0}}; -+ -+ die_to_ctf(module_name, file_name, &child_die, parent_die, ctf, -+ parent_ctf_id, o, 0, 0, skip, &dummy, NULL); -+ -+ return parent_ctf_id; -+ } -+ -+ /* -+ * Get the CTF ID of this member's type, by recursive lookup. -+ * -+ * If this is a bitfield, we want to note that said type's size and -+ * bit-offset should be adjusted. -+ */ -+ if (private_dwarf_hasattr(die, DW_AT_bit_size)) { -+ struct die_override o[] = { -+ { DW_TAG_base_type, -+ DW_AT_bit_size, -+ DIE_OVERRIDE_REPLACE, -+ private_dwarf_udata(die, DW_AT_bit_size, -+ NULL), -+ NULL }, -+ { DW_TAG_base_type, -+ DW_AT_bit_offset, -+ DIE_OVERRIDE_REPLACE, -+ bit_offset, -+ overrides }, -+ {0} -+ }; -+ -+ new_type = construct_ctf_id(module_name, file_name, &type_die, -+ &cu_die, o); -+ } else { -+ if (bit_offset != 0) { -+ pr_err("%s:%s: error in member %s: No DW_AT_bit_size, but nonzero bit offset\n" -+ "of %lx in overall offset of %lx\n", locerrstr, -+ dwarf_diename(&cu_die), dwarf_diename(die), -+ bit_offset, offset); -+ return CTF_ERROR_REPORTED; -+ } -+ new_type = construct_ctf_id(module_name, file_name, &type_die, -+ &cu_die, NULL); -+ } -+ -+ if (new_type == NULL) { -+ *skip = SKIP_ABORT; -+ return CTF_ERROR_REPORTED; -+ } -+ -+ if ((new_type->ctf_file != ctf) && -+ (new_type->ctf_file != lookup_ctf_file("shared_ctf"))) { -+ pr_err("%s:%s: internal error: referenced type lookup for member %s\n" -+ "yields a different CTF file: %p versus %p\n", -+ locerrstr, dwarf_diename(&cu_die), dwarf_diename(die), -+ ctf, new_type->ctf_file); -+ pr_err("dedup() is probably buggy.\n"); -+ exit(1); -+ } -+ -+ if (ctf_add_member_offset(ctf, parent_ctf_id, dwarf_diename(die), -+ new_type->ctf_id, offset) < 0) { -+ /* -+ * If we have seen this 
member before, as part of another -+ * definition somewhere else, that's fine. We cannot recurse -+ * from this point, so we can just return the parent CTF ID, the -+ * ID of the containing structure. -+ */ -+ if (ctf_errno(ctf) == ECTF_DUPLICATE) -+ return parent_ctf_id; -+ -+ /* -+ * We have special handling for cases where CTF doesn't know of -+ * either this member's type or the enclosing structure: when -+ * libdtrace-ctf is old enough to need it, we try a ctf_update() -+ * in case this is recently added, but no special handling for -+ * other errors, which the caller must report. -+ */ -+ -+ if (ctf_errno(ctf) != ECTF_BADID && -+ ctf_errno(ctf) != ECTF_NOTSOU) -+ return CTF_NO_ERROR_REPORTED; -+ -+#ifndef LIBDTRACE_CTF_OMISSIBLE_CTF_UPDATE -+ ctf_file_t *shared_ctf; -+ -+ /* -+ * Try an update of the current CTF file first, to bring the -+ * type ID table up to date: if that doesn't work, try an update -+ * of the shared table. (If none is needed, this is cheap.) -+ */ -+ -+ if (ctf_update(new_type->ctf_file) < 0) { -+ pr_err("Cannot update CTF file: %s\n", -+ ctf_errmsg(ctf_errno(ctf))); -+ exit(1); -+ } -+ -+ if (ctf_add_member_offset(ctf, parent_ctf_id, -+ dwarf_diename(die), -+ new_type->ctf_id, -+ offset) == 0) -+ return parent_ctf_id; -+ -+ shared_ctf = lookup_ctf_file("shared_ctf"); -+ if (ctf_update(shared_ctf) < 0) { -+ pr_err("Cannot update shared CTF: %s\n", -+ ctf_errmsg(ctf_errno(shared_ctf))); -+ exit(1); -+ } -+ -+ if (ctf_add_member_offset(ctf, parent_ctf_id, -+ dwarf_diename(die), -+ new_type->ctf_id, -+ offset) == 0) -+ return parent_ctf_id; -+#endif -+#ifdef DEBUG -+ pr_err("%s: Internal error: %s %s:%s:%p:%i\n" -+ "on member addition to ctf_file %p.\n", -+ locerrstr, ctf_errmsg(ctf_errno(ctf)), -+ new_type->module_name, new_type->file_name, -+ new_type->ctf_file, (int) new_type->ctf_id, ctf); -+#else -+ pr_err("%s: Internal error: %s %p:%i\n" -+ "on member addition to ctf_file %p.\n", -+ locerrstr, ctf_errmsg(ctf_errno(ctf)), -+ 
new_type->ctf_file, (int) new_type->ctf_id, -+ ctf); -+#endif -+ return CTF_ERROR_REPORTED; -+ } -+ -+ return parent_ctf_id; -+} -+ -+/* -+ * Assemble a variable. -+ */ -+static ctf_id_t assemble_ctf_variable(const char *module_name, -+ const char *file_name, -+ Dwarf_Die *die, -+ Dwarf_Die *parent_die, -+ ctf_file_t *ctf, -+ ctf_id_t parent_ctf_id, -+ const char *locerrstr, -+ struct die_override *overrides, -+ int top_level_type, -+ enum skip_type *skip, -+ int *replace) -+{ -+ const char *name = dwarf_diename(die); -+ char *blacklist_name = NULL; -+ ctf_id_t type_ref; -+ int err; -+ -+ CTF_DW_ENFORCE(name); -+ -+ /* -+ * If blacklisted, just skip it. -+ */ -+ blacklist_name = str_appendn(blacklist_name, module_name, "`", -+ dwarf_diename(die), NULL); -+ if (g_hash_table_lookup_extended(variable_blacklist, blacklist_name, -+ NULL, NULL)) { -+ dw_ctf_trace("%s: variable %s is blacklisted for static/non-static ambiguity.\n", -+ file_name, blacklist_name); -+ free(blacklist_name); -+ return 0; -+ } -+ free(blacklist_name); -+ -+ type_ref = lookup_ctf_type(module_name, file_name, die, ctf, -+ overrides, locerrstr); -+ if (type_ref < 0) { -+ *skip = SKIP_ABORT; -+ return CTF_ERROR_REPORTED; -+ } -+ -+ /* -+ * This isn't a type: full DWARF child recursion and type-id addition is -+ * not called for. -+ */ -+ *skip = SKIP_SKIP; -+ -+ err = ctf_add_variable(ctf, name, type_ref); -+ -+ if (err == 0) -+ dw_ctf_trace("%p: Added variable %s, type %i\n", ctf, name, -+ (int)type_ref); -+ -+ /* -+ * Variable references to opaque versus non-opaque structures could only -+ * get deduplicated with yet another deduplication pass. This seems -+ * pointlessly expensive when nothing can refer to them: just skip -+ * duplicates instead. -+ */ -+ if ((err < 0) && (ctf_errno(ctf) == ECTF_DUPLICATE)) -+ return 0; -+ -+ return err; -+ -+} -+ -+/* Writeout. 
*/ -+ -+static void write_types(char *output, int standalone) -+{ -+ GHashTableIter module_iter; -+ char *module; -+ struct per_module *per_mod; -+ ctf_file_t **ctfs; -+ const char **names; -+ size_t i = 0; -+ size_t ctf_count = g_hash_table_size(per_module); -+ -+ /* -+ * Work over all the modules and write their compressed CTF data out. -+ * Standalone modules get placed in files in the output directory named -+ * with names ending in .mod.ctf.new, and the makefile moves .ctf.new -+ * over the top of .ctf iff it has changed; built-in modules and the -+ * core kernel and shared type repository are placed into a CTF archive. -+ */ -+ if (standalone) { -+ if ((mkdir(output, 0777) < 0) && errno != EEXIST) { -+ perror("Cannot create .ctf directory"); -+ exit(1); -+ } -+ } else { -+ ctfs = calloc(ctf_count, sizeof(ctf_file_t *)); -+ names = calloc(ctf_count, sizeof(char *)); -+ if (!ctfs || !names) -+ pr_err("Out of memory in CTF writeout\n"); -+ } -+ -+ /* -+ * Write the files out (in standalone mode), or construct the arrays of -+ * module names and files to put in the archive (otherwise). 
-+ */ -+ g_hash_table_iter_init(&module_iter, per_module); -+ while (g_hash_table_iter_next(&module_iter, (void **) &module, -+ (void **)&per_mod)) { -+ int fd; -+ -+ dw_ctf_trace("Writing out %s\n", module); -+ -+ if (ctf_update(per_mod->ctf_file) < 0) { -+ pr_err("Cannot serialize CTF file %s: %s\n", -+ module, ctf_errmsg(ctf_errno(per_mod->ctf_file))); -+ exit(1); -+ } -+ -+ if (!standalone) { -+ names[i] = module; -+ ctfs[i] = per_mod->ctf_file; -+ i++; -+ } else { -+ char *path = NULL; -+ -+ path = str_appendn(path, output, "/", module, -+ ".mod.ctf.new", NULL); -+ -+ fd = open(path, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, -+ 0666); -+ if (fd < 0) { -+ pr_err("Cannot open CTF file %s for writing: %s\n", -+ path, strerror(errno)); -+ exit(1); -+ } -+ if (ctf_compress_write(per_mod->ctf_file, fd) < 0) { -+ pr_err("Cannot write to CTF file %s: " -+ "%s\n", path, -+ ctf_errmsg(ctf_errno(per_mod->ctf_file))); -+ exit(1); -+ } -+ if (close(fd) != 0) { -+ pr_err("Cannot close CTF file %s: %s\n", -+ path, strerror(errno)); -+ exit(1); -+ } -+ free(path); -+ } -+ } -+ -+ if (!standalone) { -+ int err; -+ -+ err = ctf_arc_write(output, ctfs, ctf_count, names, 4096); -+ if (err != 0) { -+ pr_err("Cannot write to CTF archive %s: %s\n", -+ output, err < ECTF_BASE ? strerror(err) : -+ ctf_errmsg(err)); -+ exit(1); -+ } -+ free(names); -+ free(ctfs); -+ } -+} -+ -+/* Utilities. */ -+ -+/* -+ * Given a DIE that may contain a type attribute, look up the target of that -+ * attribute and return it, or NULL if none. 
-+ */ -+static Dwarf_Die *private_dwarf_type(Dwarf_Die *die, Dwarf_Die *target_die) -+{ -+ Dwarf_Attribute type_ref_attr; -+ -+ if (private_dwarf_attr(die, DW_AT_type, &type_ref_attr) != NULL) { -+ if (dwarf_formref_die(&type_ref_attr, target_die) == NULL) { -+ pr_err("Corrupt DWARF at offset %lx: ref with no target.\n", -+ DIEOFFSET(die)); -+ exit(1); -+ } -+ return target_die; -+ } -+ -+ return NULL; -+} -+ -+/* -+ * Check for existence of an attribute in a DIE, chasing through -+ * DW_AT_specification if need be. -+ */ -+static inline int private_dwarf_hasattr(Dwarf_Die *die, -+ unsigned int search_name) -+{ -+ int hasattr = 0; -+ Dwarf_Attribute spec_ref_attr; -+ Dwarf_Die spec_die; -+ -+ /* -+ * DW_AT_declaration is not forwarded, because non-declarations can -+ * reference declarations via DW_AT_specification, without implying that -+ * the referencing DIE is a declaration. -+ */ -+ hasattr = dwarf_hasattr(die, search_name); -+ if (hasattr || (search_name == DW_AT_declaration)) -+ return hasattr; -+ -+ if (dwarf_attr(die, DW_AT_specification, &spec_ref_attr) != NULL) { -+ if (dwarf_formref_die(&spec_ref_attr, &spec_die) == NULL) { -+ pr_err("Corrupt DWARF at offset %lx: ref with no target.\n", -+ DIEOFFSET(die)); -+ exit(1); -+ } -+ return dwarf_hasattr(&spec_die, search_name); -+ } -+ return hasattr; -+} -+ -+/* -+ * Return a DIE attribute, chasing through DW_AT_specification if need be. 
-+ */ -+static inline Dwarf_Attribute *private_dwarf_attr(Dwarf_Die *die, -+ unsigned int search_name, -+ Dwarf_Attribute *result) -+{ -+ Dwarf_Attribute spec_ref_attr; -+ Dwarf_Die spec_die; -+ Dwarf_Attribute *ret; -+ -+ ret = dwarf_attr(die, search_name, result); -+ if (ret != NULL || (search_name == DW_AT_declaration)) -+ return ret; -+ -+ if (dwarf_attr(die, DW_AT_specification, &spec_ref_attr) != NULL) { -+ if (dwarf_formref_die(&spec_ref_attr, &spec_die) == NULL) { -+ pr_err("Corrupt DWARF at offset %lx: ref with no target.\n", -+ DIEOFFSET(die)); -+ exit(1); -+ } -+ return dwarf_attr(&spec_die, search_name, result); -+ } -+ -+ return NULL; -+} -+ -+/* -+ * Given a DIE that contains a udata attribute, look up that attribute and -+ * return its value (optionally overridden or modified by the die_overrides). -+ */ -+static inline Dwarf_Word private_dwarf_udata(Dwarf_Die *die, int attribute, -+ struct die_override *overrides) -+{ -+ Dwarf_Attribute attr; -+ Dwarf_Word value; -+ struct die_override *override; -+ -+ override = private_find_override(die, attribute, overrides); -+ -+ if (override && override->op == DIE_OVERRIDE_REPLACE) -+ return override->value; -+ -+ private_dwarf_attr(die, attribute, &attr); -+ dwarf_formudata(&attr, &value); -+ -+ if (override) -+ value += override->value; -+ -+ return value; -+} -+ -+/* -+ * Given a DIE, return its byte size, if known and interpretable, or -1 -+ * otherwise. 
-+ */ -+static inline long long -+private_dwarf_size(Dwarf_Die *die) -+{ -+ Dwarf_Attribute size_attr; -+ -+ if (private_dwarf_hasattr(die, DW_AT_byte_size)) { -+ private_dwarf_attr(die, DW_AT_byte_size, &size_attr); -+ -+ switch (dwarf_whatform(&size_attr)) { -+ case DW_FORM_data1: -+ case DW_FORM_data2: -+ case DW_FORM_data4: -+ case DW_FORM_data8: -+ case DW_FORM_udata: { -+ Dwarf_Word dw_size; -+ -+ dwarf_formudata(&size_attr, &dw_size); -+ return dw_size; -+ } -+ case DW_FORM_sdata: { -+ Dwarf_Sword dw_size; -+ -+ dwarf_formsdata(&size_attr, &dw_size); -+ return dw_size; -+ } -+ } -+ } -+ -+ /* -+ * exprloc or other type we don't know how to interpret yet. -+ */ -+ return -1; -+} -+ -+/* -+ * Find an override in an override list, walking up the chained overrides if -+ * need be, until one is found. -+ */ -+static struct die_override * -+private_find_override(Dwarf_Die *die, -+ int attribute, -+ struct die_override *overrides) -+{ -+ size_t i; -+ -+ if (overrides == NULL) -+ return NULL; -+ -+ while (overrides) { -+ struct die_override *chain = NULL; -+ for (i = 0; overrides[i].tag != 0; i++) { -+ chain = overrides[i].chain; -+ if ((overrides[i].tag == dwarf_tag(die)) && -+ (overrides[i].attribute == attribute)) -+ return &overrides[i]; -+ } -+ overrides = chain; -+ } -+ -+ return NULL; -+} -+ -+/* -+ * Determine the dimensions of an array subrange, or 0 if variable. 
-+ */ -+static Dwarf_Word private_subrange_dimensions(Dwarf_Die *die) -+{ -+ int flexible_array = 0; -+ Dwarf_Attribute nelem_attr; -+ Dwarf_Word nelems; -+ -+ if (((private_dwarf_attr(die, DW_AT_upper_bound, -+ &nelem_attr) == NULL) && -+ (private_dwarf_attr(die, DW_AT_count, -+ &nelem_attr) == NULL)) || -+ (!private_dwarf_hasattr(die, DW_AT_type))) -+ flexible_array = 1; -+ -+ if (!flexible_array) -+ switch (dwarf_whatform(&nelem_attr)) { -+ case DW_FORM_data1: -+ case DW_FORM_data2: -+ case DW_FORM_data4: -+ case DW_FORM_data8: -+ case DW_FORM_udata: -+ break; -+ default: -+ flexible_array = 1; -+ } -+ -+ if (flexible_array) -+ return 0; -+ -+ dwarf_formudata(&nelem_attr, &nelems); -+ -+ /* -+ * Upper bounds indicate that we have one more element than that, since -+ * C starts counting at zero. -+ */ -+ if (private_dwarf_hasattr(die, DW_AT_upper_bound)) -+ nelems++; -+ -+ return nelems; -+} -+ -+/* -+ * Intern an atom in the atoms table and return it, or free it and return the -+ * existing atom if one is already interned. (Despite the type signature, this -+ * return value is constant and should not be freed.) -+ */ -+static void *intern(char *atom) -+{ -+ void *foo; -+ -+ if (!g_hash_table_lookup_extended(atoms, atom, &foo, NULL)) { -+ g_hash_table_insert(atoms, atom, NULL); -+ foo = atom; -+ } else -+ free(atom); -+ -+ return foo; -+} -+ -+/* -+ * An error checking strdup(). -+ */ -+static char *xstrdup(const char *s) -+{ -+ char *s2 = strdup(s); -+ -+ if (s2 == NULL) { -+ pr_err("%s: Out of memory\n", __func__); -+ exit(1); -+ } -+ -+ return s2; -+} -+ -+/* -+ * A string appender working on dynamic strings. 
-+ */ -+static char *str_append(char *s, const char *append) -+{ -+ size_t s_len = 0; -+ -+ if (append == NULL) -+ return s; -+ -+ if (s != NULL) -+ s_len = strlen(s); -+ -+ size_t append_len = strlen(append); -+ -+ s = realloc(s, s_len + append_len + 1); -+ -+ if (s == NULL) { -+ pr_err("Out of memory appending a string of length %li to one of length %li\n", -+ strlen(append), s_len); -+ exit(1); -+ } -+ -+ memcpy(s + s_len, append, append_len); -+ s[s_len+append_len] = '\0'; -+ -+ return s; -+} -+ -+/* -+ * A vararg string appender. -+ */ -+static char *str_appendn(char *s, ...) -+{ -+ va_list ap; -+ const char *append; -+ size_t len, s_len = 0; -+ -+ va_start(ap, s); -+ if (s) -+ s_len = strlen(s); -+ len = s_len; -+ -+ append = va_arg(ap, const char *); -+ while (append != NULL) { -+ len += strlen(append); -+ append = va_arg(ap, char *); -+ } -+ va_end(ap); -+ -+ s = realloc(s, len + 1); -+ if (s == NULL) { -+ pr_err("Out of memory appending a string of length %li to one of length %li\n", -+ len - s_len, s_len); -+ exit(1); -+ } -+ -+ va_start(ap, s); -+ append = va_arg(ap, const char *); -+ while (append != NULL) { -+ size_t append_len = strlen(append); -+ -+ memcpy(s + s_len, append, append_len); -+ s_len += append_len; -+ -+ append = va_arg(ap, char *); -+ } -+ s[len] = '\0'; -+ va_end(ap); -+ -+ return s; -+} -+ -+/* -+ * Filter a GList, calling a predicate on it and removing all elements for which -+ * the predicate returns true, calling the free_func on them if set. 
-+ */ -+static GList *list_filter(GList *list, filter_pred_fun fun, -+ GDestroyNotify free_func, void *data) -+{ -+ GList *cur = list; -+ -+ while (cur) { -+ GList *next = cur->next; -+ -+ if (fun(cur->data, data)) { -+ if (free_func) -+ free_func(cur->data); -+ list = g_list_delete_link(list, cur); -+ } -+ cur = next; -+ } -+ -+ return list; -+} -+ -+/* -+ * Figure out the (pathless, suffixless) module name for a given module file (.o -+ * or .ko), and return it in a new dynamically allocated string. -+ * -+ * Takes the object_to_module mapping into account. -+ */ -+static char *fn_to_module(const char *file_name) -+{ -+ char *module_name; -+ char *chop, *dash; -+ -+ module_name = g_hash_table_lookup(object_to_module, file_name); -+ if (module_name != NULL) -+ return xstrdup(module_name); -+ -+ chop = strrchr(file_name, '/'); -+ if (chop != NULL) -+ module_name = xstrdup(++chop); -+ else -+ module_name = xstrdup(file_name); -+ -+ chop = strrchr(module_name, '.'); -+ if (chop != NULL) -+ *chop = '\0'; -+ -+ dash = module_name; -+ while (dash != NULL) { -+ dash = strchr(dash, '-'); -+ if (dash != NULL) -+ *dash = '_'; -+ } -+ -+ return module_name; -+} -+ -+/* -+ * Determine, and cache, absolute filenames. This is called in very hot -+ * paths, notably type_id(), and must be kept fast. 
-+ */ -+static const char *abs_file_name(const char *file_name) -+{ -+ static GHashTable *abs_file_names; -+ const char *abs_name; -+ -+ if (abs_file_names == NULL) -+ abs_file_names = g_hash_table_new_full(g_str_hash, g_str_equal, -+ free, free); -+ -+ abs_name = g_hash_table_lookup(abs_file_names, file_name); -+ -+ if (abs_name == NULL) { -+ char abspath[PATH_MAX] = ""; -+ -+ if (realpath(file_name, abspath) == NULL) -+ strcpy(abspath, file_name); -+ g_hash_table_replace(abs_file_names, -+ xstrdup(file_name), xstrdup(abspath)); -+ -+ abs_name = g_hash_table_lookup(abs_file_names, file_name); -+ } -+ -+ return abs_name; -+} -+ -+/* -+ * Determine absolute filenames relative to some other directory. This does not -+ * need to be fast. The returned name is dynamically allocated, and must be -+ * freed by the caller. -+ */ -+static char *rel_abs_file_name(const char *file_name, const char *relative_to) -+{ -+ int dir = -1; -+ static int warned = 0; -+ char *abspath; -+ /* -+ * If we can't get this name relatively, we might as well *try* to do it -+ * absolutely: but print a warning. -+ */ -+ dir = open(".", O_RDONLY | O_DIRECTORY); -+ if (dir < 0) { -+ if (!warned) { -+ perror("Cannot open current directory"); -+ warned = 1; -+ } -+ } else { -+ if (chdir(relative_to) < 0) -+ if (!warned) { -+ pr_err("Cannot change directory to " -+ "%s: %s\n", relative_to, -+ strerror(errno)); -+ warned = 1; -+ } -+ } -+ -+ abspath = realpath(file_name, NULL); -+ if (abspath == NULL) -+ abspath = xstrdup(file_name); -+ -+ if ((dir > -1) && (fchdir(dir) < 0)) { -+ perror("Cannot return to original directory after relative realpath()"); -+ exit(1); -+ } -+ -+ close(dir); -+ -+ return abspath; -+} -+ -+/* -+ * Given a type encoding table, and a size, return the CTF encoding for that -+ * type, or 0 if none. 
-+ */ -+static int find_ctf_encoding(struct type_encoding_tab *type_tab, size_t size) -+{ -+ size_t i; -+ -+ for (i = 0; type_tab[i].size != 0; i++) { -+ if (type_tab[i].size == size) -+ return type_tab[i].ctf_encoding; -+ } -+ return 0; -+} -+ -+/* -+ * Count the number of members of a DWARF aggregate. -+ */ -+static long count_dwarf_members(Dwarf_Die *d) -+{ -+ const char *err; -+ Dwarf_Die die; -+ -+ switch (dwarf_child(d, &die)) { -+ case -1: -+ err = "fetch first child of aggregate"; -+ goto fail; -+ case 1: /* No DIEs at all in this aggregate */ -+ return 0; -+ default: /* Child DIEs exist. */ -+ break; -+ } -+ -+ /* -+ * We are only interested in children of type DW_TAG_member. -+ */ -+ int sib_ret; -+ long count = 0; -+ -+ do -+ if (dwarf_tag(&die) == DW_TAG_member) -+ count++; -+ while ((sib_ret = dwarf_siblingof(&die, &die)) == 0); -+ -+ if (sib_ret == -1) { -+ err = "count members"; -+ goto fail; -+ } -+ -+ return count; -+ -+ fail: -+ pr_err("Cannot %s: %s\n", err, dwarf_errmsg(dwarf_errno())); -+ exit(1); -+} -+ -+/* -+ * Free a per_module's contents. -+ */ -+static void private_per_module_free(void *per_module) -+{ -+ struct per_module *per_mod = per_module; -+ -+ ctf_close(per_mod->ctf_file); -+ g_hash_table_destroy(per_mod->member_counts); -+ free(per_module); -+} -+ -+/* -+ * Free a fn_to_die_to_parent subhash. -+ */ -+static void private_fn_die_parent_free(void *ptr) -+{ -+ g_hash_table_destroy((GHashTable *) ptr); -+} -+ -+/* -+ * Get a ctf_file out of the per_module hash for a given module. 
-+ */ -+static ctf_file_t *lookup_ctf_file(const char *module_name) -+{ -+ struct per_module *per_mod; -+ -+ per_mod = g_hash_table_lookup(per_module, module_name); -+ if (per_mod == NULL) -+ return NULL; -+ return per_mod->ctf_file; -+} -diff --git a/scripts/dwarf2ctf/eu_simple.c b/scripts/dwarf2ctf/eu_simple.c -new file mode 100644 -index 000000000000..49886e5e5411 ---- /dev/null -+++ b/scripts/dwarf2ctf/eu_simple.c -@@ -0,0 +1,2 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#include "../eu_simple.c" -diff --git a/scripts/dwarf2ctf/member.blacklist b/scripts/dwarf2ctf/member.blacklist -new file mode 100644 -index 000000000000..85122def7b5f ---- /dev/null -+++ b/scripts/dwarf2ctf/member.blacklist -@@ -0,0 +1 @@ -+include/linux/netfilter/ipset/ip_set_ahash.h:ip_set_hash.next -diff --git a/scripts/eu_simple.c b/scripts/eu_simple.c -new file mode 100644 -index 000000000000..e2736f29d001 ---- /dev/null -+++ b/scripts/eu_simple.c -@@ -0,0 +1,356 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Convenience wrappers for functions in elfutils. -+ * -+ * (C) 2014, 2017 Oracle, Inc. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ */ -+ -+#include <errno.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <string.h> -+ -+#include <elfutils/libdwfl.h> -+#include <elfutils/version.h> -+ -+#include <eu_simple.h> -+ -+#define __unused__ __attribute__((__unused__)) -+ -+/* -+ * A version of dwfl_report_elf() that compensates for parameter changes in -+ * newer elfutils. 
-+ */ -+static Dwfl_Module *private_dwfl_report_elf(Dwfl *dwfl, const char *name, -+ const char *file_name, int fd, -+ GElf_Addr base) -+{ -+#if _ELFUTILS_PREREQ(0,156) -+ return dwfl_report_elf(dwfl, name, file_name, fd, base, 0); -+#else -+ return dwfl_report_elf(dwfl, name, file_name, fd, base); -+#endif -+} -+ -+/* -+ * Stub libdwfl callback, use only the ELF handle passed in. -+ */ -+static int no_debuginfo(Dwfl_Module *mod __unused__, -+ void **userdata __unused__, -+ const char *modname __unused__, -+ Dwarf_Addr base __unused__, -+ const char *file_name __unused__, -+ const char *debuglink_file __unused__, -+ GElf_Word debuglink_crc __unused__, -+ char **debuginfo_file_name __unused__) -+{ -+ return -1; -+} -+ -+/* -+ * Wrap up dwfl_new() complexities. -+ */ -+Dwfl *simple_dwfl_new(const char *file_name, Dwfl_Module **module) -+{ -+ const char *err; -+ -+ static Dwfl_Callbacks cb = { -+ .find_debuginfo = no_debuginfo, -+ .section_address = dwfl_offline_section_address -+ }; -+ Dwfl *dwfl = dwfl_begin(&cb); -+ Dwfl_Module *mod; -+ -+ if (dwfl == NULL) { -+ err = "initialize libdwfl"; -+ goto fail; -+ } -+ -+ mod = private_dwfl_report_elf(dwfl, "", file_name, -1, 0); -+ if (mod == NULL) { -+ err = "open object file with libdwfl"; -+ goto fail; -+ } -+ if (module) -+ *module = mod; -+ -+ if (dwfl_report_end(dwfl, NULL, NULL) != 0) { -+ err = "finish opening object file with libdwfl"; -+ goto fail; -+ } -+ -+ return dwfl; -+ fail: -+ fprintf(stderr, "Cannot %s for %s: %s\n", err, file_name, -+ dwfl_errmsg(dwfl_errno())); -+ exit(1); -+} -+ -+/* -+ * A variant of simple_dwfl_new() that iterates over multiple object files. -+ * (Used for thin archives.) -+ * -+ * Takes ownership of the paths, until free. 
-+ */ -+struct simple_dwfl_multi * -+simple_dwfl_new_multi(char **paths) -+{ -+ struct simple_dwfl_multi *multi; -+ -+ multi = malloc(sizeof(struct simple_dwfl_multi)); -+ if (multi == NULL) -+ return NULL; -+ -+ multi->paths = paths; -+ multi->i = -1; -+ multi->dwfl = NULL; -+ multi->last_die = NULL; -+ -+ return multi; -+} -+ -+/* -+ * A variant of dwfl_nextcu() that crosses file boundaries as needed, -+ * using the state in the simple_dwfl_multi. -+ */ -+Dwarf_Die * -+simple_dwfl_nextcu(struct simple_dwfl_multi *multi) -+{ -+ Dwarf_Addr junk; -+ -+ /* -+ * Switch object files as needed (and always, the first time). -+ */ -+ -+ if (multi->i >= 0) -+ multi->last_die = dwfl_nextcu(multi->dwfl, multi->last_die, -+ &junk); -+ -+ while (multi->last_die == NULL) { -+ simple_dwfl_free(multi->dwfl); -+ if (multi->paths[++multi->i] == NULL) { -+ multi->i = -1; -+ multi->dwfl = NULL; -+ multi->last_die = NULL; -+ return NULL; -+ } -+ -+ multi->dwfl = simple_dwfl_new(multi->paths[multi->i], NULL); -+ multi->last_die = dwfl_nextcu(multi->dwfl, multi->last_die, -+ &junk); -+ } -+ return multi->last_die; -+} -+ -+/* -+ * Free a simple_dwfl_new_multi: return its contained paths so the caller can -+ * free them again. (They are not changed, so the caller can just hang on to -+ * them if preferred.) -+ */ -+char ** -+simple_dwfl_free_multi(struct simple_dwfl_multi *multi) -+{ -+ char **paths = multi->paths; -+ simple_dwfl_free(multi->dwfl); -+ free(multi); -+ return paths; -+} -+ -+/* -+ * The converse of simple_dwfl_new(). -+ */ -+void simple_dwfl_free(Dwfl *dwfl) -+{ -+ if (dwfl != NULL) { -+ dwfl_report_end(dwfl, NULL, NULL); -+ dwfl_end(dwfl); -+ } -+} -+ -+ -+/* -+ * Read a modules_thick.builtin file and translate it into a stream of -+ * arguments suitable for simple_dwfl_new_multi(). -+ */ -+ -+/* -+ * Construct a modules_thick.builtin iterator. 
-+ */ -+struct modules_thick_iter * -+modules_thick_iter_new(const char *modules_thick_file) -+{ -+ struct modules_thick_iter *i; -+ -+ i = calloc(1, sizeof(struct modules_thick_iter)); -+ if (i == NULL) -+ return NULL; -+ -+ i->f = fopen(modules_thick_file, "r"); -+ -+ if (i->f == NULL) { -+ fprintf(stderr, "Cannot open builtin module file %s: %s\n", -+ modules_thick_file, strerror(errno)); -+ return NULL; -+ } -+ -+ return i; -+} -+ -+/* -+ * Iterate, returning a new null-terminated array of object file names, and a -+ * new dynamically-allocated module name. (The module name passed in is freed.) -+ * -+ * The array of object file names should be freed by the caller: the strings it -+ * points to are owned by the iterator, and should not be freed. -+ */ -+ -+char ** __attribute__((__nonnull__)) -+modules_thick_iter_next(struct modules_thick_iter *i, char **module_name) -+{ -+ size_t npaths = 1; -+ char **module_paths; -+ char *last_slash; -+ char *last_dot; -+ char *trailing_linefeed; -+ char *object_name = i->line; -+ char *dash; -+ int composite = 0; -+ -+ /* -+ * Read in all module entries, computing the suffixless, pathless name -+ * of the module and building the next arrayful of object file names for -+ * return. -+ * -+ * Modules can consist of multiple files: in this case, the portion -+ * before the colon is the path to the module (as before): the portion -+ * after the colon is a space-separated list of files that should be * -+ * considered part of this module. In this case, the portion before the -+ * name is an "object file" that does not actually exist: it is merged -+ * into built-in.a without ever being written out. -+ * -+ * All module names have - translated to _, to match what is done to the -+ * names of the same things when built as modules. -+ */ -+ -+ /* -+ * Reinvocation of exhausted iterator. Return NULL, once. 
-+ */ -+retry: -+ if (getline(&i->line, &i->line_size, i->f) < 0) { -+ if (ferror(i->f)) { -+ fprintf(stderr, "Error reading from modules_thick file:" -+ " %s\n", strerror(errno)); -+ exit(1); -+ } -+ rewind(i->f); -+ return NULL; -+ } -+ -+ if (i->line[0] == '\0') -+ goto retry; -+ -+ /* -+ * Slice the line in two at the colon, if any. If there is anything -+ * past the ': ', this is a composite module. (We allow for no colon -+ * for robustness, even though one should always be present.) -+ */ -+ if (strchr(i->line, ':') != NULL) { -+ char *name_start; -+ -+ object_name = strchr(i->line, ':'); -+ *object_name = '\0'; -+ object_name++; -+ name_start = object_name + strspn(object_name, " \n"); -+ if (*name_start != '\0') { -+ composite = 1; -+ object_name = name_start; -+ } -+ } -+ -+ /* -+ * Figure out the module name. -+ */ -+ last_slash = strrchr(i->line, '/'); -+ last_slash = (!last_slash) ? i->line : -+ last_slash + 1; -+ free(*module_name); -+ *module_name = strdup(last_slash); -+ dash = *module_name; -+ -+ while (dash != NULL) { -+ dash = strchr(dash, '-'); -+ if (dash != NULL) -+ *dash = '_'; -+ } -+ -+ last_dot = strrchr(*module_name, '.'); -+ if (last_dot != NULL) -+ *last_dot = '\0'; -+ -+ trailing_linefeed = strchr(object_name, '\n'); -+ if (trailing_linefeed != NULL) -+ *trailing_linefeed = '\0'; -+ -+ /* -+ * Multifile separator? Object file names explicitly stated: -+ * slice them up and shuffle them in. -+ * -+ * The array size may be an overestimate if any object file -+ * names start or end with spaces (very unlikely) but cannot be -+ * an underestimate. (Check for it anyway.) 
-+ */ -+ if (composite) { -+ char *one_object; -+ -+ for (npaths = 0, one_object = object_name; -+ one_object != NULL; -+ npaths++, one_object = strchr(one_object + 1, ' ')); -+ } -+ -+ module_paths = malloc((npaths + 1) * sizeof(char *)); -+ if (!module_paths) { -+ fprintf(stderr, "%s: out of memory on module %s\n", __func__, -+ *module_name); -+ exit(1); -+ } -+ -+ if (composite) { -+ char *one_object; -+ size_t i = 0; -+ -+ while ((one_object = strsep(&object_name, " ")) != NULL) { -+ if (i >= npaths) { -+ fprintf(stderr, "%s: num_objs overflow on module " -+ "%s: this is a bug.\n", __func__, -+ *module_name); -+ exit(1); -+ } -+ -+ module_paths[i++] = one_object; -+ } -+ } else -+ module_paths[0] = i->line; /* untransformed module name */ -+ -+ module_paths[npaths] = NULL; -+ -+ return module_paths; -+} -+ -+/* -+ * Free an iterator. Can be called while iteration is underway, so even -+ * state that is freed at the end of iteration must be freed here too. -+ */ -+void -+modules_thick_iter_free(struct modules_thick_iter *i) -+{ -+ if (i == NULL) -+ return; -+ fclose(i->f); -+ free(i->line); -+ free(i); -+} -diff --git a/scripts/eu_simple.h b/scripts/eu_simple.h -new file mode 100644 -index 000000000000..8ef9f9655077 ---- /dev/null -+++ b/scripts/eu_simple.h -@@ -0,0 +1,91 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Simplifying wrappers for functions in elfutils, and functions to -+ * feed them data. -+ * -+ * (C) 2014, 2017 Oracle, Inc. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ */ -+ -+#ifndef _LINUX_EU_SIMPLE_H -+#define _LINUX_EU_SIMPLE_H -+ -+#include <stdio.h> -+#include <stddef.h> -+#include <elfutils/libdwfl.h> -+ -+/* -+ * Iteration state for simple_dwfl_new_multi(). 
-+ */ -+struct simple_dwfl_multi { -+ char **paths; -+ ssize_t i; -+ Dwfl *dwfl; -+ Dwarf_Die *last_die; -+}; -+ -+/* -+ * Wrap up dwfl_new() complexities. -+ */ -+Dwfl *simple_dwfl_new(const char *file_name, Dwfl_Module **module); -+ -+/* -+ * A variant of simple_dwfl_new() that iterates over multiple object files. -+ * (Used for thin archives.) -+ * -+ * Takes ownership of the paths, until free. -+ */ -+struct simple_dwfl_multi *simple_dwfl_new_multi(char **paths); -+ -+/* -+ * A variant of dwfl_nextcu() that crosses file boundaries as needed, -+ * using the state in the simple_dwfl_multi. -+ */ -+Dwarf_Die *simple_dwfl_nextcu(struct simple_dwfl_multi *multi); -+ -+/* -+ * Free a simple_dwfl_new_multi: return its contained paths so the caller -+ * free them again. (They are not changed, so the caller can just hang on to -+ * them if preferred.) -+ */ -+char **simple_dwfl_free_multi(struct simple_dwfl_multi *multi); -+ -+/* -+ * The converse of simple_dwfl_new(). -+ */ -+void simple_dwfl_free(Dwfl *dwfl); -+ -+/* -+ * modules_thick.builtin iteration state. -+ */ -+struct modules_thick_iter { -+ FILE *f; -+ char *line; -+ size_t line_size; -+}; -+ -+/* -+ * Construct a modules_thick.builtin iterator. -+ */ -+struct modules_thick_iter * -+modules_thick_iter_new(const char *modules_thick_file); -+ -+/* -+ * Iterate, returning a new null-terminated array of object file names, and a -+ * new dynamically-allocated module name. (The module name passed in is freed.) -+ * -+ * The array of object file names should be freed by the caller: the strings it -+ * points to are owned by the iterator, and should not be freed. 
-+ */ -+ -+char ** __attribute__((__nonnull__)) -+modules_thick_iter_next(struct modules_thick_iter *i, char **module_name); -+ -+void -+modules_thick_iter_free(struct modules_thick_iter *i); -+ -+#endif -diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c -index a39d93e3c6ae..12445a834698 100644 ---- a/scripts/kconfig/confdata.c -+++ b/scripts/kconfig/confdata.c -@@ -710,6 +710,25 @@ static struct conf_printer header_printer_cb = - .print_comment = header_print_comment, - }; - -+/* -+ * Tristate printer -+ * -+ * This printer is used when generating the `include/config/tristate.conf' file. -+ */ -+static void -+tristate_print_symbol(FILE *fp, struct symbol *sym, const char *value, void *arg) -+{ -+ -+ if (sym->type == S_TRISTATE && *value != 'n') -+ fprintf(fp, "%s%s=%c\n", CONFIG_, sym->name, (char)toupper(*value)); -+} -+ -+static struct conf_printer tristate_printer_cb = -+{ -+ .print_symbol = tristate_print_symbol, -+ .print_comment = kconfig_print_comment, -+}; -+ - static void conf_write_symbol(FILE *fp, struct symbol *sym, - struct conf_printer *printer, void *printer_arg) - { -@@ -1043,7 +1062,7 @@ int conf_write_autoconf(int overwrite) - struct symbol *sym; - const char *name; - const char *autoconf_name = conf_get_autoconfig_name(); -- FILE *out, *out_h; -+ FILE *out, *tristate, *out_h; - int i; - - if (!overwrite && is_present(autoconf_name)) -@@ -1058,6 +1077,13 @@ int conf_write_autoconf(int overwrite) - if (!out) - return 1; - -+ tristate = fopen(".tmpconfig_tristate", "w"); -+ if (!tristate) { -+ fclose(out); -+ fclose(tristate); -+ return 1; -+ } -+ - out_h = fopen(".tmpconfig.h", "w"); - if (!out_h) { - fclose(out); -@@ -1065,6 +1091,7 @@ int conf_write_autoconf(int overwrite) - } - - conf_write_heading(out, &kconfig_printer_cb, NULL); -+ conf_write_heading(tristate, &tristate_printer_cb, NULL); - conf_write_heading(out_h, &header_printer_cb, NULL); - - for_all_symbols(i, sym) { -@@ -1072,11 +1099,13 @@ int conf_write_autoconf(int 
overwrite) - if (!(sym->flags & SYMBOL_WRITE) || !sym->name) - continue; - -- /* write symbols to auto.conf and autoconf.h */ -+ /* write symbols to auto.conf, tristate and header files */ - conf_write_symbol(out, sym, &kconfig_printer_cb, (void *)1); -+ conf_write_symbol(tristate, sym, &tristate_printer_cb, (void *)1); - conf_write_symbol(out_h, sym, &header_printer_cb, NULL); - } - fclose(out); -+ fclose(tristate); - fclose(out_h); - - name = getenv("KCONFIG_AUTOHEADER"); -@@ -1087,6 +1116,14 @@ int conf_write_autoconf(int overwrite) - if (rename(".tmpconfig.h", name)) - return 1; - -+ name = getenv("KCONFIG_TRISTATE"); -+ if (!name) -+ name = "include/config/tristate.conf"; -+ if (make_parent_dir(name)) -+ return 1; -+ if (rename(".tmpconfig_tristate", name)) -+ return 1; -+ - if (make_parent_dir(autoconf_name)) - return 1; - /* -diff --git a/scripts/move-if-change b/scripts/move-if-change -new file mode 100755 -index 000000000000..eb745af5d972 ---- /dev/null -+++ b/scripts/move-if-change -@@ -0,0 +1,8 @@ -+#!/bin/sh -+# SPDX-License-Identifier: GPL-2.0+ -+ -+if test -r "$2" && cmp -s "$1" "$2"; then -+ rm -f "$1" -+else -+ mv -f "$1" "$2" -+fi -diff --git a/scripts/package/mkspec b/scripts/package/mkspec -index 8640c278f1aa..63511c885a37 100755 ---- a/scripts/package/mkspec -+++ b/scripts/package/mkspec -@@ -27,6 +27,13 @@ if grep -q CONFIG_DRM=y .config; then - PROVIDES=kernel-drm - fi - -+# set CTF when configured -+if grep -q CONFIG_CTF=y .config; then -+ C= -+else -+ C=DEL -+fi -+ - PROVIDES="$PROVIDES kernel-$KERNELRELEASE" - __KERNELRELEASE=$(echo $KERNELRELEASE | sed -e "s/-/_/g") - EXCLUDES="$RCS_TAR_IGNORE --exclude=*vmlinux* --exclude=*.mod \ -@@ -38,6 +45,7 @@ EXCLUDES="$RCS_TAR_IGNORE --exclude=*vmlinux* --exclude=*.mod \ - # Labels: - # $S: this line is enabled only when building source package - # $M: this line is enabled only when CONFIG_MODULES is enabled -+# $C: this line is enabled only when CONFIG_CTF is enabled - sed -e '/^DEL/d' -e 
's/^\t*//' <<EOF - Name: kernel - Summary: The Linux Kernel -@@ -48,6 +56,8 @@ sed -e '/^DEL/d' -e 's/^\t*//' <<EOF - Vendor: The Linux Community - URL: http://www.kernel.org - $S Source: kernel-$__KERNELRELEASE.tar.gz -+$C BuildRequires: libdtrace-ctf >= 0.5.0 -+$C BuildRequires: libdtrace-ctf-devel >= 0.5.0 - Provides: $PROVIDES - %define __spec_install_post /usr/lib/rpm/brp-compress || : - %define debug_package %{nil} -@@ -74,12 +84,14 @@ $S$M AutoReqProv: no - $S$M %description -n kernel-devel - $S$M This package provides kernel headers and makefiles sufficient to build modules - $S$M against the $__KERNELRELEASE kernel package. -+$C Requires: libdtrace-ctf >= 0.5.0 - $S$M - $S %prep - $S %setup -q - $S - $S %build - $S $MAKE %{?_smp_mflags} KBUILD_BUILD_VERSION=%{release} -+$S$C $MAKE %{?_smp_mflags} ctf - $S - %install - mkdir -p %{buildroot}/boot --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0002-kallsyms-introduce-new-proc-kallmodsyms-including-bu.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0002-kallsyms-introduce-new-proc-kallmodsyms-including-bu.patch deleted file mode 100644 index 2e756dc98854..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0002-kallsyms-introduce-new-proc-kallmodsyms-including-bu.patch +++ /dev/null @@ -1,1069 +0,0 @@ -From 18419a7bc28bc10eb84a39cd53e9caab92e8afb6 Mon Sep 17 00:00:00 2001 -From: Nick Alcock <nick.alcock@oracle.com> -Date: Wed, 14 Nov 2018 20:09:28 +0000 -Subject: [PATCH 02/20] kallsyms: introduce new /proc/kallmodsyms including - builtin modules too - -/proc/kallsyms is very useful for tracers and other tools that need to -map kernel symbols to addresses (sinful though it is to export such -addresses to userspace). However, for some uses it does not suffice. 
-We would like to be able to establish a mapping between module name and -kernel symbol that only changes when the kernel source code is changed: -if the kernel is recompiled so that some module becomes built in, it is -a desirable property for portability of tracing scripts that can include -module names if the name of this module does not change. - -i.e., as with the previous dwarf2ctf commit, we would like to report -e.g. ext4 symbols as residing in [ext4] even if ext4 happens to be built -into the kernel: it is enough that it *could* be built as a module. - -We use machinery shared with dwarf2ctf (in eu_simple.c) in conjunction -with a link map to compute the mapping from the address ranges -associated with built-in object files in vmlinux.o to module names, then -drop a list of these modules and pointers into that list into new -kallsyms sections (kallsyms_modules and kallsyms_symbol_modules). - -We also need symbol sizes to determine whether a given probe hit is -within a symbol or outside it (possibly miles outside it in a gap -between symbols). Adding that is much simpler, with only one new -section, kallsyms_sizes. - -The resulting file looks like this: - -ffffffff8b013d20 409 t pt_buffer_setup_aux -ffffffff8b014130 11f T intel_pt_interrupt -ffffffff8b014250 2d T cpu_emergency_stop_pt -ffffffff8b014280 13a t rapl_pmu_event_init [intel_rapl_perf] -ffffffff8b0143c0 bb t rapl_event_update [intel_rapl_perf] -ffffffff8b014480 10 t rapl_pmu_event_read [intel_rapl_perf] -ffffffff8b014490 a3 t rapl_cpu_offline [intel_rapl_perf] -ffffffff8b014540 24 t __rapl_event_show [intel_rapl_perf] -ffffffff8b014570 f2 t rapl_pmu_event_stop [intel_rapl_perf] - -This is emitted even if intel_rapl_perf is built into the kernel. - -As with /proc/kallsyms, non-root usage produces addresses that are -all-zero. (I am amenable to producing all-zero sizes, too, but without -the addresses this seems like pure paranoia.) 
- -Programs that consume /proc/kallmodsyms should note that unlike -/proc/kallsyms, kernel symbols for built-in modules may appear -interspersed with other symbols that are part of different modules or -part of no module at all. - -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - Makefile | 2 +- - include/linux/kallsyms.h | 20 +++ - include/linux/module.h | 7 +- - init/Kconfig | 10 ++ - kernel/kallsyms.c | 162 +++++++++++++-------- - kernel/module.c | 4 +- - scripts/Makefile | 10 ++ - scripts/kallsyms.c | 301 +++++++++++++++++++++++++++++++++++++-- - scripts/link-vmlinux.sh | 23 ++- - scripts/namespace.pl | 6 + - 10 files changed, 472 insertions(+), 73 deletions(-) - -diff --git a/Makefile b/Makefile -index 814d9903bd3e..2aad854bb87d 100644 ---- a/Makefile -+++ b/Makefile -@@ -1294,7 +1294,7 @@ modules: $(if $(KBUILD_BUILTIN),vmlinux) modules.order - modules.order: descend - $(Q)$(AWK) '!x[$$0]++' $(addsuffix /$@, $(build-dirs)) > $@ - --ifneq (CONFIG_CTF@,'@') -+ifneq (CONFIG_CTF@CONFIG_KALLMODSYMS,'@') - - # We need to force everything to be built, since we need the .o files below. - KBUILD_BUILTIN := 1 -diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h -index 657a83b943f0..f9587589d0d1 100644 ---- a/include/linux/kallsyms.h -+++ b/include/linux/kallsyms.h -@@ -8,6 +8,7 @@ - - #include <linux/errno.h> - #include <linux/kernel.h> -+#include <linux/module.h> - #include <linux/stddef.h> - #include <linux/mm.h> - #include <linux/module.h> -@@ -71,6 +72,23 @@ static inline void *dereference_symbol_descriptor(void *ptr) - } - - #ifdef CONFIG_KALLSYMS -+/* To avoid using get_symbol_offset for every symbol, we carry prefix along. 
*/ -+struct kallsym_iter { -+ loff_t pos; -+ loff_t pos_arch_end; -+ loff_t pos_mod_end; -+ loff_t pos_ftrace_mod_end; -+ unsigned long value; -+ unsigned int nameoff; /* If iterating in core kernel symbols. */ -+ unsigned long size; -+ char type; -+ char name[KSYM_NAME_LEN]; -+ char module_name[MODULE_NAME_LEN]; -+ int builtin_module; -+ int exported; -+ int show_value; -+}; -+ - /* Lookup the address for a symbol. Returns 0 if not found. */ - unsigned long kallsyms_lookup_name(const char *name); - -@@ -100,6 +118,8 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long * - /* How and when do we show kallsyms values? */ - extern int kallsyms_show_value(void); - -+extern void kallsyms_iter_reset(struct kallsym_iter *, loff_t); -+extern int kallsyms_iter_update(struct kallsym_iter *, loff_t); - #else /* !CONFIG_KALLSYMS */ - - static inline unsigned long kallsyms_lookup_name(const char *name) -diff --git a/include/linux/module.h b/include/linux/module.h -index 1ad393e62bef..0d9777ecee92 100644 ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -605,7 +605,8 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, - /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if - symnum out of range. */ - int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, -- char *name, char *module_name, int *exported); -+ char *name, char *module_name, unsigned long *size, -+ int *exported); - - /* Look for this name: can be of form module:name. 
*/ - unsigned long module_kallsyms_lookup_name(const char *name); -@@ -787,8 +788,8 @@ static inline int lookup_module_symbol_attrs(unsigned long addr, unsigned long * - } - - static inline int module_get_kallsym(unsigned int symnum, unsigned long *value, -- char *type, char *name, -- char *module_name, int *exported) -+ char *type, char *name, char *module_name, -+ unsigned long *size, int *exported) - { - return -ERANGE; - } -diff --git a/init/Kconfig b/init/Kconfig -index ef59c5c36cdb..878907b7a72d 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1388,6 +1388,16 @@ config POSIX_TIMERS - - If unsure say y. - -+config KALLMODSYMS -+ default y -+ bool "Enable support for /proc/kallmodsyms" if EXPERT -+ depends on KALLSYMS -+ help -+ This option enables the /proc/kallmodsyms file, which maps symbols -+ to addresses and their associated modules. This support requires -+ a fairly recent elfutils: 0.152 -- 0.172 have been tested. -+ elfutils before 0.142 will definitely not work. -+ - config PRINTK - default y - bool "Enable support for printk" if EXPERT -diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c -index a9b3f660dee7..7096d1921459 100644 ---- a/kernel/kallsyms.c -+++ b/kernel/kallsyms.c -@@ -32,6 +32,7 @@ - */ - extern const unsigned long kallsyms_addresses[] __weak; - extern const int kallsyms_offsets[] __weak; -+extern const unsigned long kallsyms_sizes[] __weak; - extern const u8 kallsyms_names[] __weak; - - /* -@@ -46,6 +47,8 @@ __attribute__((weak, section(".rodata"))); - - extern const char kallsyms_token_table[] __weak; - extern const u16 kallsyms_token_index[] __weak; -+extern const char kallsyms_modules[] __weak; -+extern const u32 kallsyms_symbol_modules[] __weak; - - extern const unsigned int kallsyms_markers[] __weak; - -@@ -196,12 +199,24 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, - } - EXPORT_SYMBOL_GPL(kallsyms_on_each_symbol); - -+/* -+ * The caller passes in an address, and we return an index to the 
symbol -- -+ * potentially also size and offset information. -+ * But an address might map to multiple symbols because: -+ * - some symbols might have zero size -+ * - some symbols might be aliases of one another -+ * - some symbols might span (encompass) others -+ * The symbols should already be ordered so that, for a particular address, -+ * we first have the zero-size ones, then the biggest, then the smallest. -+ * So we find the index by: -+ * - finding the last symbol with the target address -+ * - backing the index up so long as both the address and size are unchanged -+ */ - static unsigned long get_symbol_pos(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset) - { -- unsigned long symbol_start = 0, symbol_end = 0; -- unsigned long i, low, high, mid; -+ unsigned long low, high, mid; - - /* This kernel should never had been booted. */ - if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE)) -@@ -222,36 +237,17 @@ static unsigned long get_symbol_pos(unsigned long addr, - } - - /* -- * Search for the first aliased symbol. Aliased -- * symbols are symbols with the same address. -+ * Search for the first aliased symbol. - */ -- while (low && kallsyms_sym_address(low-1) == kallsyms_sym_address(low)) -+ while (low -+ && kallsyms_sym_address(low-1) == kallsyms_sym_address(low) -+ && kallsyms_sizes[low-1] == kallsyms_sizes[low]) - --low; - -- symbol_start = kallsyms_sym_address(low); -- -- /* Search for next non-aliased symbol. */ -- for (i = low + 1; i < kallsyms_num_syms; i++) { -- if (kallsyms_sym_address(i) > symbol_start) { -- symbol_end = kallsyms_sym_address(i); -- break; -- } -- } -- -- /* If we found no next symbol, we use the end of the section. 
*/ -- if (!symbol_end) { -- if (is_kernel_inittext(addr)) -- symbol_end = (unsigned long)_einittext; -- else if (IS_ENABLED(CONFIG_KALLSYMS_ALL)) -- symbol_end = (unsigned long)_end; -- else -- symbol_end = (unsigned long)_etext; -- } -- - if (symbolsize) -- *symbolsize = symbol_end - symbol_start; -+ *symbolsize = kallsyms_sizes[low]; - if (offset) -- *offset = addr - symbol_start; -+ *offset = addr - kallsyms_sym_address(low); - - return low; - } -@@ -271,6 +267,7 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, - return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf) || - !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); - } -+EXPORT_SYMBOL_GPL(kallsyms_lookup_size_offset); - - /* - * Lookup an address -@@ -433,21 +430,6 @@ int sprint_backtrace(char *buffer, unsigned long address) - return __sprint_symbol(buffer, address, -1, 1); - } - --/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */ --struct kallsym_iter { -- loff_t pos; -- loff_t pos_arch_end; -- loff_t pos_mod_end; -- loff_t pos_ftrace_mod_end; -- unsigned long value; -- unsigned int nameoff; /* If iterating in core kernel symbols. 
*/ -- char type; -- char name[KSYM_NAME_LEN]; -- char module_name[MODULE_NAME_LEN]; -- int exported; -- int show_value; --}; -- - int __weak arch_get_kallsym(unsigned int symnum, unsigned long *value, - char *type, char *name) - { -@@ -473,7 +455,9 @@ static int get_ksymbol_mod(struct kallsym_iter *iter) - int ret = module_get_kallsym(iter->pos - iter->pos_arch_end, - &iter->value, &iter->type, - iter->name, iter->module_name, -- &iter->exported); -+ &iter->size, &iter->exported); -+ iter->builtin_module = 0; -+ - if (ret < 0) { - iter->pos_mod_end = iter->pos; - return 0; -@@ -509,10 +493,22 @@ static int get_ksymbol_bpf(struct kallsym_iter *iter) - static unsigned long get_ksymbol_core(struct kallsym_iter *iter) - { - unsigned off = iter->nameoff; -+ u32 mod_index = 0; -+ -+ if (kallsyms_symbol_modules) -+ mod_index = kallsyms_symbol_modules[iter->pos]; - -- iter->module_name[0] = '\0'; -+ if (mod_index == 0 || kallsyms_modules == NULL) { -+ iter->module_name[0] = '\0'; -+ iter->builtin_module = 0; -+ } else { -+ strcpy(iter->module_name, &kallsyms_modules[mod_index]); -+ iter->builtin_module = 1; -+ } -+ iter->exported = 0; - iter->value = kallsyms_sym_address(iter->pos); - -+ iter->size = kallsyms_sizes[iter->pos]; - iter->type = kallsyms_get_symbol_type(off); - - off = kallsyms_expand_symbol(off, iter->name, ARRAY_SIZE(iter->name)); -@@ -520,7 +516,7 @@ static unsigned long get_ksymbol_core(struct kallsym_iter *iter) - return off - iter->nameoff; - } - --static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) -+void kallsyms_iter_reset(struct kallsym_iter *iter, loff_t new_pos) - { - iter->name[0] = '\0'; - iter->nameoff = get_symbol_offset(new_pos); -@@ -531,6 +527,7 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) - iter->pos_ftrace_mod_end = 0; - } - } -+EXPORT_SYMBOL_GPL(kallsyms_iter_reset); - - /* - * The end position (last + 1) of each additional kallsyms section is recorded -@@ -557,7 +554,7 @@ static int 
update_iter_mod(struct kallsym_iter *iter, loff_t pos) - } - - /* Returns false if pos at or past end of file. */ --static int update_iter(struct kallsym_iter *iter, loff_t pos) -+int kallsyms_iter_update(struct kallsym_iter *iter, loff_t pos) - { - /* Module symbols can be accessed randomly. */ - if (pos >= kallsyms_num_syms) -@@ -565,26 +562,27 @@ static int update_iter(struct kallsym_iter *iter, loff_t pos) - - /* If we're not on the desired position, reset to new position. */ - if (pos != iter->pos) -- reset_iter(iter, pos); -+ kallsyms_iter_reset(iter, pos); - - iter->nameoff += get_ksymbol_core(iter); - iter->pos++; - - return 1; - } -+EXPORT_SYMBOL_GPL(kallsyms_iter_update); - - static void *s_next(struct seq_file *m, void *p, loff_t *pos) - { - (*pos)++; - -- if (!update_iter(m->private, *pos)) -+ if (!kallsyms_iter_update(m->private, *pos)) - return NULL; - return p; - } - - static void *s_start(struct seq_file *m, loff_t *pos) - { -- if (!update_iter(m->private, *pos)) -+ if (!kallsyms_iter_update(m->private, *pos)) - return NULL; - return m->private; - } -@@ -593,7 +591,7 @@ static void s_stop(struct seq_file *m, void *p) - { - } - --static int s_show(struct seq_file *m, void *p) -+static int s_show_internal(struct seq_file *m, void *p, int builtin_modules) - { - void *value; - struct kallsym_iter *iter = m->private; -@@ -604,7 +602,9 @@ static int s_show(struct seq_file *m, void *p) - - value = iter->show_value ? (void *)iter->value : NULL; - -- if (iter->module_name[0]) { -+ if ((iter->builtin_module == 0 && iter->module_name[0]) || -+ (iter->builtin_module != 0 && iter->module_name[0] && -+ builtin_modules != 0)) { - char type; - - /* -@@ -613,14 +613,32 @@ static int s_show(struct seq_file *m, void *p) - */ - type = iter->exported ? 
toupper(iter->type) : - tolower(iter->type); -- seq_printf(m, "%px %c %s\t[%s]\n", value, -- type, iter->name, iter->module_name); -- } else -+ if (builtin_modules) -+ seq_printf(m, "%px %lx %c %s\t[%s]\n", value, -+ iter->size, type, iter->name, -+ iter->module_name); -+ else -+ seq_printf(m, "%px %c %s\t[%s]\n", value, -+ type, iter->name, iter->module_name); -+ } else if (builtin_modules) -+ seq_printf(m, "%px %lx %c %s\n", value, iter->size, -+ iter->type, iter->name); -+ else - seq_printf(m, "%px %c %s\n", value, - iter->type, iter->name); - return 0; - } - -+static int s_show(struct seq_file *m, void *p) -+{ -+ return s_show_internal(m, p, 0); -+} -+ -+static int s_mod_show(struct seq_file *m, void *p) -+{ -+ return s_show_internal(m, p, 1); -+} -+ - static const struct seq_operations kallsyms_op = { - .start = s_start, - .next = s_next, -@@ -662,7 +680,15 @@ int kallsyms_show_value(void) - } - } - --static int kallsyms_open(struct inode *inode, struct file *file) -+static const struct seq_operations kallmodsyms_op = { -+ .start = s_start, -+ .next = s_next, -+ .stop = s_stop, -+ .show = s_mod_show -+}; -+ -+static int kallsyms_open_internal(struct inode *inode, struct file *file, -+ const struct seq_operations *ops) - { - /* - * We keep iterator in m->private, since normal case is to -@@ -670,15 +696,25 @@ static int kallsyms_open(struct inode *inode, struct file *file) - * using get_symbol_offset for every symbol. 
- */ - struct kallsym_iter *iter; -- iter = __seq_open_private(file, &kallsyms_op, sizeof(*iter)); -+ iter = __seq_open_private(file, ops, sizeof(*iter)); - if (!iter) - return -ENOMEM; -- reset_iter(iter, 0); -+ kallsyms_iter_reset(iter, 0); - - iter->show_value = kallsyms_show_value(); - return 0; - } - -+static int kallsyms_open(struct inode *inode, struct file *file) -+{ -+ return kallsyms_open_internal(inode, file, &kallsyms_op); -+} -+ -+static int kallmodsyms_open(struct inode *inode, struct file *file) -+{ -+ return kallsyms_open_internal(inode, file, &kallmodsyms_op); -+} -+ - #ifdef CONFIG_KGDB_KDB - const char *kdb_walk_kallsyms(loff_t *pos) - { -@@ -686,10 +722,10 @@ const char *kdb_walk_kallsyms(loff_t *pos) - if (*pos == 0) { - memset(&kdb_walk_kallsyms_iter, 0, - sizeof(kdb_walk_kallsyms_iter)); -- reset_iter(&kdb_walk_kallsyms_iter, 0); -+ kallsyms_iter_reset(&kdb_walk_kallsyms_iter, 0); - } - while (1) { -- if (!update_iter(&kdb_walk_kallsyms_iter, *pos)) -+ if (!kallsyms_iter_update(&kdb_walk_kallsyms_iter, *pos)) - return NULL; - ++*pos; - /* Some debugging symbols have no name. Ignore them. 
*/ -@@ -706,9 +742,17 @@ static const struct proc_ops kallsyms_proc_ops = { - .proc_release = seq_release_private, - }; - -+static const struct proc_ops kallmodsyms_proc_ops = { -+ .proc_open = kallmodsyms_open, -+ .proc_read = seq_read, -+ .proc_lseek = seq_lseek, -+ .proc_release = seq_release_private, -+}; -+ - static int __init kallsyms_init(void) - { - proc_create("kallsyms", 0444, NULL, &kallsyms_proc_ops); -+ proc_create("kallmodsyms", 0444, NULL, &kallmodsyms_proc_ops); - return 0; - } - device_initcall(kallsyms_init); -diff --git a/kernel/module.c b/kernel/module.c -index 33569a01d6e1..70affc757bbc 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -4147,7 +4147,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, - } - - int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, -- char *name, char *module_name, int *exported) -+ char *name, char *module_name, unsigned long *size, -+ int *exported) - { - struct module *mod; - -@@ -4166,6 +4167,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, - strlcpy(name, kallsyms_symbol_name(kallsyms, symnum), KSYM_NAME_LEN); - strlcpy(module_name, mod->name, MODULE_NAME_LEN); - *exported = is_exported(name, *value, mod); -+ *size = kallsyms->symtab[symnum].st_size; - preempt_enable(); - return 0; - } -diff --git a/scripts/Makefile b/scripts/Makefile -index 8f6353508366..f585a1225c4e 100644 ---- a/scripts/Makefile -+++ b/scripts/Makefile -@@ -16,6 +16,16 @@ always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file - always-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert - always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert - -+kallsyms-objs := kallsyms.o -+ -+ifeq ($(CONFIG_KALLMODSYMS),y) -+kallsyms-objs += eu_simple.o -+ -+HOSTCFLAGS_eu_simple.o := -I$(srctree)/scripts -+HOSTCFLAGS_kallsyms.o := $(shell pkg-config --cflags glib-2.0) -I$(srctree)/scripts -+HOSTLDLIBS_kallsyms := $(shell pkg-config --libs glib-2.0) -ldw -+endif -+ - 
HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include - HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include - HOSTLDLIBS_sign-file = -lcrypto -diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c -index 6dc3078649fa..d951fbb24c16 100644 ---- a/scripts/kallsyms.c -+++ b/scripts/kallsyms.c -@@ -5,7 +5,10 @@ - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * -- * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S -+ * Usage: nm -n -S vmlinux | scripts/kallsyms [--all-symbols] -+ * [--symbol-prefix=<prefix char>] -+ * [--builtin=modules_thick.builtin] -+ * > symbols.S - * - * Table compression uses all the unused char codes on the symbols and - * maps these to the most used substrings (tokens). For instance, it might -@@ -18,12 +21,27 @@ - * - */ - -+#define _GNU_SOURCE 1 - #include <stdbool.h> - #include <stdio.h> - #include <stdlib.h> - #include <string.h> - #include <ctype.h> - #include <limits.h> -+#include <errno.h> -+#include <unistd.h> -+ -+#include "../include/generated/autoconf.h" -+ -+#ifdef CONFIG_KALLMODSYMS -+#include <libelf.h> -+#include <dwarf.h> -+#include <elfutils/libdwfl.h> -+#include <elfutils/libdw.h> -+#include <glib.h> -+ -+#include <eu_simple.h> -+#endif - - #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) - -@@ -31,9 +49,13 @@ - - struct sym_entry { - unsigned long long addr; -+ unsigned long long size; - unsigned int len; - unsigned int start_pos; - unsigned int percpu_absolute; -+#ifdef CONFIG_KALLMODSYMS -+ unsigned int module; -+#endif - unsigned char sym[]; - }; - -@@ -67,11 +89,33 @@ static int token_profit[0x10000]; - static unsigned char best_table[256][2]; - static unsigned char best_table_len[256]; - -+#ifdef CONFIG_KALLMODSYMS -+/* -+ * The builtin module names. The "offset" points to the name as if -+ * all builtin module names were concatenated to a single string. 
-+ */ -+static unsigned int builtin_module_size; /* number allocated */ -+static unsigned int builtin_module_len; /* number assigned */ -+static char **builtin_modules; /* array of module names */ -+static unsigned int *builtin_module_offsets; /* offset */ -+ -+/* -+ * An ordered list of address ranges and how they map to built-in modules. -+ */ -+struct addrmap_entry { -+ unsigned long long addr; -+ unsigned long long size; -+ unsigned int module; -+}; -+static struct addrmap_entry *addrmap; -+static int addrmap_num, addrmap_alloced; -+#endif - - static void usage(void) - { - fprintf(stderr, "Usage: kallsyms [--all-symbols] " -- "[--base-relative] < in.map > out.S\n"); -+ "[--base-relative] [--builtin=modules_thick.builtin] " -+ "< in.map > out.S\n"); - exit(1); - } - -@@ -98,6 +142,8 @@ static bool is_ignored_symbol(const char *name, char type) - "kallsyms_markers", - "kallsyms_token_table", - "kallsyms_token_index", -+ "kallsyms_symbol_modules", -+ "kallsyms_modules", - /* Exclude linker generated symbols which vary between passes */ - "_SDA_BASE_", /* ppc */ - "_SDA2_BASE_", /* ppc */ -@@ -174,6 +220,20 @@ static void check_symbol_range(const char *sym, unsigned long long addr, - } - } - -+#ifdef CONFIG_KALLMODSYMS -+static int addrmap_compare(const void *keyp, const void *rangep) -+{ -+ unsigned long long addr = *((const unsigned long long *)keyp); -+ const struct addrmap_entry *range = (const struct addrmap_entry *)rangep; -+ -+ if (addr < range->addr) -+ return -1; -+ if (addr < range->addr + range->size) -+ return 0; -+ return 1; -+} -+#endif -+ - static struct sym_entry *read_symbol(FILE *in) - { - char name[500], type; -@@ -181,9 +241,14 @@ static struct sym_entry *read_symbol(FILE *in) - unsigned int len; - struct sym_entry *sym; - int rc; -- -- rc = fscanf(in, "%llx %c %499s\n", &addr, &type, name); -- if (rc != 3) { -+#ifdef CONFIG_KALLMODSYMS -+ unsigned long long size; -+ struct addrmap_entry *range; -+ unsigned int module; -+#endif -+ -+ rc = 
fscanf(in, "%llx %llx %c %499s\n", &addr, &size, &type, name); -+ if (rc != 4) { - if (rc != EOF && fgets(name, 500, in) == NULL) - fprintf(stderr, "Read error or end of file.\n"); - return NULL; -@@ -205,6 +270,16 @@ static struct sym_entry *read_symbol(FILE *in) - check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges)); - check_symbol_range(name, addr, &percpu_range, 1); - -+#ifdef CONFIG_KALLMODSYMS -+ /* look up the builtin module this is part of (if any) */ -+ range = (struct addrmap_entry *) bsearch(&addr, -+ addrmap, addrmap_num, sizeof(*addrmap), &addrmap_compare); -+ if (range) -+ module = builtin_module_offsets[range->module]; -+ else -+ module = 0; -+#endif -+ - /* include the type field in the symbol name, so that it gets - * compressed together */ - -@@ -221,6 +296,10 @@ static struct sym_entry *read_symbol(FILE *in) - sym->sym[0] = type; - strcpy(sym_name(sym), name); - sym->percpu_absolute = 0; -+ sym->size = size; -+#ifdef CONFIG_KALLMODSYMS -+ sym->module = module; -+#endif - - return sym; - } -@@ -430,6 +509,11 @@ static void write_src(void) - printf("\n"); - } - -+ output_label("kallsyms_sizes"); -+ for (i = 0; i < table_cnt; i++) -+ printf("\tPTR\t%#llx\n", table[i]->size); -+ printf("\n"); -+ - output_label("kallsyms_num_syms"); - printf("\t.long\t%u\n", table_cnt); - printf("\n"); -@@ -479,8 +563,22 @@ static void write_src(void) - for (i = 0; i < 256; i++) - printf("\t.short\t%d\n", best_idx[i]); - printf("\n"); --} - -+#ifdef CONFIG_KALLMODSYMS -+ output_label("kallsyms_modules"); -+ for (i = 0; i < builtin_module_len; i++) -+ printf("\t.asciz\t\"%s\"\n", builtin_modules[i]); -+ printf("\n"); -+ -+ for (i = 0; i < builtin_module_len; i++) -+ free(builtin_modules[i]); -+ -+ output_label("kallsyms_symbol_modules"); -+ for (i = 0; i < table_cnt; i++) -+ printf("\t.int\t%d\n", table[i]->module); -+ printf("\n"); -+#endif -+} - - /* table lookup compression functions */ - -@@ -682,6 +780,18 @@ static int compare_symbols(const void 
*a, const void *b) - if (sa->addr < sb->addr) - return -1; - -+ /* zero-size markers before nonzero-size symbols */ -+ if (sa->size > 0 && sb->size == 0) -+ return 1; -+ if (sa->size == 0 && sb->size > 0) -+ return -1; -+ -+ /* sort by size (large size preceding symbols it encompasses) */ -+ if (sa->size < sb->size) -+ return 1; -+ if (sa->size > sb->size) -+ return -1; -+ - /* sort by "weakness" type */ - wa = (sa->sym[0] == 'w') || (sa->sym[0] == 'W'); - wb = (sb->sym[0] == 'w') || (sb->sym[0] == 'W'); -@@ -741,23 +851,198 @@ static void record_relative_base(void) - } - } - -+#ifdef CONFIG_KALLMODSYMS -+/* Built-in module list computation. */ -+ -+/* -+ * Expand the builtin modules list. -+ */ -+static void expand_builtin_modules(void) -+{ -+ builtin_module_size += 50; -+ -+ builtin_modules = realloc(builtin_modules, -+ sizeof(*builtin_modules) * -+ builtin_module_size); -+ builtin_module_offsets = realloc(builtin_module_offsets, -+ sizeof(*builtin_module_offsets) * -+ builtin_module_size); -+ -+ if (!builtin_modules || !builtin_module_offsets) { -+ fprintf(stderr, "kallsyms failure: out of memory.\n"); -+ exit(EXIT_FAILURE); -+ } -+} -+ -+/* -+ * Add a single built-in module (possibly composed of many files) to the -+ * modules list. Take the offset of the current module and return it -+ * (purely for simplicity's sake in the caller). 
-+ */ -+static size_t add_builtin_module(const char *module_name, char **module_paths, -+ GHashTable *obj2mod, size_t offset) -+{ -+ gpointer val = GUINT_TO_POINTER(builtin_module_len); -+ -+ /* map the module's object paths to the module offset */ -+ while (*module_paths) { -+ g_hash_table_insert(obj2mod, strdup(*module_paths), val); -+ module_paths++; -+ } -+ -+ /* add the module name */ -+ if (builtin_module_size <= builtin_module_len) -+ expand_builtin_modules(); -+ builtin_modules[builtin_module_len] = strdup(module_name); -+ builtin_module_offsets[builtin_module_len] = offset; -+ builtin_module_len++; -+ -+ return (offset + strlen(module_name) + 1); -+} -+ -+/* -+ * Read the linker map. -+ */ -+static void read_linker_map(GHashTable *obj2mod) -+{ -+ unsigned long long addr, size; -+ char obj[PATH_MAX+1]; -+ FILE *f = fopen(".tmp_vmlinux.ranges", "r"); -+ -+ if (!f) { -+ fprintf(stderr, "Cannot open '.tmp_vmlinux.ranges'.\n"); -+ exit(1); -+ } -+ -+ addrmap_num = 0; -+ addrmap_alloced = 4096; -+ addrmap = malloc(sizeof(*addrmap) * addrmap_alloced); -+ if (!addrmap) -+ goto oom; -+ -+ /* -+ * For each address range (addr,size) and object, add to addrmap -+ * the range and the built-in module to which the object maps. -+ */ -+ while (fscanf(f, "%llx %llx %s\n", &addr, &size, obj) == 3) { -+ int m = GPOINTER_TO_UINT(g_hash_table_lookup(obj2mod, obj)); -+ -+ if (addr == 0 || size == 0 || m == 0) -+ continue; -+ -+ if (addrmap_num >= addrmap_alloced) { -+ addrmap_alloced *= 2; -+ addrmap = realloc(addrmap, -+ sizeof(*addrmap) * addrmap_alloced); -+ if (!addrmap) -+ goto oom; -+ } -+ -+ addrmap[addrmap_num].addr = addr; -+ addrmap[addrmap_num].size = size; -+ addrmap[addrmap_num].module = m; -+ addrmap_num++; -+ } -+ fclose(f); -+ return; -+ -+oom: -+ fprintf(stderr, "kallsyms: out of memory\n"); -+ exit(1); -+} -+ -+/* -+ * Read the list of built-in modules. 
Construct: -+ * - builtin_modules: array of module names -+ * - builtin_module_offsets: array of offsets to find module names -+ * - obj2mod: mapping from each object-file path to a module index -+ * (which can be used in the arrays) -+ * Finally, read the linker map. -+ */ -+static void read_modules(const char *modules_builtin) -+{ -+ struct modules_thick_iter *i; -+ size_t offset = 0; -+ char *module_name = NULL; -+ char **module_paths; -+ GHashTable *obj2mod; -+ -+ obj2mod = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL); -+ if (!obj2mod) { -+ fprintf(stderr, "kallsyms: out of memory\n"); -+ exit(1); -+ } -+ -+ /* -+ * builtin_modules[0] is a null entry signifying a symbol that cannot be -+ * modular. -+ */ -+ builtin_module_size = 50; -+ builtin_modules = malloc(sizeof(*builtin_modules) * -+ builtin_module_size); -+ builtin_module_offsets = malloc(sizeof(*builtin_module_offsets) * -+ builtin_module_size); -+ if (!builtin_modules || !builtin_module_offsets) { -+ fprintf(stderr, "kallsyms: out of memory\n"); -+ exit(1); -+ } -+ builtin_modules[0] = strdup(""); -+ builtin_module_offsets[0] = 0; -+ builtin_module_len = 1; -+ offset++; -+ -+ /* -+ * Iterate over all modules in modules_thick.builtin and add each. -+ */ -+ i = modules_thick_iter_new(modules_builtin); -+ if (i == NULL) { -+ fprintf(stderr, "Cannot iterate over builtin modules.\n"); -+ exit(1); -+ } -+ -+ while ((module_paths = modules_thick_iter_next(i, &module_name)) != NULL) { -+ offset = add_builtin_module(module_name, module_paths, -+ obj2mod, offset); -+ free(module_paths); -+ module_paths = NULL; -+ } -+ -+ free(module_name); -+ modules_thick_iter_free(i); -+ -+ /* -+ * Read linker map. 
-+ */ -+ read_linker_map(obj2mod); -+ -+ g_hash_table_destroy(obj2mod); -+} -+#else -+static void read_modules(const char *unused) {} -+#endif /* CONFIG_KALLMODSYMS */ -+ - int main(int argc, char **argv) - { -- if (argc >= 2) { -+ const char *modules_builtin = "modules_thick.builtin"; -+ -+ if (argc >= 1) { - int i; - for (i = 1; i < argc; i++) { -- if(strcmp(argv[i], "--all-symbols") == 0) -+ if (strcmp(argv[i], "--all-symbols") == 0) - all_symbols = 1; - else if (strcmp(argv[i], "--absolute-percpu") == 0) - absolute_percpu = 1; - else if (strcmp(argv[i], "--base-relative") == 0) - base_relative = 1; -+ else if (strncmp(argv[i], "--builtin=", 10) == 0) -+ modules_builtin = &argv[i][10]; - else - usage(); - } - } else if (argc != 1) - usage(); - -+ read_modules(modules_builtin); - read_map(stdin); - shrink_table(); - if (absolute_percpu) -diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh -index dd484e92752e..67adb0959177 100755 ---- a/scripts/link-vmlinux.sh -+++ b/scripts/link-vmlinux.sh -@@ -76,6 +76,7 @@ vmlinux_link() - --start-group \ - ${KBUILD_VMLINUX_LIBS} \ - --end-group \ -+ -Map=.tmp_vmlinux.map \ - ${@}" - - ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \ -@@ -88,6 +89,7 @@ vmlinux_link() - -Wl,--start-group \ - ${KBUILD_VMLINUX_LIBS} \ - -Wl,--end-group \ -+ -Wl,-Map=.tmp_vmlinux.map \ - ${@}" - - ${CC} ${CFLAGS_vmlinux} \ -@@ -140,6 +142,19 @@ kallsyms() - info KSYM ${2} - local kallsymopt; - -+ # read the linker map to identify ranges of addresses: -+ # - for each *.o file, report address, size, pathname -+ # - most such lines will have four fields -+ # - but sometimes there is a line break after the first field -+ # - start reading at "Linker script and memory map" -+ # - stop reading at ".brk" -+ ${AWK} ' -+ /\.o$/ && start==1 { print $(NF-2), $(NF-1), $NF } -+ /^Linker script and memory map/ { start = 1 } -+ /^\.brk/ { exit(0) } -+ ' .tmp_vmlinux.map | sort > .tmp_vmlinux.ranges -+ -+ # get kallsyms options - if [ -n 
"${CONFIG_KALLSYMS_ALL}" ]; then - kallsymopt="${kallsymopt} --all-symbols" - fi -@@ -152,12 +167,18 @@ kallsyms() - kallsymopt="${kallsymopt} --base-relative" - fi - -+ # set up compilation - local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ - ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" - - local afile="`basename ${2} .o`.S" - -- ${NM} -n ${1} | scripts/kallsyms ${kallsymopt} > ${afile} -+ # "nm -S" does not print symbol size when size is 0 -+ # Therefore use awk to regularize the data: -+ # - when there are only three fields, add an explicit "0" -+ # - when there are already four fields, pass through as is -+ ${NM} -n -S ${1} | ${AWK} 'NF==3 {print $1, 0, $2, $3}; NF==4' | \ -+ scripts/kallsyms ${kallsymopt} > ${afile} - ${CC} ${aflags} -c -o ${2} ${afile} - } - -diff --git a/scripts/namespace.pl b/scripts/namespace.pl -index 1da7bca201a4..40f82b4c3a50 100755 ---- a/scripts/namespace.pl -+++ b/scripts/namespace.pl -@@ -120,6 +120,12 @@ my %nameexception = ( - 'kallsyms_addresses'=> 1, - 'kallsyms_offsets' => 1, - 'kallsyms_relative_base'=> 1, -+ 'kallsyms_sizes' => 1, -+ 'kallsyms_token_table'=> 1, -+ 'kallsyms_token_index'=> 1, -+ 'kallsyms_markers' => 1, -+ 'kallsyms_modules' => 1, -+ 'kallsyms_symbol_modules'=> 1, - '__this_module' => 1, - '_etext' => 1, - '_edata' => 1, --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0003-waitfd-new-syscall-implementing-waitpid-over-fds.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0003-waitfd-new-syscall-implementing-waitpid-over-fds.patch deleted file mode 100644 index ae303ea35634..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0003-waitfd-new-syscall-implementing-waitpid-over-fds.patch +++ /dev/null @@ -1,800 +0,0 @@ -From 923e0c2eb9228ac607ee722fc767d5c5d2a19539 Mon Sep 17 00:00:00 2001 -From: Nick Alcock <nick.alcock@oracle.com> -Date: Wed, 14 Nov 2018 20:28:51 +0000 -Subject: [PATCH 03/20] waitfd: new syscall implementing 
waitpid() over fds - -This syscall, originally due to Casey Dahlin but significantly modified -since, is called quite like waitid(): - - fd = waitfd(P_PID, some_pid, WEXITED | WSTOPPED, 0); - -This returns a file descriptor which becomes ready whenever waitpid() -would return, and when read() returns the return value waitpid() would -have returned. (Alternatively, you can use it as a pure indication that -waitpid() is callable without hanging, and then call waitpid()). See the -example in tools/testing/selftests/waitfd/. - -The original reason for rejection of this patch back in 2009 was that it -was redundant to waitpid()ing in a separate thread and transmitting -process information to another thread that polls: but this is only the -case for the conventional child-process use of waitpid(). Other -waitpid() uses, such as ptrace() returns, are targeted on a single -thread, so without waitfd or something like it, it is impossible to have -a thread that both accepts requests for servicing from other threads -over an fd *and* manipulates the state of a ptrace()d process in -response to those requests without ugly CPU-chewing polling (accepting -requests requires blocking in poll() or select(): handling the ptraced -process requires blocking in waitpid()). - -There is one ugliness in this patch which I would appreciate suggestions -to improve (due to me, not due to Casey, don't blame him). The poll() -machinery expects to be used with files, or things enough like files -that the wake_up key contains an indication as to whether this wakeup -corresponds to a POLLIN / POLLOUT / POLLERR event on this fd. You can -override this in your poll_queue_proc, but the poll() and epoll() queue -procs both have this interpretation. - -Unfortunately, this is not true for waitfds, which wait on the -wait_chldexit waitqueue, whose key is a pointer to the task_struct of -the task being killed.
We can't do anything with this key, but we -certainly don't want the poll machinery treating it as a bitmask and -checking it against poll events! - -So we introduce a new poll_wait() analogue, poll_wait_fixed(). This is used -for poll_wait() calls which know they must wait on waitqueues whose keys are -not a typecast representation of poll events, and passes in an extra -argument to the poll_queue_proc, which if nonzero is the event which a -wakeup on this waitqueue should be considered as equivalent to. The -poll_queue_proc can then skip adding entirely if that fixed event is not -included in the set to be caught by this poll(). - -We also add a new poll_table_entry.fixed_key. The poll_queue_proc can -record the fixed key it is passed in here, and reuse it at wakeup time to -track that a nonzero fixed key was passed in to poll_wait_fixed() and that -the key should be ignored in preference to fixed_key. - -With this in place, you can say, e.g. (as waitfd does) - - poll_wait_fixed(file, &current->signal->wait_chldexit, wait, - POLLIN); - -and the key passed to wakeups on the wait_chldexit waitqueue will be -ignored: the fd will always be treated as having raised POLLIN, waking -up poll()s and epoll()s that have specified that event. (Obviously, a -poll function that calls this should return the same value from the poll -function as was passed to poll_wait_fixed(), or, as usual, zero if this -was a spurious wakeup.) - -I do not like this scheme: it's sufficiently arcane that I had to go -back to my old commit messages to figure out what it was doing and -why. But I don't see another way to cause poll() to return on -appropriate activity on waitqueues that do not actually correspond to -files. (I do wonder how signalfd works. It doesn't seem to need any of -this and I don't understand why not. I would be overjoyed to remove the -whole invasive poll_wait_fixed() mess, but I'm not sure what to replace -it with.)
- -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - arch/x86/entry/syscalls/syscall_32.tbl | 3 + - arch/x86/entry/syscalls/syscall_64.tbl | 3 + - drivers/vfio/virqfd.c | 3 +- - drivers/vhost/vhost.c | 2 +- - fs/Makefile | 1 + - fs/aio.c | 2 +- - fs/eventpoll.c | 22 +++- - fs/io_uring.c | 2 +- - fs/select.c | 21 +++- - fs/waitfd.c | 130 ++++++++++++++++++++++++ - include/linux/poll.h | 14 ++- - include/linux/syscalls.h | 3 + - include/uapi/asm-generic/unistd.h | 8 ++ - init/Kconfig | 16 +++ - kernel/exit.c | 13 ++- - kernel/sys_ni.c | 1 + - mm/memcontrol.c | 2 +- - net/9p/trans_fd.c | 3 +- - tools/testing/selftests/waitfd/Makefile | 28 +++++ - tools/testing/selftests/waitfd/waitfd.c | 116 +++++++++++++++++++++ - virt/kvm/eventfd.c | 2 +- - 21 files changed, 378 insertions(+), 17 deletions(-) - create mode 100644 fs/waitfd.c - create mode 100644 tools/testing/selftests/waitfd/Makefile - create mode 100644 tools/testing/selftests/waitfd/waitfd.c - -diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl -index c17cb77eb150..1157bca6c6ea 100644 ---- a/arch/x86/entry/syscalls/syscall_32.tbl -+++ b/arch/x86/entry/syscalls/syscall_32.tbl -@@ -442,3 +442,6 @@ - 435 i386 clone3 sys_clone3 __ia32_sys_clone3 - 437 i386 openat2 sys_openat2 __ia32_sys_openat2 - 438 i386 pidfd_getfd sys_pidfd_getfd __ia32_sys_pidfd_getfd -+# This one is a temporary number, designed for no clashes. -+# Nothing but DTrace should use it. 
-+473 i386 waitfd sys_waitfd __ia32_sys_waitfd -diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl -index 44d510bc9b78..9b5e6d6a5d31 100644 ---- a/arch/x86/entry/syscalls/syscall_64.tbl -+++ b/arch/x86/entry/syscalls/syscall_64.tbl -@@ -359,6 +359,9 @@ - 435 common clone3 __x64_sys_clone3/ptregs - 437 common openat2 __x64_sys_openat2 - 438 common pidfd_getfd __x64_sys_pidfd_getfd -+# This one is a temporary number, designed for no clashes. -+# Nothing but DTrace should use it. -+473 common waitfd __x64_sys_waitfd - - # - # x32-specific system call numbers start at 512 to avoid cache impact -diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c -index 997cb5d0a657..6bfafa889af2 100644 ---- a/drivers/vfio/virqfd.c -+++ b/drivers/vfio/virqfd.c -@@ -76,7 +76,8 @@ static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void - } - - static void virqfd_ptable_queue_proc(struct file *file, -- wait_queue_head_t *wqh, poll_table *pt) -+ wait_queue_head_t *wqh, poll_table *pt, -+ unsigned long unused) - { - struct virqfd *virqfd = container_of(pt, struct virqfd, pt); - add_wait_queue(wqh, &virqfd->wait); -diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c -index f44340b41494..3043294f223d 100644 ---- a/drivers/vhost/vhost.c -+++ b/drivers/vhost/vhost.c -@@ -157,7 +157,7 @@ static void vhost_flush_work(struct vhost_work *work) - } - - static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, -- poll_table *pt) -+ poll_table *pt, unsigned long unused) - { - struct vhost_poll *poll; - -diff --git a/fs/Makefile b/fs/Makefile -index 505e51166973..48816ac3f594 100644 ---- a/fs/Makefile -+++ b/fs/Makefile -@@ -30,6 +30,7 @@ obj-$(CONFIG_SIGNALFD) += signalfd.o - obj-$(CONFIG_TIMERFD) += timerfd.o - obj-$(CONFIG_EVENTFD) += eventfd.o - obj-$(CONFIG_USERFAULTFD) += userfaultfd.o -+obj-$(CONFIG_WAITFD) += waitfd.o - obj-$(CONFIG_AIO) += aio.o - obj-$(CONFIG_IO_URING) += io_uring.o - 
obj-$(CONFIG_IO_WQ) += io-wq.o -diff --git a/fs/aio.c b/fs/aio.c -index 5f3d3d814928..986b11da81ea 100644 ---- a/fs/aio.c -+++ b/fs/aio.c -@@ -1715,7 +1715,7 @@ struct aio_poll_table { - - static void - aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, -- struct poll_table_struct *p) -+ struct poll_table_struct *p, unsigned long fixed_event) - { - struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); - -diff --git a/fs/eventpoll.c b/fs/eventpoll.c -index f5a481089893..8becdbd9825a 100644 ---- a/fs/eventpoll.c -+++ b/fs/eventpoll.c -@@ -157,6 +157,9 @@ struct epitem { - /* Number of active wait queue attached to poll operations */ - int nwait; - -+ /* fd always raises this fixed event. */ -+ unsigned long fixed_event; -+ - /* List containing poll wait queues */ - struct list_head pwqlist; - -@@ -847,7 +850,7 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) - static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head, - void *priv); - static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, -- poll_table *pt); -+ poll_table *pt, unsigned long fixed_event); - - /* - * Differs from ep_eventpoll_poll() in that internal callers already have -@@ -1263,6 +1266,13 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v - if (!(epi->event.events & EPOLLEXCLUSIVE)) - ewake = 1; - -+ /* -+ * If this fd type has a hardwired event which should override the key -+ * (e.g. if it is waiting on a non-file waitqueue), jam it in here. -+ */ -+ if (epi->fixed_event) -+ key = (void *)epi->fixed_event; -+ - if (pollflags & POLLFREE) { - /* - * If we race with ep_remove_wait_queue() it can miss -@@ -1287,11 +1297,17 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v - * target file wakeup lists. 
- */ - static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, -- poll_table *pt) -+ poll_table *pt, unsigned long fixed_event) - { - struct epitem *epi = ep_item_from_epqueue(pt); - struct eppoll_entry *pwq; - -+ if (fixed_event & !(epi->event.events & fixed_event)) -+ return; -+ -+ if (fixed_event) -+ epi->fixed_event = fixed_event; -+ - if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) { - init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); - pwq->whead = whead; -@@ -1491,6 +1507,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, - ep_set_ffd(&epi->ffd, tfile, fd); - epi->event = *event; - epi->nwait = 0; -+ epi->fixed_event = 0; - epi->next = EP_UNACTIVE_PTR; - if (epi->event.events & EPOLLWAKEUP) { - error = ep_create_wakeup_source(epi); -@@ -2391,7 +2408,6 @@ static int __init eventpoll_init(void) - * We can have many thousands of epitems, so prevent this from - * using an extra cache line on 64-bit (and smaller) CPUs - */ -- BUILD_BUG_ON(sizeof(void *) <= 8 && sizeof(struct epitem) > 128); - - /* Allocates slab cache used to allocate "struct epitem" items */ - epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), -diff --git a/fs/io_uring.c b/fs/io_uring.c -index 832e042531bc..65f6cd5530f9 100644 ---- a/fs/io_uring.c -+++ b/fs/io_uring.c -@@ -3718,7 +3718,7 @@ struct io_poll_table { - }; - - static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, -- struct poll_table_struct *p) -+ struct poll_table_struct *p, unsigned long fixed_event) - { - struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); - -diff --git a/fs/select.c b/fs/select.c -index 11d0285d46b7..088a297bbfb0 100644 ---- a/fs/select.c -+++ b/fs/select.c -@@ -116,7 +116,7 @@ struct poll_table_page { - * poll table. 
- */ - static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, -- poll_table *p); -+ poll_table *p, unsigned long fixed_event); - - void poll_initwait(struct poll_wqueues *pwq) - { -@@ -212,6 +212,14 @@ static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key - struct poll_table_entry *entry; - - entry = container_of(wait, struct poll_table_entry, wait); -+ -+ /* -+ * If this fd type has a hardwired key which should override the key -+ * (e.g. if it is waiting on a non-file waitqueue), jam it in here. -+ */ -+ if (entry->fixed_key) -+ key = (void *)entry->fixed_key; -+ - if (key && !(key_to_poll(key) & entry->key)) - return 0; - return __pollwake(wait, mode, sync, key); -@@ -219,15 +227,22 @@ static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key - - /* Add a new entry */ - static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, -- poll_table *p) -+ poll_table *p, unsigned long fixed_event) - { - struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt); -- struct poll_table_entry *entry = poll_get_entry(pwq); -+ struct poll_table_entry *entry; -+ -+ if (fixed_event && !(p->_key & fixed_event)) -+ return; -+ -+ entry = poll_get_entry(pwq); - if (!entry) - return; -+ - entry->filp = get_file(filp); - entry->wait_address = wait_address; - entry->key = p->_key; -+ entry->fixed_key = fixed_event; - init_waitqueue_func_entry(&entry->wait, pollwake); - entry->wait.private = pwq; - add_wait_queue(wait_address, &entry->wait); -diff --git a/fs/waitfd.c b/fs/waitfd.c -new file mode 100644 -index 000000000000..311f84d7b85f ---- /dev/null -+++ b/fs/waitfd.c -@@ -0,0 +1,130 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * fs/waitfd.c -+ * -+ * Copyright (C) 2008 Red Hat, Casey Dahlin <cdahlin@redhat.com> -+ * -+ * Largely derived from fs/signalfd.c -+ */ -+ -+#include <linux/file.h> -+#include <linux/poll.h> -+#include <linux/init.h> -+#include <linux/fs.h> -+#include 
<linux/sched.h> -+#include <linux/slab.h> -+#include <linux/kernel.h> -+#include <linux/signal.h> -+#include <linux/list.h> -+#include <linux/anon_inodes.h> -+#include <linux/syscalls.h> -+ -+long kernel_wait4(pid_t upid, int __user *stat_addr, -+ int options, struct rusage __user *ru); -+ -+struct waitfd_ctx { -+ int options; -+ pid_t upid; -+}; -+ -+static int waitfd_release(struct inode *inode, struct file *file) -+{ -+ kfree(file->private_data); -+ return 0; -+} -+ -+static unsigned int waitfd_poll(struct file *file, poll_table *wait) -+{ -+ struct waitfd_ctx *ctx = file->private_data; -+ long value; -+ -+ poll_wait_fixed(file, ¤t->signal->wait_chldexit, wait, -+ POLLIN); -+ -+ value = kernel_wait4(ctx->upid, NULL, ctx->options | WNOHANG | WNOWAIT, -+ NULL); -+ if (value > 0 || value == -ECHILD) -+ return POLLIN | POLLRDNORM; -+ -+ return 0; -+} -+ -+/* -+ * Returns a multiple of the size of a stat_addr, or a negative error code. The -+ * "count" parameter must be at least sizeof(int). -+ */ -+static ssize_t waitfd_read(struct file *file, char __user *buf, size_t count, -+ loff_t *ppos) -+{ -+ struct waitfd_ctx *ctx = file->private_data; -+ int __user *stat_addr = (int *)buf; -+ int flags = ctx->options; -+ ssize_t ret, total = 0; -+ -+ count /= sizeof(int); -+ if (!count) -+ return -EINVAL; -+ -+ if (file->f_flags & O_NONBLOCK) -+ flags |= WNOHANG; -+ -+ do { -+ ret = kernel_wait4(ctx->upid, stat_addr, flags, NULL); -+ if (ret == 0) -+ ret = -EAGAIN; -+ if (ret == -ECHILD) -+ ret = 0; -+ if (ret <= 0) -+ break; -+ -+ stat_addr++; -+ total += sizeof(int); -+ } while (--count); -+ -+ return total ? 
total : ret; -+} -+ -+static const struct file_operations waitfd_fops = { -+ .release = waitfd_release, -+ .poll = waitfd_poll, -+ .read = waitfd_read, -+ .llseek = noop_llseek, -+}; -+ -+SYSCALL_DEFINE4(waitfd, int __maybe_unused, which, pid_t, upid, int, options, -+ int __maybe_unused, flags) -+{ -+ int ufd; -+ struct waitfd_ctx *ctx; -+ -+ /* -+ * Options validation from kernel_wait4(), minus WNOWAIT, which is -+ * only used by our polling implementation. If WEXITED or WSTOPPED -+ * are provided, silently remove them (for backward compatibility with -+ * older callers). -+ */ -+ options &= ~(WEXITED | WSTOPPED); -+ if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| -+ __WNOTHREAD|__WCLONE|__WALL)) -+ return -EINVAL; -+ -+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); -+ if (!ctx) -+ return -ENOMEM; -+ -+ ctx->options = options; -+ ctx->upid = upid; -+ -+ ufd = anon_inode_getfd("[waitfd]", &waitfd_fops, ctx, -+ O_RDWR | flags | ((options & WNOHANG) ? -+ O_NONBLOCK | 0 : 0)); -+ /* -+ * Use the fd's nonblocking state from now on, since that can change. 
-+ */ -+ ctx->options &= ~WNOHANG; -+ -+ if (ufd < 0) -+ kfree(ctx); -+ -+ return ufd; -+} -diff --git a/include/linux/poll.h b/include/linux/poll.h -index 1cdc32b1f1b0..1c06718f39bc 100644 ---- a/include/linux/poll.h -+++ b/include/linux/poll.h -@@ -34,7 +34,8 @@ struct poll_table_struct; - /* - * structures and helpers for f_op->poll implementations - */ --typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *); -+typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, -+ struct poll_table_struct *, unsigned long fixed_event); - - /* - * Do not touch the structure directly, use the access functions -@@ -48,7 +49,15 @@ typedef struct poll_table_struct { - static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) - { - if (p && p->_qproc && wait_address) -- p->_qproc(filp, wait_address, p); -+ p->_qproc(filp, wait_address, p, 0); -+} -+ -+static inline void poll_wait_fixed(struct file *filp, -+ wait_queue_head_t *wait_address, poll_table *p, -+ unsigned long fixed_event) -+{ -+ if (p && p->_qproc && wait_address) -+ p->_qproc(filp, wait_address, p, fixed_event); - } - - /* -@@ -93,6 +102,7 @@ static inline __poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) - struct poll_table_entry { - struct file *filp; - __poll_t key; -+ unsigned long fixed_key; - wait_queue_entry_t wait; - wait_queue_head_t *wait_address; - }; -diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h -index 1815065d52f3..944ea084d160 100644 ---- a/include/linux/syscalls.h -+++ b/include/linux/syscalls.h -@@ -1421,5 +1421,8 @@ long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); - long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, - unsigned int nsops, - const struct old_timespec32 __user *timeout); -+#ifdef CONFIG_DTRACE -+asmlinkage long sys_waitfd(int which, pid_t upid, int options, int flags); -+#endif - - #endif -diff --git 
a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h -index 3a3201e4618e..5c5dda90c334 100644 ---- a/include/uapi/asm-generic/unistd.h -+++ b/include/uapi/asm-generic/unistd.h -@@ -859,6 +859,14 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) - #undef __NR_syscalls - #define __NR_syscalls 439 - -+#ifdef CONFIG_DTRACE -+#define __NR_waitfd 473 -+__SYSCALL(__NR_waitfd, sys_waitfd) -+ -+#undef __NR_syscalls -+#define __NR_syscalls 474 -+#endif -+ - /* - * 32 bit systems traditionally used different - * syscalls for off_t and loff_t arguments, while -diff --git a/init/Kconfig b/init/Kconfig -index 878907b7a72d..1b446780b372 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1478,6 +1478,22 @@ config EPOLL - Disabling this option will cause the kernel to be built without - support for epoll family of system calls. - -+config WAITFD -+ bool "Enable waitfd() system call" if EXPERT -+ select ANON_INODES -+ default n -+ help -+ Enable the waitfd() system call that allows receiving child state -+ changes from a file descriptor. This permits use of poll() to -+ monitor waitpid() output simultaneously with other fd state changes, -+ even if the waitpid() output is coming from thread-targetted sources -+ such as ptrace(). -+ -+ Note: this system call is not upstream: its syscall number is not -+ finalized, so the call itself should only be used with caution. -+ -+ If unsure, say N. -+ - config SIGNALFD - bool "Enable signalfd() system call" if EXPERT - default y -diff --git a/kernel/exit.c b/kernel/exit.c -index 0b81b26a872a..07ff3139edf2 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -1587,7 +1587,10 @@ long kernel_wait4(pid_t upid, int __user *stat_addr, int options, - enum pid_type type; - long ret; - -- if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| -+ /* -+ * As for wait4(), except that waitfd() additionally needs WNOWAIT. 
-+ */ -+ if (options & ~(WNOHANG|WNOWAIT|WUNTRACED|WCONTINUED| - __WNOTHREAD|__WCLONE|__WALL)) - return -EINVAL; - -@@ -1626,7 +1629,13 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, - int, options, struct rusage __user *, ru) - { - struct rusage r; -- long err = kernel_wait4(upid, stat_addr, options, ru ? &r : NULL); -+ long err; -+ -+ if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| -+ __WNOTHREAD|__WCLONE|__WALL)) -+ return -EINVAL; -+ -+ err = kernel_wait4(upid, stat_addr, options, ru ? &r : NULL); - - if (err > 0) { - if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) -diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c -index 3b69a560a7ac..2cdd1d180bb5 100644 ---- a/kernel/sys_ni.c -+++ b/kernel/sys_ni.c -@@ -392,6 +392,7 @@ COND_SYSCALL(subpage_prot); - * include/uapi/asm-generic/unistd.h and wanted by >= 1 arch - */ - -+COND_SYSCALL(waitfd); - /* __ARCH_WANT_SYSCALL_NO_FLAGS */ - COND_SYSCALL(epoll_create); - COND_SYSCALL(inotify_init); -diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index 537eae162ed3..fcf89c6c203c 100644 ---- a/mm/memcontrol.c -+++ b/mm/memcontrol.c -@@ -4551,7 +4551,7 @@ static int memcg_event_wake(wait_queue_entry_t *wait, unsigned mode, - } - - static void memcg_event_ptable_queue_proc(struct file *file, -- wait_queue_head_t *wqh, poll_table *pt) -+ wait_queue_head_t *wqh, poll_table *pt, unsigned long unused) - { - struct mem_cgroup_event *event = - container_of(pt, struct mem_cgroup_event, pt); -diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c -index 13cd683a658a..1f1f4d300275 100644 ---- a/net/9p/trans_fd.c -+++ b/net/9p/trans_fd.c -@@ -541,7 +541,8 @@ static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, vo - */ - - static void --p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) -+p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p, -+ unsigned long unused) - { - struct p9_conn *m = container_of(p, struct p9_conn, pt); - struct 
p9_poll_wait *pwait = NULL; -diff --git a/tools/testing/selftests/waitfd/Makefile b/tools/testing/selftests/waitfd/Makefile -new file mode 100644 -index 000000000000..f85c80b54f05 ---- /dev/null -+++ b/tools/testing/selftests/waitfd/Makefile -@@ -0,0 +1,28 @@ -+uname_M := $(shell uname -m 2>/dev/null || echo not) -+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) -+ifeq ($(ARCH),i386) -+ ARCH := X86 -+ CFLAGS := -DCONFIG_X86_32 -D__i386__ -+endif -+ifeq ($(ARCH),x86_64) -+ ARCH := X86 -+ CFLAGS := -DCONFIG_X86_64 -D__x86_64__ -+endif -+ -+CFLAGS += -I../../../../arch/x86/include/generated/ -+CFLAGS += -I../../../../include/ -+CFLAGS += -I../../../../usr/include/ -+CFLAGS += -I../../../../arch/x86/include/ -+ -+all: -+ifeq ($(ARCH),X86) -+ gcc $(CFLAGS) waitfd.c -o waitfd -+else -+ echo "Not an x86 target, can't build waitfd selftest" -+endif -+ -+run_tests: all -+ @./waitfd || echo "waitfd: [FAIL]" -+ -+clean: -+ rm -fr ./waitfd -diff --git a/tools/testing/selftests/waitfd/waitfd.c b/tools/testing/selftests/waitfd/waitfd.c -new file mode 100644 -index 000000000000..2df60bbdbb35 ---- /dev/null -+++ b/tools/testing/selftests/waitfd/waitfd.c -@@ -0,0 +1,116 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* waitfd testcase. */ -+ -+#define _GNU_SOURCE 1 -+#include <linux/unistd.h> -+#include <sys/syscall.h> -+#include <sys/ptrace.h> -+#include <sys/types.h> -+#include <sys/wait.h> -+#include <errno.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <signal.h> -+#include <string.h> -+#include <poll.h> -+ -+int waitfd(int which, pid_t upid, int options, int flags) -+{ -+ return syscall(__NR_waitfd, which, upid, options, flags); -+} -+ -+void sleeper(void) -+{ -+ sleep(10); -+ exit(0); -+} -+ -+int main (void) -+{ -+ pid_t die_pid, ptrace_pid; -+ int die_fd, ptrace_fd; -+ int status; -+ struct pollfd pfd[2]; -+ int procs_left = 2; -+ -+ memset(pfd, 0, sizeof(pfd)); -+ -+ /* -+ * Fork off two children, one of which waits for a ptrace(). 
-+ * Both just sleep after that. Make sure we can use __WNOTHREAD, -+ * __WALL, and WUNTRACED without getting an -EINVAL. -+ */ -+ -+ die_pid = fork(); -+ -+ if (die_pid == 0) -+ sleeper(); -+ -+ ptrace_pid = fork(); -+ if (ptrace_pid == 0) { -+ ptrace(PTRACE_TRACEME, 0, 0, 0); -+ sleeper(); -+ } -+ -+ die_fd = waitfd(P_PID, die_pid, 0, 0); -+ ptrace_fd = waitfd(P_PID, ptrace_pid, __WNOTHREAD | __WALL | WUNTRACED, 0); -+ -+ if (die_fd < 0 || ptrace_fd < 0) { -+ perror("Cannot waitfd()"); -+ exit(1); -+ } -+ -+ pfd[0].fd = die_fd; -+ pfd[0].events = POLLIN; -+ pfd[1].fd = ptrace_fd; -+ pfd[1].events = POLLIN; -+ -+ /* -+ * Hit the ptrace PID with a signal -+ */ -+ kill(ptrace_pid, SIGABRT); -+ -+ while (procs_left > 0) { -+ ssize_t bytes; -+ -+ if (poll(pfd, 2, -1) < 0) -+ perror ("poll() failed"); -+ -+ if (pfd[0].revents != 0) { -+ bytes = read(die_fd, &status, sizeof(int)); -+ if (bytes < sizeof(int)) { -+ fprintf(stderr, "Only read %zi bytes\n", bytes); -+ exit(1); -+ } -+ -+ printf("die_fd returned %i via waitfd read: revents are %x\n", -+ status, pfd[0].revents); -+ pfd[0].fd *= -1; -+ procs_left--; -+ } -+ -+ if (pfd[1].revents != 0) { -+ pid_t check_pid; -+ status = 0; -+ check_pid = waitpid(ptrace_pid, &status, __WNOTHREAD | -+ __WALL | WUNTRACED | WNOHANG); -+ if (check_pid < 0) { -+ fprintf(stderr, "waitpid() failed: %s\n", -+ strerror(errno)); -+ exit(1); -+ } -+ if (check_pid != ptrace_pid) { -+ fprintf(stderr, "waitfd() said PID %i was ready, but waitpid() says it isn't: %i\n", -+ ptrace_pid, check_pid); -+ exit(1); -+ } -+ printf("ptrace_fd returned status %i via waitpid; revents are %x\n", -+ status, pfd[1].revents); -+ pfd[1].fd *= -1; -+ procs_left--; -+ } -+ } -+ -+ return 0; -+} -diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c -index 67b6fc153e9c..27ae3219ae99 100644 ---- a/virt/kvm/eventfd.c -+++ b/virt/kvm/eventfd.c -@@ -232,7 +232,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) - - static void - 
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, -- poll_table *pt) -+ poll_table *pt, unsigned long unused) - { - struct kvm_kernel_irqfd *irqfd = - container_of(pt, struct kvm_kernel_irqfd, pt); --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0004-dtrace-core-and-x86.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0004-dtrace-core-and-x86.patch deleted file mode 100644 index a1449cd2d633..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0004-dtrace-core-and-x86.patch +++ /dev/null @@ -1,8061 +0,0 @@ -From cf1a56e992bfb765d9eabb5fa6641774e61ce5cf Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 22:21:36 +0000 -Subject: [PATCH 04/20] dtrace: core and x86 - -This implements DTrace's core kernel (linked-in) components, -including platform-dependent portions for x86. (Most of this -machinery is not used until the next commit.) - -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - Makefile | 8 +- - arch/x86/Kconfig | 3 + - arch/x86/include/asm/dtrace_arch.h | 28 + - arch/x86/include/asm/dtrace_cpuinfo.h | 14 + - arch/x86/include/asm/dtrace_util.h | 16 + - arch/x86/kernel/dtrace_util.c | 244 ++++ - arch/x86/mm/fault.c | 28 +- - fs/exec.c | 5 + - include/asm-generic/qrwlock.h | 24 + - include/dtrace/dtrace_impl.h | 1236 +++++++++++++++++ - include/dtrace/dtrace_impl_defines.h | 173 +++ - include/dtrace/provider.h | 971 +++++++++++++ - include/dtrace/provider_defines.h | 41 + - include/dtrace/types.h | 131 ++ - include/linux/cpuhotplug.h | 1 + - include/linux/cyclic.h | 49 + - include/linux/dtrace/cpu_defines.h | 61 + - include/linux/dtrace_cpu.h | 53 + - 
include/linux/dtrace_cpu_defines.h | 2 + - include/linux/dtrace_os.h | 120 ++ - include/linux/dtrace_psinfo.h | 59 + - include/linux/dtrace_task.h | 38 + - include/linux/dtrace_task_impl.h | 28 + - include/linux/dtrace_types.h | 13 + - include/linux/ktime.h | 8 + - include/linux/module.h | 3 + - include/linux/mutex.h | 16 + - include/linux/rwlock.h | 7 + - include/linux/sched.h | 4 + - include/linux/spinlock_up.h | 5 + - include/uapi/linux/dtrace/Kbuild | 35 + - include/uapi/linux/dtrace/actions.h | 14 + - include/uapi/linux/dtrace/actions_defines.h | 181 +++ - include/uapi/linux/dtrace/arg.h | 42 + - include/uapi/linux/dtrace/arg_defines.h | 21 + - include/uapi/linux/dtrace/buffer.h | 43 + - include/uapi/linux/dtrace/buffer_defines.h | 21 + - include/uapi/linux/dtrace/conf.h | 35 + - include/uapi/linux/dtrace/conf_defines.h | 21 + - include/uapi/linux/dtrace/cpu_defines.h | 17 + - include/uapi/linux/dtrace/dif.h | 60 + - include/uapi/linux/dtrace/dif_defines.h | 288 ++++ - include/uapi/linux/dtrace/difo.h | 57 + - include/uapi/linux/dtrace/difo_defines.h | 21 + - include/uapi/linux/dtrace/dof.h | 196 +++ - include/uapi/linux/dtrace/dof_defines.h | 192 +++ - include/uapi/linux/dtrace/dtrace.h | 33 + - include/uapi/linux/dtrace/enabling.h | 76 + - include/uapi/linux/dtrace/enabling_defines.h | 25 + - include/uapi/linux/dtrace/fasttrap.h | 56 + - include/uapi/linux/dtrace/fasttrap_defines.h | 25 + - include/uapi/linux/dtrace/fasttrap_ioctl.h | 19 + - include/uapi/linux/dtrace/faults.h | 20 + - include/uapi/linux/dtrace/faults_defines.h | 39 + - include/uapi/linux/dtrace/helpers.h | 101 ++ - include/uapi/linux/dtrace/helpers_defines.h | 21 + - include/uapi/linux/dtrace/ioctl.h | 47 + - include/uapi/linux/dtrace/metadesc.h | 81 ++ - include/uapi/linux/dtrace/metadesc_defines.h | 24 + - include/uapi/linux/dtrace/options.h | 20 + - include/uapi/linux/dtrace/options_defines.h | 72 + - include/uapi/linux/dtrace/stability.h | 52 + - 
include/uapi/linux/dtrace/stability_defines.h | 53 + - include/uapi/linux/dtrace/status.h | 50 + - include/uapi/linux/dtrace/universal.h | 47 + - init/Kconfig | 2 + - init/main.c | 10 + - kernel/Makefile | 1 + - kernel/dtrace/Kconfig | 54 + - kernel/dtrace/Makefile | 12 + - kernel/dtrace/cyclic.c | 526 +++++++ - kernel/dtrace/dtrace_cpu.c | 61 + - kernel/dtrace/dtrace_os.c | 332 +++++ - kernel/dtrace/dtrace_psinfo.c | 212 +++ - kernel/dtrace/dtrace_task.c | 237 ++++ - kernel/exit.c | 4 + - kernel/fork.c | 24 + - kernel/module.c | 14 + - kernel/sched/core.c | 10 + - kernel/sched/sched.h | 4 + - kernel/time/timekeeping.c | 2 + - scripts/coccinelle/dtrace/enum-elision.cocci | 29 + - .../coccinelle/dtrace/typedef-elision.cocci | 83 ++ - scripts/package/mkspec | 1 + - 84 files changed, 7108 insertions(+), 4 deletions(-) - create mode 100644 arch/x86/include/asm/dtrace_arch.h - create mode 100644 arch/x86/include/asm/dtrace_cpuinfo.h - create mode 100644 arch/x86/include/asm/dtrace_util.h - create mode 100644 arch/x86/kernel/dtrace_util.c - create mode 100644 include/dtrace/dtrace_impl.h - create mode 100644 include/dtrace/dtrace_impl_defines.h - create mode 100644 include/dtrace/provider.h - create mode 100644 include/dtrace/provider_defines.h - create mode 100644 include/dtrace/types.h - create mode 100644 include/linux/cyclic.h - create mode 100644 include/linux/dtrace/cpu_defines.h - create mode 100644 include/linux/dtrace_cpu.h - create mode 100644 include/linux/dtrace_cpu_defines.h - create mode 100644 include/linux/dtrace_os.h - create mode 100644 include/linux/dtrace_psinfo.h - create mode 100644 include/linux/dtrace_task.h - create mode 100644 include/linux/dtrace_task_impl.h - create mode 100644 include/linux/dtrace_types.h - create mode 100644 include/uapi/linux/dtrace/Kbuild - create mode 100644 include/uapi/linux/dtrace/actions.h - create mode 100644 include/uapi/linux/dtrace/actions_defines.h - create mode 100644 include/uapi/linux/dtrace/arg.h - create 
mode 100644 include/uapi/linux/dtrace/arg_defines.h - create mode 100644 include/uapi/linux/dtrace/buffer.h - create mode 100644 include/uapi/linux/dtrace/buffer_defines.h - create mode 100644 include/uapi/linux/dtrace/conf.h - create mode 100644 include/uapi/linux/dtrace/conf_defines.h - create mode 100644 include/uapi/linux/dtrace/cpu_defines.h - create mode 100644 include/uapi/linux/dtrace/dif.h - create mode 100644 include/uapi/linux/dtrace/dif_defines.h - create mode 100644 include/uapi/linux/dtrace/difo.h - create mode 100644 include/uapi/linux/dtrace/difo_defines.h - create mode 100644 include/uapi/linux/dtrace/dof.h - create mode 100644 include/uapi/linux/dtrace/dof_defines.h - create mode 100644 include/uapi/linux/dtrace/dtrace.h - create mode 100644 include/uapi/linux/dtrace/enabling.h - create mode 100644 include/uapi/linux/dtrace/enabling_defines.h - create mode 100644 include/uapi/linux/dtrace/fasttrap.h - create mode 100644 include/uapi/linux/dtrace/fasttrap_defines.h - create mode 100644 include/uapi/linux/dtrace/fasttrap_ioctl.h - create mode 100644 include/uapi/linux/dtrace/faults.h - create mode 100644 include/uapi/linux/dtrace/faults_defines.h - create mode 100644 include/uapi/linux/dtrace/helpers.h - create mode 100644 include/uapi/linux/dtrace/helpers_defines.h - create mode 100644 include/uapi/linux/dtrace/ioctl.h - create mode 100644 include/uapi/linux/dtrace/metadesc.h - create mode 100644 include/uapi/linux/dtrace/metadesc_defines.h - create mode 100644 include/uapi/linux/dtrace/options.h - create mode 100644 include/uapi/linux/dtrace/options_defines.h - create mode 100644 include/uapi/linux/dtrace/stability.h - create mode 100644 include/uapi/linux/dtrace/stability_defines.h - create mode 100644 include/uapi/linux/dtrace/status.h - create mode 100644 include/uapi/linux/dtrace/universal.h - create mode 100644 kernel/dtrace/Kconfig - create mode 100644 kernel/dtrace/Makefile - create mode 100644 kernel/dtrace/cyclic.c - create mode 100644 
kernel/dtrace/dtrace_cpu.c - create mode 100644 kernel/dtrace/dtrace_os.c - create mode 100644 kernel/dtrace/dtrace_psinfo.c - create mode 100644 kernel/dtrace/dtrace_task.c - create mode 100644 scripts/coccinelle/dtrace/enum-elision.cocci - create mode 100644 scripts/coccinelle/dtrace/typedef-elision.cocci - -diff --git a/Makefile b/Makefile -index 2aad854bb87d..d6b9a7d2c973 100644 ---- a/Makefile -+++ b/Makefile -@@ -1026,11 +1026,12 @@ core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ - - vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ - $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ -- $(net-y) $(net-m) $(libs-y) $(libs-m) $(virt-y))) -+ $(net-y) $(net-m) $(libs-y) $(libs-m) $(virt-y) \ -+ $(dtrace-y) $(dtrace-m))) - - vmlinux-alldirs := $(sort $(vmlinux-dirs) Documentation \ - $(patsubst %/,%,$(filter %/, $(init-) $(core-) \ -- $(drivers-) $(net-) $(libs-) $(virt-)))) -+ $(drivers-) $(net-) $(libs-) $(virt-) $(dtrace-)))) - - build-dirs := $(vmlinux-dirs) - clean-dirs := $(vmlinux-alldirs) -@@ -1038,6 +1039,7 @@ clean-dirs := $(vmlinux-alldirs) - init-y := $(patsubst %/, %/built-in.a, $(init-y)) - core-y := $(patsubst %/, %/built-in.a, $(core-y)) - drivers-y := $(patsubst %/, %/built-in.a, $(drivers-y)) -+dtrace-y := $(patsubst %/, %/built-in.a, $(dtrace-y)) - net-y := $(patsubst %/, %/built-in.a, $(net-y)) - libs-y1 := $(patsubst %/, %/lib.a, $(libs-y)) - libs-y2 := $(patsubst %/, %/built-in.a, $(filter-out %.a, $(libs-y))) -@@ -1045,7 +1047,7 @@ virt-y := $(patsubst %/, %/built-in.a, $(virt-y)) - - # Externally visible symbols (used by link-vmlinux.sh) - export KBUILD_VMLINUX_OBJS := $(head-y) $(init-y) $(core-y) $(libs-y2) \ -- $(drivers-y) $(net-y) $(virt-y) -+ $(drivers-y) $(net-y) $(virt-y) $(dtrace-y) - export KBUILD_VMLINUX_LIBS := $(libs-y1) - export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds - export LDFLAGS_vmlinux -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index beea77046f9b..1d1e23d4d20f 100644 ---- 
a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -363,6 +363,9 @@ config PGTABLE_LEVELS - default 3 if X86_PAE - default 2 - -+config ARCH_SUPPORTS_DTRACE -+ def_bool y if X86_64 -+ - config CC_HAS_SANE_STACKPROTECTOR - bool - default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC)) if 64BIT -diff --git a/arch/x86/include/asm/dtrace_arch.h b/arch/x86/include/asm/dtrace_arch.h -new file mode 100644 -index 000000000000..74e27f08a873 ---- /dev/null -+++ b/arch/x86/include/asm/dtrace_arch.h -@@ -0,0 +1,28 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+/* -+ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _X86_DTRACE_ARCH_H -+#define _X86_DTRACE_ARCH_H -+ -+/* Number of arguments stored inside the mstate. */ -+#define DTRACE_MSTATE_ARGS_MAX 6 -+ -+typedef uint8_t asm_instr_t; -+ -+typedef int (*prov_exit_f)(void); -+ -+/* -+ * Structure to hold DTrace specific information about modules (including the -+ * core kernel module). Note that each module (and the main kernel) already -+ * has one field that relates to probing: -+ * - pdata: pointer to a dtrace_module struct (for DTrace) -+ */ -+struct dtrace_module { -+ int enabled_cnt; -+ prov_exit_f prov_exit; /* Called with module_mutex held */ -+}; -+ -+#endif /* _X86_DTRACE_ARCH_H */ -diff --git a/arch/x86/include/asm/dtrace_cpuinfo.h b/arch/x86/include/asm/dtrace_cpuinfo.h -new file mode 100644 -index 000000000000..47024e169ec4 ---- /dev/null -+++ b/arch/x86/include/asm/dtrace_cpuinfo.h -@@ -0,0 +1,14 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+/* Copyright (C) 2013-2014 Oracle, Inc. 
*/ -+ -+#ifndef _ASM_X86_DTRACE_CPUINFO_H_ -+#define _ASM_X86_DTRACE_CPUINFO_H_ -+ -+#include <asm/processor.h> -+ -+typedef struct cpuinfo_x86 cpuinfo_arch_t; -+ -+#define dtrace_cpuinfo_chip(ci) ((ci)->phys_proc_id) -+ -+#endif /* _ASM_X86_DTRACE_CPUINFO_H_ */ -diff --git a/arch/x86/include/asm/dtrace_util.h b/arch/x86/include/asm/dtrace_util.h -new file mode 100644 -index 000000000000..4d9843bbc95b ---- /dev/null -+++ b/arch/x86/include/asm/dtrace_util.h -@@ -0,0 +1,16 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _X86_DTRACE_UTIL_H -+#define _X86_DTRACE_UTIL_H -+ -+#ifndef __ASSEMBLY__ -+ -+#include <asm/dtrace_arch.h> -+#include <asm/ptrace.h> -+ -+#endif -+ -+#endif /* _X86_DTRACE_UTIL_H */ -diff --git a/arch/x86/kernel/dtrace_util.c b/arch/x86/kernel/dtrace_util.c -new file mode 100644 -index 000000000000..64280fb98bbb ---- /dev/null -+++ b/arch/x86/kernel/dtrace_util.c -@@ -0,0 +1,244 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_util.c -+ * DESCRIPTION: Dynamic Tracing: Architecture utility functions -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#include <linux/dtrace_cpu.h> -+#include <linux/dtrace_os.h> -+#include <linux/dtrace_task_impl.h> -+#include <linux/kdebug.h> -+#include <linux/mm.h> -+#include <linux/module.h> -+#include <linux/memory.h> -+#include <linux/notifier.h> -+#include <linux/ptrace.h> -+#include <linux/sched.h> -+#include <linux/slab.h> -+#include <linux/uaccess.h> -+#include <linux/sched/task_stack.h> -+#include <asm/insn.h> -+#include <asm/pgtable.h> -+#include <asm/ptrace.h> -+#include <asm/text-patching.h> -+#include <asm/dtrace_arch.h> -+#include <asm/dtrace_util.h> -+ -+int dtrace_instr_size(const asm_instr_t *addr) -+{ -+ struct insn insn; -+ -+ kernel_insn_init(&insn, addr, MAX_INSN_SIZE); -+ insn_get_length(&insn); -+ -+ return insn_complete(&insn) ? 
insn.length : -1; -+} -+EXPORT_SYMBOL(dtrace_instr_size); -+ -+/* -+ * Move the instruction pointer forward to the next instruction, effectiely -+ * skipping the current one. -+ */ -+static void dtrace_skip_instruction(struct pt_regs *regs) -+{ -+ int delta; -+ -+ delta = dtrace_instr_size((asm_instr_t *)regs->ip); -+ BUG_ON(delta <= 0); -+ -+ regs->ip += delta; -+} -+ -+void dtrace_handle_badaddr(struct pt_regs *regs) -+{ -+ unsigned long addr = read_cr2(); -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); -+ this_cpu_core->cpuc_dtrace_illval = addr; -+ -+ dtrace_skip_instruction(regs); -+} -+ -+/* -+ * Trap notification handler. -+ */ -+int dtrace_die_notifier(struct notifier_block *nb, unsigned long val, -+ void *args) -+{ -+ struct die_args *dargs = args; -+ -+ switch (val) { -+ case DIE_PAGE_FAULT: { -+ if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT)) -+ return NOTIFY_DONE; -+ -+ dtrace_handle_badaddr(dargs->regs); -+ -+ return NOTIFY_OK | NOTIFY_STOP_MASK; -+ } -+ case DIE_GPF: { -+ if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT)) -+ return NOTIFY_DONE; -+ -+ dtrace_handle_badaddr(dargs->regs); -+ -+ return NOTIFY_OK | NOTIFY_STOP_MASK; -+ } -+ /* fallthrough */ -+ default: -+ return NOTIFY_DONE; -+ } -+} -+ -+static inline int dtrace_bad_address(void *addr) -+{ -+ unsigned long dummy; -+ -+ return probe_kernel_address((unsigned long *)addr, dummy); -+} -+ -+static int dtrace_user_addr_is_exec(uintptr_t addr) -+{ -+ struct mm_struct *mm = current->mm; -+ pgd_t *pgd; -+ -+#if CONFIG_PGTABLE_LEVELS > 3 -+ p4d_t *p4d; -+#endif -+ -+ pud_t *pud; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long flags; -+ int ret = 0; -+ -+ if (mm == NULL) -+ return 0; -+ -+ addr &= PAGE_MASK; -+ -+ local_irq_save(flags); -+ -+ pgd = pgd_offset(mm, addr); -+ if (dtrace_bad_address(pgd)) -+ goto out; -+ if (pgd_none(*pgd) || !pgd_present(*pgd)) -+ goto out; -+ -+#if CONFIG_PGTABLE_LEVELS > 3 -+ p4d = p4d_offset(pgd, addr); -+ if (dtrace_bad_address(p4d)) -+ goto out; -+ if (p4d_none(*p4d) || 
!p4d_present(*p4d)) -+ goto out; -+ -+ pud = pud_offset(p4d, addr); -+#else -+ pud = pud_offset(pgd, addr); -+#endif -+ -+ if (dtrace_bad_address(pud)) -+ goto out; -+ if (pud_none(*pud) || !pud_present(*pud)) -+ goto out; -+ if (unlikely(pud_large(*pud))) { -+ pte = (pte_t *)pud; -+ if (dtrace_bad_address(pte)) -+ goto out; -+ -+ ret = pte_exec(*pte); -+ goto out; -+ } -+ -+ pmd = pmd_offset(pud, addr); -+ if (dtrace_bad_address(pmd)) -+ goto out; -+ if (pmd_none(*pmd)) -+ goto out; -+ if (unlikely(pmd_large(*pmd) || !pmd_present(*pmd))) { -+ pte = (pte_t *)pmd; -+ if (dtrace_bad_address(pte)) -+ goto out; -+ -+ ret = pte_exec(*pte); -+ goto out; -+ } -+ -+ pte = pte_offset_map(pmd, addr); -+ if (dtrace_bad_address(pte)) -+ goto out; -+ if (pte_protnone(*pte)) -+ goto out; -+ if ((pte_flags(*pte) & (_PAGE_PRESENT|_PAGE_USER|_PAGE_SPECIAL)) != -+ (_PAGE_PRESENT|_PAGE_USER)) -+ goto out; -+ -+ ret = pte_exec(*pte); -+ -+out: -+ local_irq_restore(flags); -+ -+ return ret; -+} -+ -+void dtrace_user_stacktrace(struct stacktrace_state *st) -+{ -+ struct pt_regs *regs = current_pt_regs(); -+ uint64_t *pcs = st->pcs; -+ int limit = st->limit; -+ unsigned long *bos; -+ unsigned long *sp = (unsigned long *)user_stack_pointer(regs); -+ int ret; -+ -+ if (!user_mode(regs)) -+ goto out; -+ -+ if (current->dt_task == NULL) -+ goto out; -+ -+ bos = current->dt_task->dt_ustack; -+ -+ st->depth = 1; -+ if (pcs) -+ *pcs++ = (uint64_t)instruction_pointer(regs); -+ limit--; -+ -+ if (!limit) -+ goto out; -+ -+ while (sp <= bos && limit) { -+ unsigned long pc; -+ -+ pagefault_disable(); -+ ret = __copy_from_user_inatomic(&pc, sp, sizeof(pc)); -+ pagefault_enable(); -+ -+ if (ret) -+ break; -+ -+ if (dtrace_user_addr_is_exec(pc)) { -+ if (pcs) -+ *pcs++ = pc; -+ limit--; -+ st->depth++; -+ } -+ -+ sp++; -+ } -+ -+out: -+ if (pcs) { -+ while (limit--) -+ *pcs++ = 0; -+ } -+} -+ -+void dtrace_mod_pdata_init(struct dtrace_module *pdata) -+{ -+} -+ -+void dtrace_mod_pdata_cleanup(struct 
dtrace_module *pdata) -+{ -+} -diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c -index 629fdf13f846..b1cdcf3518f3 100644 ---- a/arch/x86/mm/fault.c -+++ b/arch/x86/mm/fault.c -@@ -18,6 +18,7 @@ - #include <linux/uaccess.h> /* faulthandler_disabled() */ - #include <linux/efi.h> /* efi_recover_from_page_fault()*/ - #include <linux/mm_types.h> -+#include <linux/dtrace_os.h> /* dtrace_no_pf */ - - #include <asm/cpufeature.h> /* boot_cpu_has, ... */ - #include <asm/traps.h> /* dotraplinkage, ... */ -@@ -782,6 +783,16 @@ no_context(struct pt_regs *regs, unsigned long error_code, - (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || - address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { - unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *); -+ -+ /* -+ * Allow for the possibility that we know what we are doing and -+ * ignore this fault. E.g. the address may come from a source -+ * we cannot trust and it is OK if we cannot access it. -+ */ -+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, -+ 14, SIGKILL) == NOTIFY_STOP) -+ return; -+ - /* - * We're likely to be running with very little stack space - * left. It's plausible that we'd hit this condition but -@@ -830,8 +841,13 @@ no_context(struct pt_regs *regs, unsigned long error_code, - oops: - /* - * Oops. The kernel tried to access some bad page. We'll have to -- * terminate things with extreme prejudice: -+ * terminate things with extreme prejudice, unless a notifier decides -+ * to let this one slide. - */ -+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, -+ SIGKILL) == NOTIFY_STOP) -+ return; -+ - flags = oops_begin(); - - show_fault_oops(regs, error_code, address); -@@ -1315,6 +1331,10 @@ void do_user_addr_fault(struct pt_regs *regs, - tsk = current; - mm = tsk->mm; - -+ /* -+ * From here on, we know this must be a fault in userspace. 
-+ */ -+ - /* kprobes don't want to hook the spurious faults: */ - if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF))) - return; -@@ -1341,6 +1361,12 @@ void do_user_addr_fault(struct pt_regs *regs, - return; - } - -+ /* -+ * DTrace doesn't want to either. -+ */ -+ if (unlikely(dtrace_no_pf(regs))) -+ return; -+ - /* - * If we're in an interrupt, have no user context or are running - * in a region with pagefaults disabled then we must not take the fault -diff --git a/fs/exec.c b/fs/exec.c -index 77603ceed51f..4bc77cee8f77 100644 ---- a/fs/exec.c -+++ b/fs/exec.c -@@ -62,6 +62,7 @@ - #include <linux/oom.h> - #include <linux/compat.h> - #include <linux/vmalloc.h> -+#include <linux/dtrace_os.h> - - #include <linux/uaccess.h> - #include <asm/mmu_context.h> -@@ -1827,6 +1828,10 @@ static int __do_execve_file(int fd, struct filename *filename, - goto out; - - /* execve succeeded */ -+ -+ /* Update DTrace per-task data. */ -+ dtrace_task_exec(current); -+ - current->fs->in_exec = 0; - current->in_execve = 0; - rseq_execve(current); -diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h -index 3aefde23dcea..addbb3c7d953 100644 ---- a/include/asm-generic/qrwlock.h -+++ b/include/asm-generic/qrwlock.h -@@ -30,6 +30,26 @@ - extern void queued_read_lock_slowpath(struct qrwlock *lock); - extern void queued_write_lock_slowpath(struct qrwlock *lock); - -+#ifdef CONFIG_DTRACE -+/** -+ * queued_peek_read_can_lock -- would read_trylock() be likely to succeed? -+ * @lock: Pointer to queue rwlock structure -+ */ -+static inline int queued_peek_read_can_lock(struct qrwlock *lock) -+{ -+ return !(atomic_read(&lock->cnts) & _QW_WMASK); -+} -+ -+/** -+ * queued_peek_write_can_lock -- would write_trylock() be likely to succeed? 
-+ * @lock: Pointer to queue rwlock structure -+ */ -+static inline int queued_peek_write_can_lock(struct qrwlock *lock) -+{ -+ return !atomic_read(&lock->cnts); -+} -+#endif /* CONFIG_DTRACE */ -+ - /** - * queued_read_trylock - try to acquire read lock of a queue rwlock - * @lock : Pointer to queue rwlock structure -@@ -120,6 +140,10 @@ static inline void queued_write_unlock(struct qrwlock *lock) - * Remapping rwlock architecture specific functions to the corresponding - * queue rwlock functions. - */ -+#ifdef CONFIG_DTRACE -+#define arch_peek_read_can_lock(l) queued_peek_read_can_lock(l) -+#define arch_peek_write_can_lock(l) queued_peek_write_can_lock(l) -+#endif /* CONFIG_DTRACE */ - #define arch_read_lock(l) queued_read_lock(l) - #define arch_write_lock(l) queued_write_lock(l) - #define arch_read_trylock(l) queued_read_trylock(l) -diff --git a/include/dtrace/dtrace_impl.h b/include/dtrace/dtrace_impl.h -new file mode 100644 -index 000000000000..2420103c765c ---- /dev/null -+++ b/include/dtrace/dtrace_impl.h -@@ -0,0 +1,1236 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - Implementation -+ * -+ * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_IMPL_H -+#define _LINUX_DTRACE_IMPL_H -+ -+#include <linux/cyclic.h> -+#include <linux/idr.h> -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/dif.h> -+#include <linux/dtrace/difo_defines.h> -+#include <linux/dtrace/metadesc.h> -+#include <linux/dtrace/stability.h> -+#include <linux/dtrace/helpers.h> -+#include <dtrace/types.h> -+#include <dtrace/provider.h> -+#include <dtrace/dtrace_impl_defines.h> -+ -+struct dtrace_provider { -+ struct dtrace_pattr dtpv_attr; -+ struct dtrace_ppriv dtpv_priv; -+ struct dtrace_pops dtpv_pops; -+ char *dtpv_name; -+ void *dtpv_arg; -+ uint_t dtpv_defunct; -+ struct dtrace_provider *dtpv_next; -+}; -+ -+struct dtrace_predicate { -+ struct dtrace_difo *dtp_difo; -+ dtrace_cacheid_t dtp_cacheid; -+ int dtp_refcnt; -+}; -+ -+struct dtrace_statvar { -+ uint64_t dtsv_data; -+ size_t dtsv_size; -+ int dtsv_refcnt; -+ struct dtrace_difv dtsv_var; -+}; -+ -+struct dtrace_action { -+ dtrace_actkind_t dta_kind; -+ uint16_t dta_intuple; -+ uint32_t dta_refcnt; -+ struct dtrace_difo *dta_difo; -+ struct dtrace_recdesc dta_rec; -+ struct dtrace_action *dta_prev; -+ struct dtrace_action *dta_next; -+}; -+ -+struct dtrace_ecb; -+ -+struct dtrace_probe { -+ dtrace_id_t dtpr_id; -+ struct dtrace_ecb *dtpr_ecb; -+ struct dtrace_ecb *dtpr_ecb_last; -+ void *dtpr_arg; -+ dtrace_cacheid_t dtpr_predcache; -+ int dtpr_aframes; -+ struct dtrace_provider *dtpr_provider; -+ char *dtpr_mod; -+ char *dtpr_func; -+ char *dtpr_name; -+ struct dtrace_probe *dtpr_nextmod; -+ struct dtrace_probe *dtpr_prevmod; -+ struct dtrace_probe *dtpr_nextfunc; -+ struct dtrace_probe *dtpr_prevfunc; -+ struct dtrace_probe *dtpr_nextname; -+ struct dtrace_probe *dtpr_prevname; -+ dtrace_genid_t dtpr_gen; -+}; -+ -+struct dtrace_state; -+ -+struct dtrace_ecb { -+ dtrace_epid_t dte_epid; -+ uint32_t dte_alignment; -+ size_t dte_needed; -+ size_t dte_size; -+ struct dtrace_predicate *dte_predicate; -+ struct dtrace_action 
*dte_action; -+ struct dtrace_ecb *dte_next; -+ struct dtrace_state *dte_state; -+ uint32_t dte_cond; -+ struct dtrace_probe *dte_probe; -+ struct dtrace_action *dte_action_last; -+ uint64_t dte_uarg; -+}; -+ -+struct dtrace_key { -+ uint64_t dttk_value; -+ uint64_t dttk_size; -+}; -+ -+struct dtrace_tuple { -+ uint32_t dtt_nkeys; -+ uint32_t dtt_pad; -+ struct dtrace_key dtt_key[1]; -+}; -+ -+struct dtrace_dynvar { -+ uint64_t dtdv_hashval; -+ struct dtrace_dynvar *dtdv_next; -+ void *dtdv_data; -+ struct dtrace_tuple dtdv_tuple; -+}; -+ -+struct dtrace_dstate_percpu { -+ struct dtrace_dynvar *dtdsc_free; -+ struct dtrace_dynvar *dtdsc_dirty; -+ struct dtrace_dynvar *dtdsc_rinsing; -+ struct dtrace_dynvar *dtdsc_clean; -+ uint64_t dtdsc_drops; -+ uint64_t dtdsc_dirty_drops; -+ uint64_t dtdsc_rinsing_drops; -+#ifdef CONFIG_64BIT -+ uint64_t dtdsc_pad; -+#else -+ uint64_t dtdsc_pad[2]; -+#endif -+}; -+ -+struct dtrace_dynhash { -+ struct dtrace_dynvar *dtdh_chain; -+ uintptr_t dtdh_lock; -+#ifdef CONFIG_64BIT -+ uintptr_t dtdh_pad[6]; -+#else -+ uintptr_t dtdh_pad[14]; -+#endif -+}; -+ -+struct dtrace_dstate { -+ void *dtds_base; -+ size_t dtds_size; -+ size_t dtds_hashsize; -+ size_t dtds_chunksize; -+ struct dtrace_dynhash *dtds_hash; -+ enum dtrace_dstate_state dtds_state; -+ struct dtrace_dstate_percpu *dtds_percpu; -+}; -+ -+struct dtrace_vstate { -+ struct dtrace_state *dtvs_state; -+ struct dtrace_statvar **dtvs_globals; -+ int dtvs_nglobals; -+ struct dtrace_difv *dtvs_tlocals; -+ int dtvs_ntlocals; -+ struct dtrace_statvar **dtvs_locals; -+ int dtvs_nlocals; -+ struct dtrace_dstate dtvs_dynvars; -+}; -+ -+/* -+ * DTrace Machine State -+ * -+ * In the process of processing a fired probe, DTrace needs to track and/or -+ * cache some per-CPU state associated with that particular firing. 
This is -+ * state that is always discarded after the probe firing has completed, and -+ * much of it is not specific to any DTrace consumer, remaining valid across -+ * all ECBs. This state is tracked in the dtrace_mstate structure. -+ */ -+ -+struct dtrace_mstate { -+ uintptr_t dtms_scratch_base; -+ uintptr_t dtms_scratch_ptr; -+ size_t dtms_scratch_size; -+ uint32_t dtms_present; -+ uint64_t dtms_arg[7]; -+ dtrace_epid_t dtms_epid; -+ ktime_t dtms_timestamp; -+ int dtms_stackdepth; -+ int dtms_ustackdepth; -+ struct dtrace_probe *dtms_probe; -+ uintptr_t dtms_caller; -+ uint64_t dtms_ucaller; -+ int dtms_ipl; -+ int dtms_fltoffs; -+ uintptr_t dtms_strtok; -+ uint32_t dtms_access; -+ struct dtrace_difo *dtms_difo; -+}; -+ -+struct dtrace_buffer { -+ uint64_t dtb_offset; -+ uint64_t dtb_size; -+ uint32_t dtb_flags; -+ uint32_t dtb_drops; -+ caddr_t dtb_tomax; -+ caddr_t dtb_xamot; -+ uint32_t dtb_xamot_flags; -+ uint32_t dtb_xamot_drops; -+ uint64_t dtb_xamot_offset; -+ uint32_t dtb_errors; -+ uint32_t dtb_xamot_errors; -+#ifndef CONFIG_64BIT -+ uint64_t dtb_pad1; -+#endif -+}; -+ -+struct dtrace_speculation { -+ enum dtrace_speculation_state dtsp_state; -+ int dtsp_cleaning; -+ struct dtrace_buffer *dtsp_buffer; -+}; -+ -+struct dtrace_aggregation { -+ struct dtrace_action dtag_action; -+ dtrace_aggid_t dtag_id; -+ struct dtrace_ecb *dtag_ecb; -+ struct dtrace_action *dtag_first; -+ uint32_t dtag_base; -+ uint8_t dtag_hasarg; -+ uint64_t dtag_initial; -+ void (*dtag_aggregate)(uint64_t *, uint64_t, uint64_t); -+}; -+ -+struct dtrace_cred { -+ const struct cred *dcr_cred; -+ uint8_t dcr_destructive; -+ uint8_t dcr_visible; -+ uint16_t dcr_action; -+}; -+ -+struct dtrace_state { -+ dev_t dts_dev; -+ int dts_necbs; -+ struct dtrace_ecb **dts_ecbs; -+ dtrace_epid_t dts_epid; -+ size_t dts_needed; -+ struct dtrace_state *dts_anon; -+ enum dtrace_activity dts_activity; -+ struct dtrace_vstate dts_vstate; -+ struct dtrace_buffer *dts_buffer; -+ struct dtrace_buffer 
*dts_aggbuffer; -+ struct dtrace_speculation *dts_speculations; -+ int dts_nspeculations; -+ struct idr dts_agg_idr; -+ int dts_naggs; -+ uint64_t dts_errors; -+ uint32_t dts_speculations_busy; -+ uint32_t dts_speculations_unavail; -+ uint32_t dts_stkstroverflows; -+ uint32_t dts_dblerrors; -+ uint32_t dts_reserve; -+ cyclic_id_t dts_cleaner; -+ cyclic_id_t dts_deadman; -+ ktime_t dts_laststatus; -+ ktime_t dts_alive; -+ char dts_speculates; -+ char dts_destructive; -+ int dts_nformats; -+ char **dts_formats; -+ dtrace_optval_t dts_options[DTRACEOPT_MAX]; -+ struct dtrace_cred dts_cred; -+ size_t dts_nretained; -+}; -+ -+struct dtrace_enabling { -+ struct dtrace_ecbdesc **dten_desc; -+ int dten_ndesc; -+ int dten_maxdesc; -+ struct dtrace_vstate *dten_vstate; -+ dtrace_genid_t dten_probegen; -+ struct dtrace_ecbdesc *dten_current; -+ int dten_error; -+ int dten_primed; -+ struct dtrace_enabling *dten_prev; -+ struct dtrace_enabling *dten_next; -+}; -+ -+typedef int dtrace_probekey_f(const char *, const char *, int); -+ -+struct dtrace_probekey { -+ const char *dtpk_prov; -+ dtrace_probekey_f *dtpk_pmatch; -+ const char *dtpk_mod; -+ dtrace_probekey_f *dtpk_mmatch; -+ const char *dtpk_func; -+ dtrace_probekey_f *dtpk_fmatch; -+ const char *dtpk_name; -+ dtrace_probekey_f *dtpk_nmatch; -+ dtrace_id_t dtpk_id; -+}; -+ -+struct dtrace_hashbucket { -+ struct dtrace_hashbucket *dthb_next; -+ struct dtrace_probe *dthb_chain; -+ int dthb_len; -+}; -+ -+struct dtrace_hash { -+ struct dtrace_hashbucket **dth_tab; -+ int dth_size; -+ int dth_mask; -+ int dth_nbuckets; -+ uintptr_t dth_nextoffs; -+ uintptr_t dth_prevoffs; -+ uintptr_t dth_stroffs; -+}; -+ -+/* -+ * DTrace supports safe loads from probe context; if the address turns out to -+ * be invalid, a bit will be set by the kernel indicating that DTrace -+ * encountered a memory error, and DTrace will propagate the error to the user -+ * accordingly. 
However, there may exist some regions of memory in which an -+ * arbitrary load can change system state, and from which it is impossible to -+ * recover from such a load after it has been attempted. Examples of this may -+ * include memory in which programmable I/O registers are mapped (for which a -+ * read may have some implications for the device) or (in the specific case of -+ * UltraSPARC-I and -II) the virtual address hole. The platform is required -+ * to make DTrace aware of these toxic ranges; DTrace will then check that -+ * target addresses are not in a toxic range before attempting to issue a -+ * safe load. -+ */ -+struct dtrace_toxrange { -+ uintptr_t dtt_base; -+ uintptr_t dtt_limit; -+}; -+ -+/* -+ * DTrace Helper Implementation -+ * -+ * A description of the helper architecture may be found in <linux/dtrace.h>. -+ * Each process contains a pointer to its helpers in its dtrace_helpers -+ * member. This is a pointer to a dtrace_helpers structure, which contains an -+ * array of pointers to dtrace_helper structures, helper variable state (shared -+ * among a process's helpers) and a generation count. (The generation count is -+ * used to provide an identifier when a helper is added so that it may be -+ * subsequently removed.) The dtrace_helper structure is self-explanatory, -+ * containing pointers to the objects needed to execute the helper. Note that -+ * helpers are _duplicated_ across fork(2), and destroyed on exec(2). No more -+ * than dtrace_helpers_max are allowed per-process. 
-+ */ -+struct dtrace_helper_action { -+ int dtha_generation; /* helper action generation */ -+ int dtha_nactions; /* number of actions */ -+ struct dtrace_difo *dtha_predicate; /* helper action predicate */ -+ struct dtrace_difo **dtha_actions; /* array of actions */ -+ struct dtrace_helper_action *dtha_next; /* next helper action */ -+}; -+ -+struct dtrace_helper_provider { -+ int dthp_generation; /* helper provider generation */ -+ uint32_t dthp_ref; /* reference count */ -+ struct dof_helper dthp_prov; /* DOF w/ provider and probes */ -+}; -+ -+struct dtrace_helpers { -+ struct dtrace_helper_action **dthps_actions; /* helper actions array */ -+ struct dtrace_vstate dthps_vstate; /* helper action var. state */ -+ struct dtrace_helper_provider **dthps_provs; /* providers array */ -+ uint_t dthps_nprovs; /* count of providers */ -+ uint_t dthps_maxprovs; /* provider array size */ -+ int dthps_generation; /* current generation */ -+ pid_t dthps_pid; /* pid of associated proc */ -+ int dthps_deferred; /* helper in deferred list */ -+ struct dtrace_helpers *dthps_next; /* next pointer */ -+ struct dtrace_helpers *dthps_prev; /* prev pointer */ -+}; -+ -+/* -+ * DTrace Helper Action Tracing -+ * -+ * Debugging helper actions can be arduous. To ease the development and -+ * debugging of helpers, DTrace contains a tracing-framework-within-a-tracing- -+ * framework: helper tracing. If dtrace_helptrace_enabled is non-zero (which -+ * it is by default on DEBUG kernels), all helper activity will be traced to a -+ * global, in-kernel ring buffer. Each entry includes a pointer to the specific -+ * helper, the location within the helper, and a trace of all local variables. -+ * The ring buffer may be displayed in a human-readable format with the -+ * ::dtrace_helptrace mdb(1) dcmd. 
-+ */ -+struct dtrace_helptrace { -+ struct dtrace_helper_action *dtht_helper; /* helper action */ -+ int dtht_where; /* where in helper action */ -+ int dtht_nlocals; /* number of locals */ -+ int dtht_fault; /* type of fault (if any) */ -+ int dtht_fltoffs; /* DIF offset */ -+ uint64_t dtht_illval; /* faulting value */ -+ uint64_t dtht_locals[1]; /* local variables */ -+}; -+ -+extern struct mutex dtrace_lock; -+extern struct mutex dtrace_provider_lock; -+extern struct mutex dtrace_meta_lock; -+ -+extern dtrace_genid_t dtrace_probegen; -+extern struct kmem_cache *dtrace_probe_cachep; -+ -+extern struct dtrace_pops dtrace_provider_ops; -+ -+extern int dtrace_opens; -+extern int dtrace_err_verbose; -+ -+extern struct dtrace_toxrange *dtrace_toxrange; -+extern int dtrace_toxranges; -+ -+extern void dtrace_nullop(void); -+extern int dtrace_enable_nullop(void); -+extern int dtrace_istoxic(uintptr_t, size_t); -+ -+/* -+ * DTrace Probe Context Functions -+ */ -+ -+extern void dtrace_panic(const char *, ...); -+extern int dtrace_assfail(const char *, const char *, int); -+extern void dtrace_aggregate_min(uint64_t *, uint64_t, uint64_t); -+extern void dtrace_aggregate_max(uint64_t *, uint64_t, uint64_t); -+extern void dtrace_aggregate_quantize(uint64_t *, uint64_t, uint64_t); -+extern void dtrace_aggregate_lquantize(uint64_t *, uint64_t, uint64_t); -+extern void dtrace_aggregate_llquantize(uint64_t *, uint64_t, uint64_t); -+extern void dtrace_aggregate_avg(uint64_t *, uint64_t, uint64_t); -+extern void dtrace_aggregate_stddev(uint64_t *, uint64_t, uint64_t); -+extern void dtrace_aggregate_count(uint64_t *, uint64_t, uint64_t); -+extern void dtrace_aggregate_sum(uint64_t *, uint64_t, uint64_t); -+extern void dtrace_aggregate(struct dtrace_aggregation *, -+ struct dtrace_buffer *, -+ intptr_t, struct dtrace_buffer *, uint64_t, -+ uint64_t); -+ -+/* -+ * DTrace Probe Hashing Functions -+ */ -+ -+extern struct dtrace_hash *dtrace_hash_create(uintptr_t, uintptr_t, uintptr_t); 
-+extern void dtrace_hash_destroy(struct dtrace_hash *); -+extern int dtrace_hash_add(struct dtrace_hash *, struct dtrace_probe *); -+extern struct dtrace_probe *dtrace_hash_lookup(struct dtrace_hash *, -+ struct dtrace_probe *); -+extern int dtrace_hash_collisions(struct dtrace_hash *, struct dtrace_probe *); -+extern void dtrace_hash_remove(struct dtrace_hash *, struct dtrace_probe *); -+ -+/* -+ * DTrace Speculation Functions -+ */ -+extern int dtrace_speculation(struct dtrace_state *); -+extern void dtrace_speculation_commit(struct dtrace_state *, processorid_t, -+ dtrace_specid_t); -+extern void dtrace_speculation_discard(struct dtrace_state *, processorid_t, -+ dtrace_specid_t); -+extern void dtrace_speculation_clean(struct dtrace_state *); -+extern struct dtrace_buffer *dtrace_speculation_buffer(struct dtrace_state *, -+ processorid_t, -+ dtrace_specid_t); -+ -+/* -+ * DTrace Non-Probe Context Utility Functions -+ */ -+ -+/* -+ * DTrace Matching Functions -+ */ -+extern struct dtrace_hash *dtrace_bymod; -+extern struct dtrace_hash *dtrace_byfunc; -+extern struct dtrace_hash *dtrace_byname; -+ -+extern int dtrace_match_priv(const struct dtrace_probe *, uint32_t, kuid_t); -+extern int dtrace_match_probe(const struct dtrace_probe *, -+ const struct dtrace_probekey *, uint32_t, -+ kuid_t); -+extern int dtrace_match_glob(const char *, const char *, int); -+extern int dtrace_match_string(const char *, const char *, int); -+extern int dtrace_match_nul(const char *, const char *, int); -+extern int dtrace_match_nonzero(const char *, const char *, int); -+extern int dtrace_match(const struct dtrace_probekey *, uint32_t, kuid_t, -+ int (*matched)(struct dtrace_probe *, void *), void *); -+extern void dtrace_probekey(const struct dtrace_probedesc *, -+ struct dtrace_probekey *); -+ -+/* -+ * DTrace Provider-to-Framework API Functions -+ */ -+ -+extern struct dtrace_provider *dtrace_provider; -+extern struct dtrace_meta *dtrace_meta_pid; -+extern struct dtrace_helpers 
*dtrace_deferred_pid; -+ -+/* -+ * DTrace Privilege Check Functions -+ */ -+extern int dtrace_priv_proc_destructive(struct dtrace_state *); -+extern int dtrace_priv_proc_control(struct dtrace_state *); -+extern int dtrace_priv_proc(struct dtrace_state *); -+extern int dtrace_priv_kernel(struct dtrace_state *); -+ -+/* -+ * DTrace Probe Management Functions -+ */ -+ -+extern int dtrace_probe_enable(const struct dtrace_probedesc *, -+ struct dtrace_enabling *); -+extern void dtrace_probe_description(const struct dtrace_probe *, -+ struct dtrace_probedesc *); -+extern void dtrace_probe_provide(struct dtrace_probedesc *, -+ struct dtrace_provider *); -+extern int dtrace_probe_init(void); -+extern void dtrace_probe_exit(void); -+extern void dtrace_probe_remove_id(dtrace_id_t); -+extern struct dtrace_probe *dtrace_probe_lookup_id(dtrace_id_t); -+extern struct dtrace_probe *dtrace_probe_get_next(dtrace_id_t *); -+extern int dtrace_probe_for_each(int (*)(int, void *, void *), void *); -+ -+/* -+ * DTrace Kernel Hooks -+ */ -+extern void (*dtrace_modload)(struct module *); -+extern void (*dtrace_modunload)(struct module *); -+ -+extern uint8_t dtrace_load8(uintptr_t); -+extern uint16_t dtrace_load16(uintptr_t); -+extern uint32_t dtrace_load32(uintptr_t); -+extern uint64_t dtrace_load64(uintptr_t); -+#ifdef CONFIG_64BIT -+#define dtrace_loadptr dtrace_load64 -+#else -+#define dtrace_loadptr dtrace_load32 -+#endif -+ -+ -+extern void dtrace_bzero(void *, size_t); -+ -+extern int dtrace_vcanload(void *, struct dtrace_diftype *, -+ struct dtrace_mstate *, -+ struct dtrace_vstate *); -+extern int dtrace_canload(uintptr_t, size_t, struct dtrace_mstate *, -+ struct dtrace_vstate *); -+ -+extern int dtrace_difo_validate(struct dtrace_difo *, struct dtrace_vstate *, -+ uint_t, const struct cred *); -+extern int dtrace_difo_validate_helper(struct dtrace_difo *); -+extern int dtrace_difo_cacheable(struct dtrace_difo *); -+extern void dtrace_difo_hold(struct dtrace_difo *); -+extern 
void dtrace_difo_init(struct dtrace_difo *, struct dtrace_vstate *); -+extern struct dtrace_difo *dtrace_difo_duplicate(struct dtrace_difo *, -+ struct dtrace_vstate *); -+extern void dtrace_difo_release(struct dtrace_difo *, struct dtrace_vstate *); -+ -+extern uint64_t dtrace_vtime_references; -+ -+extern uint64_t dtrace_dif_emulate(struct dtrace_difo *, -+ struct dtrace_mstate *, -+ struct dtrace_vstate *, -+ struct dtrace_state *); -+ -+/* -+ * DTrace Format Functions -+ */ -+extern uint16_t dtrace_format_add(struct dtrace_state *, char *); -+extern void dtrace_format_remove(struct dtrace_state *, uint16_t); -+extern void dtrace_format_destroy(struct dtrace_state *); -+ -+/* -+ * DTrace Predicate Functions -+ */ -+extern struct dtrace_predicate *dtrace_predicate_create(struct dtrace_difo *); -+extern void dtrace_predicate_hold(struct dtrace_predicate *); -+extern void dtrace_predicate_release(struct dtrace_predicate *, -+ struct dtrace_vstate *); -+ -+/* -+ * DTrace Action Description Functions -+ */ -+extern struct dtrace_actdesc *dtrace_actdesc_create(dtrace_actkind_t, uint32_t, -+ uint64_t, uint64_t); -+extern void dtrace_actdesc_hold(struct dtrace_actdesc *); -+extern void dtrace_actdesc_release(struct dtrace_actdesc *, -+ struct dtrace_vstate *); -+ -+/* -+ * DTrace Helper Functions -+ */ -+extern void dtrace_helpers_destroy(struct task_struct *); -+extern void dtrace_helpers_duplicate(struct task_struct *, -+ struct task_struct *); -+extern uint64_t dtrace_helper(int, struct dtrace_mstate *, -+ struct dtrace_state *, -+ uint64_t, uint64_t); -+ -+/* -+ * DTrace ECB Functions -+ */ -+extern struct dtrace_ecb *dtrace_ecb_create_cache; -+ -+extern int dtrace_ecb_create_enable(struct dtrace_probe *, void *); -+extern void dtrace_ecb_disable(struct dtrace_ecb *); -+extern void dtrace_ecb_destroy(struct dtrace_ecb *); -+extern void dtrace_ecb_resize(struct dtrace_ecb *); -+extern int dtrace_ecb_enable(struct dtrace_ecb *); -+extern struct dtrace_ecb 
*dtrace_epid2ecb(struct dtrace_state *, -+ dtrace_epid_t); -+extern struct dtrace_aggregation *dtrace_aggid2agg(struct dtrace_state *, -+ dtrace_aggid_t); -+ -+/* -+ * DTrace Buffer Functions -+ * -+ * DTrace Buffers -+ * -+ * Principal buffers, aggregation buffers, and speculative buffers are all -+ * managed with the dtrace_buffer structure. By default, this structure -+ * includes twin data buffers -- dtb_tomax and dtb_xamot -- that serve as the -+ * active and passive buffers, respectively. For speculative buffers, -+ * dtb_xamot will be NULL; for "ring" and "fill" buffers, dtb_xamot will point -+ * to a scratch buffer. For all buffer types, the dtrace_buffer structure is -+ * always allocated on a per-CPU basis; a single dtrace_buffer structure is -+ * never shared among CPUs. (That is, there is never true sharing of the -+ * dtrace_buffer structure; to prevent false sharing of the structure, it must -+ * always be aligned to the coherence granularity -- generally 64 bytes.) -+ * -+ * One of the critical design decisions of DTrace is that a given ECB always -+ * stores the same quantity and type of data. This is done to assure that the -+ * only metadata required for an ECB's traced data is the EPID. That is, from -+ * the EPID, the consumer can determine the data layout. (The data buffer -+ * layout is shown schematically below.) By assuring that one can determine -+ * data layout from the EPID, the metadata stream can be separated from the -+ * data stream -- simplifying the data stream enormously. -+ * -+ * base of data buffer ---> +------+--------------------+------+ -+ * | EPID | data | EPID | -+ * +------+--------+------+----+------+ -+ * | data | EPID | data | -+ * +---------------+------+-----------+ -+ * | data, cont. | -+ * +------+--------------------+------+ -+ * | EPID | data | | -+ * +------+--------------------+ | -+ * | || | -+ * | || | -+ * | \/ | -+ * : : -+ * . . -+ * . . -+ * . . 
-+ * : : -+ * | | -+ * limit of data buffer ---> +----------------------------------+ -+ * -+ * When evaluating an ECB, dtrace_probe() determines if the ECB's needs of the -+ * principal buffer (both scratch and payload) exceed the available space. If -+ * the ECB's needs exceed available space (and if the principal buffer policy -+ * is the default "switch" policy), the ECB is dropped, the buffer's drop count -+ * is incremented, and processing advances to the next ECB. If the ECB's needs -+ * can be met with the available space, the ECB is processed, but the offset in -+ * the principal buffer is only advanced if the ECB completes processing -+ * without error. -+ * -+ * When a buffer is to be switched (either because the buffer is the principal -+ * buffer with a "switch" policy or because it is an aggregation buffer), a -+ * cross call is issued to the CPU associated with the buffer. In the cross -+ * call context, interrupts are disabled, and the active and the inactive -+ * buffers are atomically switched. This involves switching the data pointers, -+ * copying the various state fields (offset, drops, errors, etc.) into their -+ * inactive equivalents, and clearing the state fields. Because interrupts are -+ * disabled during this procedure, the switch is guaranteed to appear atomic to -+ * dtrace_probe(). -+ * -+ * DTrace Ring Buffering -+ * -+ * To process a ring buffer correctly, one must know the oldest valid record. -+ * Processing starts at the oldest record in the buffer and continues until -+ * the end of the buffer is reached. Processing then resumes starting with -+ * the record stored at offset 0 in the buffer, and continues until the -+ * youngest record is processed. If trace records are of a fixed-length, -+ * determining the oldest record is trivial: -+ * -+ * - If the ring buffer has not wrapped, the oldest record is the record -+ * stored at offset 0. 
-+ * -+ * - If the ring buffer has wrapped, the oldest record is the record stored -+ * at the current offset. -+ * -+ * With variable length records, however, just knowing the current offset -+ * doesn't suffice for determining the oldest valid record: assuming that one -+ * allows for arbitrary data, one has no way of searching forward from the -+ * current offset to find the oldest valid record. (That is, one has no way -+ * of separating data from metadata.) It would be possible to simply refuse to -+ * process any data in the ring buffer between the current offset and the -+ * limit, but this leaves (potentially) an enormous amount of otherwise valid -+ * data unprocessed. -+ * -+ * To effect ring buffering, we track two offsets in the buffer: the current -+ * offset and the _wrapped_ offset. If a request is made to reserve some -+ * amount of data, and the buffer has wrapped, the wrapped offset is -+ * incremented until the wrapped offset minus the current offset is greater -+ * than or equal to the reserve request. This is done by repeatedly looking -+ * up the ECB corresponding to the EPID at the current wrapped offset, and -+ * incrementing the wrapped offset by the size of the data payload -+ * corresponding to that ECB. If this offset is greater than or equal to the -+ * limit of the data buffer, the wrapped offset is set to 0. Thus, the -+ * current offset effectively "chases" the wrapped offset around the buffer. -+ * Schematically: -+ * -+ * base of data buffer ---> +------+--------------------+------+ -+ * | EPID | data | EPID | -+ * +------+--------+------+----+------+ -+ * | data | EPID | data | -+ * +---------------+------+-----------+ -+ * | data, cont. 
| -+ * +------+---------------------------+ -+ * | EPID | data | -+ * current offset ---> +------+---------------------------+ -+ * | invalid data | -+ * wrapped offset ---> +------+--------------------+------+ -+ * | EPID | data | EPID | -+ * +------+--------+------+----+------+ -+ * | data | EPID | data | -+ * +---------------+------+-----------+ -+ * : : -+ * . . -+ * . ... valid data ... . -+ * . . -+ * : : -+ * +------+-------------+------+------+ -+ * | EPID | data | EPID | data | -+ * +------+------------++------+------+ -+ * | data, cont. | leftover | -+ * limit of data buffer ---> +-------------------+--------------+ -+ * -+ * If the amount of requested buffer space exceeds the amount of space -+ * available between the current offset and the end of the buffer: -+ * -+ * (1) all words in the data buffer between the current offset and the limit -+ * of the data buffer (marked "leftover", above) are set to -+ * DTRACE_EPIDNONE -+ * -+ * (2) the wrapped offset is set to zero -+ * -+ * (3) the iteration process described above occurs until the wrapped offset -+ * is greater than the amount of desired space. -+ * -+ * The wrapped offset is implemented by (re-)using the inactive offset. -+ * In a "switch" buffer policy, the inactive offset stores the offset in -+ * the inactive buffer; in a "ring" buffer policy, it stores the wrapped -+ * offset. -+ * -+ * DTrace Scratch Buffering -+ * -+ * Some ECBs may wish to allocate dynamically-sized temporary scratch memory. -+ * To accommodate such requests easily, scratch memory may be allocated in -+ * the buffer beyond the current offset plus the needed memory of the current -+ * ECB. If there isn't sufficient room in the buffer for the requested amount -+ * of scratch space, the allocation fails and an error is generated. Scratch -+ * memory is tracked in the dtrace_mstate_t and is automatically freed when -+ * the ECB ceases processing. 
Note that ring buffers cannot allocate their -+ * scratch from the principal buffer -- lest they needlessly overwrite older, -+ * valid data. Ring buffers therefore have their own dedicated scratch buffer -+ * from which scratch is allocated. -+ */ -+ -+extern void dtrace_buffer_switch(struct dtrace_buffer *); -+extern void dtrace_buffer_activate(struct dtrace_state *); -+extern int dtrace_buffer_alloc(struct dtrace_buffer *, size_t, int, -+ processorid_t); -+extern void dtrace_buffer_drop(struct dtrace_buffer *); -+extern intptr_t dtrace_buffer_reserve(struct dtrace_buffer *, size_t, size_t, -+ struct dtrace_state *, -+ struct dtrace_mstate *); -+extern void dtrace_buffer_polish(struct dtrace_buffer *); -+extern void dtrace_buffer_free(struct dtrace_buffer *); -+ -+/* -+ * DTrace framework/probe data synchronization -+ * ------------------------------------------- -+ * -+ * The dtrace_sync() facility is used to synchronize global DTrace framework -+ * data with DTrace probe context. The framework updates data and then calls -+ * dtrace_sync(). dtrace_sync() loops until it observes all CPUs have been out -+ * of probe context at least once. This ensures all consumers are using the -+ * updated data. -+ * -+ * DTrace probes have several requirements. First DTrace probe context cannot -+ * block. DTrace probes execute with interrupts disabled. Locks cannot be -+ * acquired in DTrace probe context. A second requirement is that DTrace -+ * probes need to be as high performance as possible to minimize the effect of -+ * enabled probes. -+ * -+ * DTrace framework data changes have their own requirements. DTrace data -+ * changes/syncs are extremely infrequent compared to DTrace probe firings. -+ * Probes can be in commonly executed code. A good trade-off is to favor -+ * DTrace probe context performance over DTrace sync performance. -+ * -+ * To meet the above requirements, the DTrace data synchronization algorithm -+ * is lock-less. The DTrace probe path is wait-free. 
The DTrace probe path -+ * is memory-barrier-free in the common case to minimize probe effect. -+ * dtrace_probe has been made membar free in the common case by adding a read -+ * in dtrace_probe and adding an additional write and membar to dtrace_sync(). -+ * -+ * A simple algorithm is to have dtrace_probe set a flag for its CPU when -+ * entering DTrace probe context and clear the flag when it exits DTrace probe -+ * context. A producer of DTrace framework data checks the flag to detect and -+ * synchronize with probe context. Unfortunately memory ordering issues -+ * complicate the implementation. Memory barriers are required in probe -+ * context for this simple approach to work. -+ * -+ * A simple implementation to sync with one CPU that works with any memory -+ * ordering model is: -+ * -+ * DTrace probe: -+ * 1. CPU->in_probe_context = B_TRUE; -+ * 2. dtrace_membar_enter()// membar #StoreLoad|#StoreStore -+ * 3. access framework shared data// critical section -+ * 4. dtrace_membar_exit()// membar #LoadStore|#StoreStore -+ * 5. CPU->in_probe_context = B_FALSE; -+ * -+ * DTrace framework dtrace_sync: -+ * 0. update framework shared data -+ * 1. dtrace_membar_enter()// membar #StoreLoad|#StoreStore -+ * 2. while (CPU->in_probe_context == B_TRUE) -+ * 3. spin -+ * 4. dtrace_membar_exit()// membar #LoadStore|#StoreStore -+ * 5. produce shared dtrace data -+ * -+ * A note on memory ordering -+ * ------------------------- -+ * -+ * dtrace_membar_enter() guarantees later loads cannot complete before earlier -+ * stores, and it guarantees later stores cannot complete before earlier stores. -+ * dtrace_membar_enter() is, in SPARC parlance, a membar #StoreLoad|#StoreStore. -+ * -+ * dtrace_membar_exit() guarantees later stores cannot complete before earlier -+ * loads, and it guarantees later stores cannot complete before earlier stores. -+ * dtrace_membar_exit() is, in SPARC parlance, a membar #LoadStore|#StoreStore. 
-+ * -+ * Please see the SPARC and Intel processor guides on memory ordering. -+ * All sun4v and Fujitsu processors are TSO (Total Store Order). Modern -+ * supported Intel and AMD processors have similar load and store ordering -+ * to SPARC. All processors currently supported by Solaris have these memory -+ * ordering properties: -+ * 1) Loads are ordered with respect to earlier loads. -+ * 2) Stores are ordered with respect to earlier stores. -+ * 3a) SPARC Atomic load-store behaves as if it were followed by a -+ * MEMBAR #LoadLoad, #LoadStore, and #StoreStore. -+ * 3b) X86 Atomic operations serialize load and store. -+ * 4) Stores cannot bypass earlier loads. -+ * -+ * The above implementation details allow the membars to be simplified thus: -+ * A) dtrace_membar_enter() can be reduced to "membar #StoreLoad" on sparc. -+ * See property number 4 above. -+ * Since dtrace_membar_enter() is an atomic operation on x86, it cannot be -+ * reduced further. -+ * B) dtrace_membar_exit() becomes a NOP on both SPARC and x86. -+ * See properties 2 and 4. -+ * -+ * -+ * Elimination of membar #StoreLoad from dtrace probe context -+ * ---------------------------------------------------------- -+ * -+ * Furthermore it is possible to eliminate all memory barriers from the common -+ * dtrace_probe() entry case. The only membar needed in dtrace_probe is there -+ * to prevent Loads of global DTrace framework data from passing the Store to -+ * the "in_probe_context" flag (i.e. the dtrace_membar_enter()). -+ * A Load at the beginning of the algorithm is also ordered with these later -+ * Loads and Stores: the membar #StoreLoad can be replaced with a early Load of -+ * a "sync_request" flag and a conditional branch on the flag value. -+ * -+ * dtrace_sync() first Stores to the "sync_request" flag, and dtrace_probe() -+ * starts by Loading the flag. 
This Load in dtrace_probe() of "sync_request" -+ * is ordered with its later Store to the "in_probe_context" flag and -+ * dtrace_probe's later Loads of DTrace framework data. dtrace_probe() only -+ * needs a membar #StoreLoad iff the "sync_request" flag is set. -+ * -+ * Optimized Synchronization Algorithm -+ * ----------------------------------- -+ * -+ * DTrace probe: -+ * + 1a. request_flag = CPU->sync_request // Load -+ * 1b. CPU->in_probe_context = B_TRUE // Store -+ * + 2. if request_flag > 0 -+ * dtrace_membar_enter() // membar #StoreLoad -+ * 3. access framework shared data // critical section -+ * - -+ * 5. CPU->in_probe_context = B_FALSE // Store -+ * -+ * DTrace framework dtrace_sync: -+ * + 1a. atomically add 1 to CPU->sync_request // Store and -+ * 1b. dtrace_membar_enter() // membar #StoreLoad -+ * 2. while (CPU->in_probe_context == B_TRUE) // Load -+ * 3. spin -+ * + 4a. atomically subtract 1 from CPU->sync_request // Load + Store -+ * - -+ * 5. produce shared dtrace data -+ * -+ * This algorithm has been proven correct by analysis of all interleaving -+ * scenarios of the above operations with the hardware memory ordering -+ * described above. -+ * -+ * The Load and store of the flag pair is very inexpensive. The cacheline with -+ * the flag pair is never accessed by a different CPU except by dtrace_sync. -+ * dtrace_sync is very uncommon compared to typical probe firings. The removal -+ * of membars from DTrace probe context at the expense of a Load and Store and -+ * a conditional branch is a good performance win. -+ * -+ * As implemented there is one pair of flags per CPU. The flags are in one -+ * cacheline; they could be split into two cachelines if dtrace_sync was more -+ * common. dtrace_sync loops over all NCPU sets of flags. dtrace_sync lazily -+ * only does one dtrace_membar_enter() (step 1b) after setting all NCPU -+ * sync_request flags. 
-+ * -+ * Sample aliasing could cause dtrace_sync() to always sample a CPU's -+ * in_probe_context flag when the CPU is in probe context even if the CPU -+ * left and returned to probe context one or more times since the last sample. -+ * cpuc_in_probe_ctx is implemented as an even/odd counter instead of a -+ * boolean flag. cpuc_in_probe_ctx is odd when in probe context and even -+ * when not in probe context. Probe context increments cpuc_in_probe_ctx when -+ * entering and exiting. dtrace_probe() handles re-entry by not incrementing the -+ * counter for re-entrant entry and exit. -+ */ -+ -+/* -+ * dtrace_membar_exit() is a NOP on current SPARC and X86 hardware. -+ * It is defined as an inline asm statement to prevent the C optimizer from -+ * moving C statements around the membar. -+ */ -+#define dtrace_membar_exit() \ -+ __asm__ __volatile__("" ::: "memory") -+ -+/* -+ * dtrace_membar_enter() does not need an explicit membar #StoreStore because -+ * modern SPARC hardware is TSO: stores are ordered with other stores. -+ */ -+#define dtrace_membar_enter() \ -+ mb() -+ -+#define dtrace_safe_smt_pause() \ -+ cpu_relax() -+ -+/* -+ * Used by dtrace_probe() to flag entry to the critical section. -+ * dtrace_probe() context may be consuming DTrace framework data. -+ * -+ * cpuc_in_probe_ctx is odd when in probe context and even when not in -+ * probe context. The flag must not be incremented when re-entering from -+ * probe context.
-+ */ -+#define DTRACE_SYNC_ENTER_CRITICAL(cookie, re_entry) \ -+{ \ -+ uint64_t requests; \ -+ uint64_t count; \ -+ \ -+ preempt_disable(); \ -+ local_irq_save(cookie); \ -+ \ -+ requests = atomic64_read(&this_cpu_core->cpuc_sync_requests); \ -+ \ -+ /* Increment flag iff it is even */ \ -+ count = atomic64_read(&this_cpu_core->cpuc_in_probe_ctx); \ -+ re_entry = count & 0x1; \ -+ atomic64_set(&this_cpu_core->cpuc_in_probe_ctx, count | 0x1); \ -+ ASSERT(DTRACE_SYNC_IN_CRITICAL(smp_processor_id())); \ -+ \ -+ /* \ -+ * Later Loads are ordered with respect to the Load of \ -+ * cpuc_sync_requests. The Load is also guaranteed to complete \ -+ * before the store to cpuc_in_probe_ctx. Thus a membar_enter \ -+ * is only needed when requests is not 0. This is very \ -+ * uncommon. \ -+ */ \ -+ if (requests > 0) { \ -+ dtrace_membar_enter(); \ -+ } \ -+} -+ -+/* -+ * Used by dtrace_probe() to flag exit from the critical section. -+ * dtrace_probe context is no longer using DTrace framework data. -+ */ -+#define DTRACE_SYNC_EXIT_CRITICAL(cookie, re_entry) \ -+{ \ -+ dtrace_membar_exit(); \ -+ ASSERT((re_entry | 0x1) == 0x1); \ -+ \ -+ /* \ -+ * flag must not be incremented when returning to probe context.\ -+ */ \ -+ atomic64_add(~re_entry & 0x1, &this_cpu_core->cpuc_in_probe_ctx); \ -+ ASSERT(re_entry == \ -+ (atomic64_read(&this_cpu_core->cpuc_in_probe_ctx) & 0x1)); \ -+ local_irq_restore(cookie); \ -+ preempt_enable(); \ -+} -+ -+/* -+ * Used by dtrace_sync to inform dtrace_probe it needs to synchronize with -+ * dtrace_sync. dtrace_probe consumes the cpuc_sync_requests flag to determine -+ * if it needs a membar_enter. Not called from probe context. -+ * -+ * cpuc_sync_requests must be updated atomically by dtrace_sync because there -+ * may be multiple dtrace_sync operations executing at the same time. -+ * cpuc_sync_requests is a simple count of the number of concurrent -+ * dtrace_sync requests.
-+ */ -+#define DTRACE_SYNC_START(cpuid) \ -+{ \ -+ atomic64_add(1, &(per_cpu_core(cpuid))->cpuc_sync_requests); \ -+ ASSERT(atomic64_read(&per_cpu_core(cpuid)->cpuc_sync_requests) > 0); \ -+} -+ -+/* -+ * Used by dtrace_sync to flag dtrace_probe that it no longer needs to -+ * synchronize with dtrace_sync. Not called from probe context. -+ */ -+#define DTRACE_SYNC_END(cpuid) \ -+{ \ -+ atomic64_add(-1, &(per_cpu_core(cpuid))->cpuc_sync_requests); \ -+ ASSERT(atomic64_read(&per_cpu_core(cpuid)->cpuc_sync_requests) >= 0); \ -+} -+ -+/* -+ * The next two macros are used by dtrace_sync to check if the target CPU is in -+ * DTrace probe context. cpuc_in_probe_ctx is a monotonically increasing -+ * count which dtrace_probe() increments when entering and exiting probe -+ * context. The flag is odd when in probe context, and even when not in probe -+ * context. -+ */ -+#define DTRACE_SYNC_IN_CRITICAL(cpuid) \ -+ (atomic64_read(&per_cpu_core(cpuid)->cpuc_in_probe_ctx) & 0x1) -+ -+/* -+ * Used to check if the target CPU left and then entered probe context again. -+ */ -+#define DTRACE_SYNC_CRITICAL_COUNT(cpuid) \ -+ (atomic64_read(&per_cpu_core(cpuid)->cpuc_in_probe_ctx)) -+ -+/* -+ * The next three macros are bitmap operations used by dtrace_sync to keep track -+ * of which CPUs it still needs to synchronize with.
-+ */ -+#define DTRACE_SYNC_OUTSTANDING(cpuid, bitmap) \ -+ (cpumask_test_cpu(cpuid, bitmap) == 1) -+ -+#define DTRACE_SYNC_NEEDED(cpuid, bitmap) \ -+ cpumask_set_cpu(cpuid, bitmap) -+ -+#define DTRACE_SYNC_DONE(cpuid, bitmap) \ -+ cpumask_clear_cpu(cpuid, bitmap) -+ -+extern uint64_t dtrace_sync_sample_count; -+extern void dtrace_sync(void); -+ -+/* -+ * DTrace Enabling Functions -+ */ -+extern struct dtrace_enabling *dtrace_retained; -+extern dtrace_genid_t dtrace_retained_gen; -+ -+extern struct dtrace_enabling *dtrace_enabling_create(struct dtrace_vstate *); -+extern void dtrace_enabling_add(struct dtrace_enabling *, -+ struct dtrace_ecbdesc *); -+extern void dtrace_enabling_dump(struct dtrace_enabling *); -+extern void dtrace_enabling_destroy(struct dtrace_enabling *); -+extern int dtrace_enabling_retain(struct dtrace_enabling *); -+extern int dtrace_enabling_replicate(struct dtrace_state *, -+ struct dtrace_probedesc *, -+ struct dtrace_probedesc *); -+extern void dtrace_enabling_retract(struct dtrace_state *); -+extern int dtrace_enabling_match(struct dtrace_enabling *, int *); -+extern void dtrace_enabling_matchall(void); -+extern void dtrace_enabling_prime(struct dtrace_state *); -+extern void dtrace_enabling_provide(struct dtrace_provider *); -+ -+/* -+ * DOF functions -+ */ -+extern void dtrace_dof_error(struct dof_hdr *, const char *); -+extern struct dof_hdr *dtrace_dof_create(struct dtrace_state *); -+extern struct dof_hdr *dtrace_dof_copyin(void __user *, int *); -+extern struct dof_hdr *dtrace_dof_property(const char *); -+extern void dtrace_dof_destroy(struct dof_hdr *); -+extern int dtrace_dof_slurp(struct dof_hdr *, struct dtrace_vstate *, -+ const struct cred *, struct dtrace_enabling **, -+ uint64_t, int); -+extern int dtrace_dof_options(struct dof_hdr *, struct dtrace_state *); -+extern void dtrace_helper_provide(struct dof_helper *dhp, pid_t pid); -+extern int dtrace_helper_slurp(struct dof_hdr *, struct dof_helper *); -+extern int 
dtrace_helper_destroygen(int); -+ -+/* -+ * DTrace Anonymous Enabling Functions -+ */ -+struct dtrace_anon { -+ struct dtrace_state *dta_state; -+ struct dtrace_enabling *dta_enabling; -+ processorid_t dta_beganon; -+}; -+ -+extern struct dtrace_anon dtrace_anon; -+ -+extern struct dtrace_state *dtrace_anon_grab(void); -+extern void dtrace_anon_property(void); -+ -+/* -+ * DTrace Consumer State Functions -+ */ -+extern struct kmem_cache *dtrace_state_cachep; -+extern size_t dtrace_strsize_default; -+ -+extern ktime_t dtrace_deadman_timeout; -+extern int dtrace_destructive_disallow; -+ -+extern dtrace_id_t dtrace_probeid_begin; -+extern dtrace_id_t dtrace_probeid_end; -+extern dtrace_id_t dtrace_probeid_error; -+ -+extern struct dtrace_dynvar dtrace_dynhash_sink; -+ -+extern struct user_namespace *init_user_namespace; -+ -+extern int dtrace_dstate_init(struct dtrace_dstate *, size_t); -+extern void dtrace_dstate_fini(struct dtrace_dstate *); -+extern void dtrace_vstate_fini(struct dtrace_vstate *); -+extern struct dtrace_state *dtrace_state_create(struct file *); -+extern int dtrace_state_go(struct dtrace_state *, processorid_t *); -+extern int dtrace_state_stop(struct dtrace_state *, processorid_t *); -+extern int dtrace_state_option(struct dtrace_state *, dtrace_optid_t, -+ dtrace_optval_t); -+extern void dtrace_state_destroy(struct dtrace_state *); -+ -+/* -+ * DTrace Utility Functions -+ */ -+extern int dtrace_isglob(const char *); -+extern int dtrace_gmatch(const char *, const char *); -+extern void *dtrace_vzalloc(unsigned long); -+extern void *dtrace_vzalloc_try(unsigned long); -+extern char *dtrace_strdup(const char *); -+extern int dtrace_strncmp(char *, char *, size_t); -+extern size_t dtrace_strlen(const char *, size_t); -+extern int dtrace_badattr(const struct dtrace_attribute *); -+extern int dtrace_badname(const char *); -+extern void dtrace_cred2priv(const struct cred *, uint32_t *, kuid_t *); -+ -+extern void ctf_forceload(void); -+ -+#define 
dtrace_membar_producer() smp_wmb() -+#define dtrace_membar_consumer() smp_rmb() -+ -+typedef unsigned long dtrace_icookie_t; -+ -+extern struct mutex cpu_lock; -+ -+extern void dtrace_toxic_ranges(void (*)(uintptr_t, uintptr_t)); -+extern void dtrace_vpanic(const char *, va_list); -+extern int dtrace_getipl(void); -+ -+extern dtrace_icookie_t dtrace_interrupt_disable(void); -+extern void dtrace_interrupt_enable(dtrace_icookie_t); -+ -+typedef void (*dtrace_xcall_t)(void *); -+ -+extern void dtrace_xcall(processorid_t, dtrace_xcall_t, void *); -+ -+extern uintptr_t dtrace_fulword(void *); -+extern uint8_t dtrace_fuword8(void *); -+extern uint16_t dtrace_fuword16(void *); -+extern uint32_t dtrace_fuword32(void *); -+extern uint64_t dtrace_fuword64(void *); -+ -+extern void dtrace_probe_error(struct dtrace_state *, dtrace_epid_t, int, int, -+ int, uintptr_t); -+ -+extern void dtrace_getpcstack(uint64_t *, int, int, uint32_t *); -+extern void dtrace_getupcstack(uint64_t *, int); -+extern unsigned long dtrace_getufpstack(uint64_t *, uint64_t *, int); -+extern uintptr_t dtrace_getfp(void); -+extern uint64_t dtrace_getarg(int, int); -+extern int dtrace_getstackdepth(struct dtrace_mstate *, int); -+extern int dtrace_getustackdepth(void); -+extern ulong_t dtrace_getreg(struct task_struct *, uint_t); -+extern void dtrace_copyin(uintptr_t, uintptr_t, size_t, -+ volatile uint16_t *); -+extern void dtrace_copyout(uintptr_t, uintptr_t, size_t, -+ volatile uint16_t *); -+extern void dtrace_copyinstr(uintptr_t, uintptr_t, size_t, -+ volatile uint16_t *); -+extern void dtrace_copyoutstr(uintptr_t, uintptr_t, size_t, -+ volatile uint16_t *); -+ -+/* -+ * Platforms that support a fast path to obtain the caller implement the -+ * dtrace_caller() function. -+ * -+ * The first argument is the number of frames that should be skipped when -+ * looking for a caller address. The 2nd argument is a dummy argument that -+ * is necessary for SPARC. -+ * -+ * On x86 this is effectively a NOP.
-+ * -+ * On SPARC it is possible to retrieve the caller address from the register -+ * windows without flushing them to the stack. This involves performing -+ * explicit rotation of the register windows. Modification of the windowing -+ * mechanism state alters all %i, %o, and %l registers so we can only use -+ * %g registers to store temporary data. -+ * -+ * On Linux a lot of %g registers are already allocated for specific purposes. -+ * Saving temporaries to the stack would be a violation of the fast path code -+ * logic. Therefore, the function prototype declares a 2nd argument that serves -+ * as a temporary value. A compiler will not expect that the value in %o1 -+ * will survive the call and therefore dtrace_caller() can use %o1 as a -+ * temporary register. -+ */ -+extern uintptr_t dtrace_caller(int, int); -+ -+extern void dtrace_copyin_arch(uintptr_t, uintptr_t, size_t, -+ volatile uint16_t *); -+extern void dtrace_copyinstr_arch(uintptr_t, uintptr_t, size_t, -+ volatile uint16_t *); -+ -+extern void pdata_init(struct dtrace_module *, struct module *); -+extern void pdata_cleanup(struct dtrace_module *, struct module *); -+ -+extern void debug_enter(char *); -+ -+#endif /* _LINUX_DTRACE_IMPL_H */ -diff --git a/include/dtrace/dtrace_impl_defines.h b/include/dtrace/dtrace_impl_defines.h -new file mode 100644 -index 000000000000..19b57f6188a0 ---- /dev/null -+++ b/include/dtrace/dtrace_impl_defines.h -@@ -0,0 +1,173 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - Implementation Defines -+ * -+ * Copyright (c) 2009, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version.
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_IMPL_DEFINES_H -+#define _LINUX_DTRACE_IMPL_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/preempt.h> -+#include <asm/ptrace.h> -+ -+typedef typeof(instruction_pointer((struct pt_regs *)0)) pc_t; -+ -+enum dtrace_activity { -+ DTRACE_ACTIVITY_INACTIVE = 0, -+ DTRACE_ACTIVITY_WARMUP, -+ DTRACE_ACTIVITY_ACTIVE, -+ DTRACE_ACTIVITY_DRAINING, -+ DTRACE_ACTIVITY_COOLDOWN, -+ DTRACE_ACTIVITY_STOPPED, -+ DTRACE_ACTIVITY_KILLED -+}; -+ -+enum dtrace_dstate_state { -+ DTRACE_DSTATE_CLEAN = 0, -+ DTRACE_DSTATE_EMPTY, -+ DTRACE_DSTATE_DIRTY, -+ DTRACE_DSTATE_RINSING -+}; -+ -+enum dtrace_dynvar_op { -+ DTRACE_DYNVAR_ALLOC, -+ DTRACE_DYNVAR_NOALLOC, -+ DTRACE_DYNVAR_DEALLOC -+}; -+ -+#define DTRACE_MSTATE_ARGS 0x00000001 -+#define DTRACE_MSTATE_PROBE 0x00000002 -+#define DTRACE_MSTATE_EPID 0x00000004 -+#define DTRACE_MSTATE_TIMESTAMP 0x00000008 -+#define DTRACE_MSTATE_STACKDEPTH 0x00000010 -+#define DTRACE_MSTATE_CALLER 0x00000020 -+#define DTRACE_MSTATE_IPL 0x00000040 -+#define DTRACE_MSTATE_FLTOFFS 0x00000080 -+#define DTRACE_MSTATE_USTACKDEPTH 0x00000100 -+#define DTRACE_MSTATE_UCALLER 0x00000200 -+ -+#define DTRACE_PROBEKEY_MAXDEPTH 8 -+ -+enum dtrace_speculation_state { -+ DTRACESPEC_INACTIVE = 0, -+ DTRACESPEC_ACTIVE, -+ DTRACESPEC_ACTIVEONE, -+ DTRACESPEC_ACTIVEMANY, -+ DTRACESPEC_COMMITTING, -+ DTRACESPEC_COMMITTINGMANY, -+ DTRACESPEC_DISCARDING -+}; -+ -+#define DTRACE_HELPER_ACTION_USTACK 0 -+#define DTRACE_NHELPER_ACTIONS 1 -+ -+#define DTRACE_HELPTRACE_NEXT (-1) -+#define DTRACE_HELPTRACE_DONE 
(-2) -+#define DTRACE_HELPTRACE_ERR (-3) -+ -+#undef ASSERT -+#ifdef CONFIG_DT_DEBUG -+# define ASSERT(x) ((void)((x) || dtrace_assfail(#x, __FILE__, __LINE__))) -+#else -+# define ASSERT(x) ((void)0) -+#endif -+ -+/* -+ * DTrace Probe Hashing -+ */ -+ -+#define DTRACE_HASHNEXT(hash, probe) \ -+ (struct dtrace_probe **)((uintptr_t)(probe) + (hash)->dth_nextoffs) -+#define DTRACE_HASHPREV(hash, probe) \ -+ (struct dtrace_probe **)((uintptr_t)(probe) + (hash)->dth_prevoffs) -+ -+/* -+ * DTrace Probe Management -+ */ -+#define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') -+#define DTRACE_FLAGS2FLT(flags) \ -+ (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \ -+ ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \ -+ ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \ -+ ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \ -+ ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \ -+ ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \ -+ ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \ -+ ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \ -+ ((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK : \ -+ DTRACEFLT_UNKNOWN) -+ -+/* -+ * Test whether alloc_sz bytes will fit in the scratch region. We isolate -+ * alloc_sz on the righthand side of the comparison in order to avoid overflow -+ * or underflow in the comparison with it. This is simpler than the INRANGE -+ * check above, because we know that the dtms_scratch_ptr is valid in the -+ * range. Allocations of size zero are allowed. -+ */ -+#define DTRACE_INSCRATCH(mstate, alloc_sz) \ -+ ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \ -+ (mstate)->dtms_scratch_ptr >= (alloc_sz)) -+ -+/* -+ * Buffering. 
-+ */ -+ -+#define DTRACEBUF_RING 0x0001 /* bufpolicy set to "ring" */ -+#define DTRACEBUF_FILL 0x0002 /* bufpolicy set to "fill" */ -+#define DTRACEBUF_NOSWITCH 0x0004 /* do not switch buffer */ -+#define DTRACEBUF_WRAPPED 0x0008 /* ring buffer has wrapped */ -+#define DTRACEBUF_DROPPED 0x0010 /* drops occurred */ -+#define DTRACEBUF_ERROR 0x0020 /* errors occurred */ -+#define DTRACEBUF_FULL 0x0040 /* "fill" buffer is full */ -+#define DTRACEBUF_CONSUMED 0x0080 /* buffer has been consumed */ -+#define DTRACEBUF_INACTIVE 0x0100 /* buffer is not yet active */ -+ -+#define DTRACE_STORE(type, tomax, offset, what) \ -+ do { \ -+ *((type *)((uintptr_t)(tomax) + (uintptr_t)(offset))) = (type)(what); \ -+ } while (0) -+ -+#define KERNELBASE (uintptr_t)_text -+ -+#ifdef CONFIG_DT_DEBUG_MUTEX -+# define real_mutex_lock(x) mutex_lock(x) -+# define real_mutex_unlock(x) mutex_unlock(x) -+ -+# define mutex_lock(x) do { \ -+ pr_debug("mutex_lock(%s) at %s::%d for %p (PID %d)\n", \ -+ __stringify(x), \ -+ __FILE__, __LINE__, current, \ -+ current ? current->pid : -1); \ -+ real_mutex_lock(x); \ -+ } while (0) -+# define mutex_unlock(x) do { \ -+ pr_debug("mutex_unlock(%s) at %s::%d for %p (PID %d)\n", \ -+ __stringify(x), \ -+ __FILE__, __LINE__, current, \ -+ current ? current->pid : -1); \ -+ real_mutex_unlock(x); \ -+ } while (0) -+#endif -+ -+#define MUTEX_HELD(lock) mutex_owned(lock) -+ -+#define PDATA(mp) ((struct dtrace_module *)mp->pdata) -+ -+#endif /* _LINUX_DTRACE_IMPL_DEFINES_H */ -diff --git a/include/dtrace/provider.h b/include/dtrace/provider.h -new file mode 100644 -index 000000000000..9eeb147f0363 ---- /dev/null -+++ b/include/dtrace/provider.h -@@ -0,0 +1,971 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - Provider API -+ * -+ * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _DTRACE_PROVIDER_H -+#define _DTRACE_PROVIDER_H -+ -+/* -+ * The following functions are implemented by the DTrace framework and are -+ * used to implement separate in-kernel DTrace providers. -+ * -+ * The provider API has two halves: the API that the providers consume from -+ * DTrace, and the API that providers make available to DTrace. -+ * -+ * 1 Framework-to-Provider API -+ * -+ * 1.1 Overview -+ * -+ * The Framework-to-Provider API is represented by the dtrace_pops structure -+ * that the provider passes to the framework when registering itself. 
This -+ * structure consists of the following members: -+ * -+ * dtps_provide() <-- Provide all probes, all modules -+ * dtps_provide_module() <-- Provide all probes in specified module -+ * dtps_enable() <-- Enable specified probe -+ * dtps_disable() <-- Disable specified probe -+ * dtps_suspend() <-- Suspend specified probe -+ * dtps_resume() <-- Resume specified probe -+ * dtps_getargdesc() <-- Get the argument description for args[X] -+ * dtps_getargval() <-- Get the value for an argX or args[X] variable -+ * dtps_usermode() <-- Find out if the probe was fired in user mode -+ * dtps_destroy() <-- Destroy all state associated with this probe -+ * dtps_destroy_module() <-- Destroy per-module data -+ * -+ * 1.2 void dtps_provide(void *arg, const struct dtrace_probedesc *spec) -+ * -+ * 1.2.1 Overview -+ * -+ * Called to indicate that the provider should provide all probes. If the -+ * specified description is non-NULL, dtps_provide() is being called because -+ * no probe matched a specified probe -- if the provider has the ability to -+ * create custom probes, it may wish to create a probe that matches the -+ * specified description. -+ * -+ * 1.2.2 Arguments and notes -+ * -+ * The first argument is the cookie as passed to dtrace_register(). The -+ * second argument is a pointer to a probe description that the provider may -+ * wish to consider when creating custom probes. The provider is expected to -+ * call back into the DTrace framework via dtrace_probe_create() to create -+ * any necessary probes. dtps_provide() may be called even if the provider -+ * has made available all probes; the provider should check the return value -+ * of dtrace_probe_create() to handle this case. Note that the provider need -+ * not implement both dtps_provide() and dtps_provide_module(); see -+ * "Arguments and Notes" for dtrace_register(), below. -+ * -+ * 1.2.3 Return value -+ * -+ * None. 
-+ * -+ * 1.2.4 Caller's context -+ * -+ * dtps_provide() is typically called from open() or ioctl() context, but may -+ * be called from other contexts as well. The DTrace framework is locked in -+ * such a way that providers may not register or unregister. This means that -+ * the provider may not call any DTrace API that affects its registration with -+ * the framework, including dtrace_register(), dtrace_unregister(), -+ * dtrace_invalidate(), and dtrace_condense(). However, the context is such -+ * that the provider may (and indeed, is expected to) call probe-related -+ * DTrace routines, including dtrace_probe_create(), dtrace_probe_lookup(), -+ * and dtrace_probe_arg(). -+ * -+ * 1.3 void dtps_provide_module(void *arg, struct modctl *mp) -+ * -+ * 1.3.1 Overview -+ * -+ * Called to indicate that the provider should provide all probes in the -+ * specified module. -+ * -+ * 1.3.2 Arguments and notes -+ * -+ * The first argument is the cookie as passed to dtrace_register(). The -+ * second argument is a pointer to a modctl structure that indicates the -+ * module for which probes should be created. -+ * -+ * 1.3.3 Return value -+ * -+ * None. -+ * -+ * 1.3.4 Caller's context -+ * -+ * dtps_provide_module() may be called from open() or ioctl() context, but -+ * may also be called from a module loading context. mod_lock is held, and -+ * the DTrace framework is locked in such a way that providers may not -+ * register or unregister. This means that the provider may not call any -+ * DTrace API that affects its registration with the framework, including -+ * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and -+ * dtrace_condense(). However, the context is such that the provider may (and -+ * indeed, is expected to) call probe-related DTrace routines, including -+ * dtrace_probe_create(), dtrace_probe_lookup(), and dtrace_probe_arg(). 
Note -+ * that the provider need not implement both dtps_provide() and -+ * dtps_provide_module(); see "Arguments and Notes" for dtrace_register(), -+ * below. -+ * -+ * 1.4 int dtps_enable(void *arg, dtrace_id_t id, void *parg) -+ * -+ * 1.4.1 Overview -+ * -+ * Called to enable the specified probe. -+ * -+ * 1.4.2 Arguments and notes -+ * -+ * The first argument is the cookie as passed to dtrace_register(). The -+ * second argument is the identifier of the probe to be enabled. The third -+ * argument is the probe argument as passed to dtrace_probe_create(). -+ * dtps_enable() will be called when a probe transitions from not being -+ * enabled at all to having one or more ECBs. The number of ECBs associated -+ * with the probe may change without subsequent calls into the provider. -+ * When the number of ECBs drops to zero, the provider will be explicitly -+ * told to disable the probe via dtps_disable(). dtrace_probe() should never -+ * be called for a probe identifier that hasn't been explicitly enabled via -+ * dtps_enable(). -+ * -+ * 1.4.3 Return value -+ * -+ * On success, dtps_enable() should return 0. On failure, -1 should be -+ * returned. -+ * -+ * 1.4.4 Caller's context -+ * -+ * The DTrace framework is locked in such a way that it may not be called -+ * back into at all. cpu_lock is held. mod_lock is not held and may not -+ * be acquired. -+ * -+ * 1.5 void dtps_disable(void *arg, dtrace_id_t id, void *parg) -+ * -+ * 1.5.1 Overview -+ * -+ * Called to disable the specified probe. -+ * -+ * 1.5.2 Arguments and notes -+ * -+ * The first argument is the cookie as passed to dtrace_register(). The -+ * second argument is the identifier of the probe to be disabled. The third -+ * argument is the probe argument as passed to dtrace_probe_create(). -+ * dtps_disable() will be called when a probe transitions from being enabled -+ * to having zero ECBs.
dtrace_probe() should never be called for a probe -+ * identifier that has been explicitly disabled via dtps_disable(). -+ * -+ * 1.5.3 Return value -+ * -+ * None. -+ * -+ * 1.5.4 Caller's context -+ * -+ * The DTrace framework is locked in such a way that it may not be called -+ * back into at all. cpu_lock is held. mod_lock is not held and may not -+ * be acquired. -+ * -+ * 1.6 void dtps_suspend(void *arg, dtrace_id_t id, void *parg) -+ * -+ * 1.6.1 Overview -+ * -+ * Called to suspend the specified enabled probe. This entry point is for -+ * providers that may need to suspend some or all of their probes when CPUs -+ * are being powered on or when the boot monitor is being entered for a -+ * prolonged period of time. -+ * -+ * 1.6.2 Arguments and notes -+ * -+ * The first argument is the cookie as passed to dtrace_register(). The -+ * second argument is the identifier of the probe to be suspended. The -+ * third argument is the probe argument as passed to dtrace_probe_create(). -+ * dtps_suspend will only be called on an enabled probe. Providers that -+ * provide a dtps_suspend entry point will want to take roughly the action -+ * that it takes for dtps_disable. -+ * -+ * 1.6.3 Return value -+ * -+ * None. -+ * -+ * 1.6.4 Caller's context -+ * -+ * Interrupts are disabled. The DTrace framework is in a state such that the -+ * specified probe cannot be disabled or destroyed for the duration of -+ * dtps_suspend(). As interrupts are disabled, the provider is afforded -+ * little latitude; the provider is expected to do no more than a store to -+ * memory. -+ * -+ * 1.7 void dtps_resume(void *arg, dtrace_id_t id, void *parg) -+ * -+ * 1.7.1 Overview -+ * -+ * Called to resume the specified enabled probe. This entry point is for -+ * providers that may need to resume some or all of their probes after the -+ * completion of an event that induced a call to dtps_suspend(). 
-+ * -+ * 1.7.2 Arguments and notes -+ * -+ * The first argument is the cookie as passed to dtrace_register(). The -+ * second argument is the identifier of the probe to be resumed. The -+ * third argument is the probe argument as passed to dtrace_probe_create(). -+ * dtps_resume will only be called on an enabled probe. Providers that -+ * provide a dtps_resume entry point will want to take roughly the action -+ * that it takes for dtps_enable. -+ * -+ * 1.7.3 Return value -+ * -+ * None. -+ * -+ * 1.7.4 Caller's context -+ * -+ * Interrupts are disabled. The DTrace framework is in a state such that the -+ * specified probe cannot be disabled or destroyed for the duration of -+ * dtps_resume(). As interrupts are disabled, the provider is afforded -+ * little latitude; the provider is expected to do no more than a store to -+ * memory. -+ * -+ * 1.8 void dtps_getargdesc(void *arg, dtrace_id_t id, void *parg, -+ * struct dtrace_argdesc *desc) -+ * -+ * 1.8.1 Overview -+ * -+ * Called to retrieve the argument description for an args[X] variable. -+ * -+ * 1.8.2 Arguments and notes -+ * -+ * The first argument is the cookie as passed to dtrace_register(). The -+ * second argument is the identifier of the current probe. The third -+ * argument is the probe argument as passed to dtrace_probe_create(). The -+ * fourth argument is a pointer to the argument description. This -+ * description is both an input and output parameter: it contains the -+ * index of the desired argument in the dtargd_ndx field, and expects -+ * the other fields to be filled in upon return. If there is no argument -+ * corresponding to the specified index, the dtargd_ndx field should be set -+ * to DTRACE_ARGNONE. -+ * -+ * 1.8.3 Return value -+ * -+ * None. The dtargd_ndx, dtargd_native, dtargd_xlate and dtargd_mapping -+ * members of the dtrace_argdesc structure are all output values. -+ * -+ * 1.8.4 Caller's context -+ * -+ * dtps_getargdesc() is called from ioctl() context. 
mod_lock is held, and -+ * the DTrace framework is locked in such a way that providers may not -+ * register or unregister. This means that the provider may not call any -+ * DTrace API that affects its registration with the framework, including -+ * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and -+ * dtrace_condense(). -+ * -+ * 1.9 uint64_t dtps_getargval(void *arg, dtrace_id_t id, void *parg, -+ * int argno, int aframes) -+ * -+ * 1.9.1 Overview -+ * -+ * Called to retrieve a value for an argX or args[X] variable. -+ * -+ * 1.9.2 Arguments and notes -+ * -+ * The first argument is the cookie as passed to dtrace_register(). The -+ * second argument is the identifier of the current probe. The third -+ * argument is the probe argument as passed to dtrace_probe_create(). The -+ * fourth argument is the number of the argument (the X in the example in -+ * 1.9.1). The fifth argument is the number of stack frames that were used -+ * to get from the actual place in the code that fired the probe to -+ * dtrace_probe() itself, the so-called artificial frames. This argument may -+ * be used to descend an appropriate number of frames to find the correct -+ * values. If this entry point is left NULL, the dtrace_getarg() built-in -+ * function is used. -+ * -+ * 1.9.3 Return value -+ * -+ * The value of the argument. -+ * -+ * 1.9.4 Caller's context -+ * -+ * This is called from within dtrace_probe() meaning that interrupts -+ * are disabled. No locks should be taken within this entry point. -+ * -+ * 1.10 int dtps_usermode(void *arg, dtrace_id_t id, void *parg) -+ * -+ * 1.10.1 Overview -+ * -+ * Called to determine if the probe was fired in a user context. -+ * -+ * 1.10.2 Arguments and notes -+ * -+ * The first argument is the cookie as passed to dtrace_register(). The -+ * second argument is the identifier of the current probe. The third -+ * argument is the probe argument as passed to dtrace_probe_create(). 
This -+ * entry point must not be left NULL for providers whose probes allow for -+ * mixed mode tracing, that is to say those probes that can fire during -+ * kernel- _or_ user-mode execution -+ * -+ * 1.10.3 Return value -+ * -+ * A boolean value. -+ * -+ * 1.10.4 Caller's context -+ * -+ * This is called from within dtrace_probe() meaning that interrupts -+ * are disabled. No locks should be taken within this entry point. -+ * -+ * 1.11 void dtps_destroy(void *arg, dtrace_id_t id, void *parg) -+ * -+ * 1.11.1 Overview -+ * -+ * Called to destroy the specified probe. -+ * -+ * 1.11.2 Arguments and notes -+ * -+ * The first argument is the cookie as passed to dtrace_register(). The -+ * second argument is the identifier of the probe to be destroyed. The third -+ * argument is the probe argument as passed to dtrace_probe_create(). The -+ * provider should free all state associated with the probe. The framework -+ * guarantees that dtps_destroy() is only called for probes that have either -+ * been disabled via dtps_disable() or were never enabled via dtps_enable(). -+ * Once dtps_disable() has been called for a probe, no further call will be -+ * made specifying the probe. -+ * -+ * 1.11.3 Return value -+ * -+ * None. -+ * -+ * 1.11.4 Caller's context -+ * -+ * The DTrace framework is locked in such a way that it may not be called -+ * back into at all. mod_lock is held. cpu_lock is not held, and may not be -+ * acquired. -+ * -+ * 1.12 void dtps_destroy_module(void *arg, struct modctl *mp) -+ * -+ * 1.12.1 Overview -+ * -+ * Called to notify provider that it can remove any per-module data. -+ * -+ * 1.12.2 Arguments and notes -+ * -+ * The first argument is the cookie as passed to dtrace_register(). The -+ * second argument is a pointer to a struct module structure that points to -+ * the module for which data may be cleared. -+ * -+ * 1.12.3 Return value -+ * -+ * None. 
-+ * -+ * -+ * 2 Provider-to-Framework API -+ * -+ * 2.1 Overview -+ * -+ * The Provider-to-Framework API provides the mechanism for the provider to -+ * register itself with the DTrace framework, to create probes, to lookup -+ * probes and (most importantly) to fire probes. The Provider-to-Framework -+ * consists of: -+ * -+ * dtrace_register() <-- Register a provider with the DTrace framework -+ * dtrace_unregister() <-- Remove a provider's DTrace registration -+ * dtrace_meta_register() <-- Register a metaprovider with the DTrace framework -+ * dtrace_meta_unregister()<-- Remove a metaprovider's DTrace registration -+ * dtrace_invalidate() <-- Invalidate the specified provider -+ * dtrace_condense() <-- Remove a provider's unenabled probes -+ * dtrace_attached() <-- Indicates whether or not DTrace has attached -+ * dtrace_probe_create() <-- Create a DTrace probe -+ * dtrace_probe_lookup() <-- Lookup a DTrace probe based on its name -+ * dtrace_probe_arg() <-- Return the probe argument for a specific probe -+ * dtrace_probe() <-- Fire the specified probe -+ * -+ * 2.2 int dtrace_register(const char *name, const struct dtrace_pattr *pap, -+ * uint32_t priv, struct cred *cr, const struct dtrace_pops *pops, -+ * void *arg, dtrace_provider_id_t *idp) -+ * -+ * 2.2.1 Overview -+ * -+ * dtrace_register() registers the calling provider with the DTrace -+ * framework. It should generally be called by DTrace providers in their -+ * attach(9E) entry point. -+ * -+ * 2.2.2 Arguments and Notes -+ * -+ * The first argument is the name of the provider. The second argument is a -+ * pointer to the stability attributes for the provider. 
The third argument -+ * is the privilege flags for the provider, and must be some combination of: -+ * -+ * DTRACE_PRIV_NONE <= All users may enable probes from this provider -+ * -+ * DTRACE_PRIV_PROC <= Any user with privilege of PRIV_DTRACE_PROC may -+ * enable probes from this provider -+ * -+ * DTRACE_PRIV_USER <= Any user with privilege of PRIV_DTRACE_USER may -+ * enable probes from this provider -+ * -+ * DTRACE_PRIV_KERNEL <= Any user with privilege of PRIV_DTRACE_KERNEL -+ * may enable probes from this provider -+ * -+ * DTRACE_PRIV_OWNER <= This flag places an additional constraint on -+ * the privilege requirements above. These probes -+ * require either (a) a user ID matching the user -+ * ID of the cred passed in the fourth argument -+ * or (b) the PRIV_PROC_OWNER privilege. -+ * -+ * Note that these flags designate the _visibility_ of the probes, not -+ * the conditions under which they may or may not fire. -+ * -+ * The fourth argument is the credential that is associated with the provider. -+ * This argument should be NULL if the privilege flags don't include -+ * DTRACE_PRIV_OWNER. If non-NULL, the framework stashes the uid represented -+ * by this credential for use at probe-time, in implicit predicates. These -+ * limit visibility of the probes to users which have sufficient privilege to -+ * access them. -+ * -+ * The fifth argument is a DTrace provider operations vector, which provides -+ * the implementation for the Framework-to-Provider API. (See Section 1, -+ * above.) This must be non-NULL, and each member must be non-NULL. The -+ * exceptions to this are (1) the dtps_provide() and dtps_provide_module() -+ * members (if the provider so desires, _one_ of these members may be left -+ * NULL -- denoting that the provider only implements the other) and (2) -+ * the dtps_suspend() and dtps_resume() members, which must either both be -+ * NULL or both be non-NULL. 
-+ * -+ * The sixth argument is a cookie to be specified as the first argument for -+ * each function in the Framework-to-Provider API. This argument may have -+ * any value. -+ * -+ * The final argument is a pointer to dtrace_provider_id_t. If -+ * dtrace_register() successfully completes, the provider identifier will be -+ * stored in the memory pointed to by this argument. This argument must be -+ * non-NULL. -+ * -+ * 2.2.3 Return value -+ * -+ * On success, dtrace_register() returns 0 and stores the new provider's -+ * identifier into the memory pointed to by the idp argument. On failure, -+ * dtrace_register() returns an errno: -+ * -+ * EINVAL The arguments passed to dtrace_register() were somehow invalid. -+ * This may be because a parameter that must be non-NULL was NULL, -+ * because the name was invalid (either empty or an illegal -+ * provider name) or because the attributes were invalid. -+ * -+ * No other failure code is returned. -+ * -+ * 2.2.4 Caller's context -+ * -+ * dtrace_register() may induce calls to dtrace_provide(); the provider must -+ * hold no locks across dtrace_register() that may also be acquired by -+ * dtrace_provide(). cpu_lock and mod_lock must not be held. -+ * -+ * 2.3 int dtrace_unregister(dtrace_provider_id_t id) -+ * -+ * 2.3.1 Overview -+ * -+ * Unregisters the specified provider from the DTrace framework. It should -+ * generally be called by DTrace providers in their detach(9E) entry point. -+ * -+ * 2.3.2 Arguments and Notes -+ * -+ * The only argument is the provider identifier, as returned from a -+ * successful call to dtrace_register(). As a result of calling -+ * dtrace_unregister(), the DTrace framework will call back into the provider -+ * via the dtps_destroy() entry point. Once dtrace_unregister() successfully -+ * completes, however, the DTrace framework will no longer make calls through -+ * the Framework-to-Provider API. -+ * -+ * 2.3.3 Return value -+ * -+ * On success, dtrace_unregister returns 0. 
On failure, dtrace_unregister() -+ * returns an errno: -+ * -+ * EBUSY There are currently processes that have the DTrace pseudodevice -+ * open, or there exists an anonymous enabling that hasn't yet -+ * been claimed. -+ * -+ * No other failure code is returned. -+ * -+ * 2.3.4 Caller's context -+ * -+ * Because a call to dtrace_unregister() may induce calls through the -+ * Framework-to-Provider API, the caller may not hold any lock across -+ * dtrace_unregister() that is also acquired in any of the Framework-to- -+ * Provider API functions. Additionally, mod_lock may not be held. -+ * -+ * 2.4 void dtrace_invalidate(dtrace_provider_id_t id) -+ * -+ * 2.4.1 Overview -+ * -+ * Invalidates the specified provider. All subsequent probe lookups for the -+ * specified provider will fail, but its probes will not be removed. -+ * -+ * 2.4.2 Arguments and note -+ * -+ * The only argument is the provider identifier, as returned from a -+ * successful call to dtrace_register(). In general, a provider's probes -+ * always remain valid; dtrace_invalidate() is a mechanism for invalidating -+ * an entire provider, regardless of whether or not probes are enabled. -+ * Note that dtrace_invalidate() will _not_ prevent already enabled -+ * probes from firing -- it will merely prevent any new enablings of the -+ * provider's probes. -+ * -+ * 2.5 int dtrace_condense(dtrace_provider_id_t id) -+ * -+ * 2.5.1 Overview -+ * -+ * Removes all the unenabled probes for the given provider. This function is -+ * not unlike dtrace_unregister(), except that it doesn't remove the -+ * provider -- just as many of its associated probes as it can. -+ * -+ * 2.5.2 Arguments and Notes -+ * -+ * As with dtrace_unregister(), the sole argument is the provider identifier -+ * as returned from a successful call to dtrace_register(). 
As a result of -+ * calling dtrace_condense(), the DTrace framework will call back into the -+ * given provider's dtps_destroy() entry point for each of the provider's -+ * unenabled probes. -+ * -+ * 2.5.3 Return value -+ * -+ * Currently, dtrace_condense() always returns 0. However, consumers of this -+ * function should check the return value as appropriate; its behavior may -+ * change in the future. -+ * -+ * 2.5.4 Caller's context -+ * -+ * As with dtrace_unregister(), the caller may not hold any lock across -+ * dtrace_condense() that is also acquired in the provider's entry points. -+ * Also, mod_lock may not be held. -+ * -+ * 2.6 int dtrace_attached() -+ * -+ * 2.6.1 Overview -+ * -+ * Indicates whether or not DTrace has attached. -+ * -+ * 2.6.2 Arguments and Notes -+ * -+ * For most providers, DTrace makes initial contact beyond registration. -+ * That is, once a provider has registered with DTrace, it waits to hear -+ * from DTrace to create probes. However, some providers may wish to -+ * proactively create probes without first being told by DTrace to do so. -+ * If providers wish to do this, they must first call dtrace_attached() to -+ * determine if DTrace itself has attached. If dtrace_attached() returns 0, -+ * the provider must not make any other Provider-to-Framework API call. -+ * -+ * 2.6.3 Return value -+ * -+ * dtrace_attached() returns 1 if DTrace has attached, 0 otherwise. -+ * -+ * 2.7 int dtrace_probe_create(dtrace_provider_id_t id, const char *mod, -+ * const char *func, const char *name, int aframes, void *arg) -+ * -+ * 2.7.1 Overview -+ * -+ * Creates a probe with specified module name, function name, and name. -+ * -+ * 2.7.2 Arguments and Notes -+ * -+ * The first argument is the provider identifier, as returned from a -+ * successful call to dtrace_register(). The second, third, and fourth -+ * arguments are the module name, function name, and probe name, -+ * respectively. 
Of these, module name and function name may both be NULL -+ * (in which case the probe is considered to be unanchored), or they may both -+ * be non-NULL. The name must be non-NULL, and must point to a non-empty -+ * string. -+ * -+ * The fifth argument is the number of artificial stack frames that will be -+ * found on the stack when dtrace_probe() is called for the new probe. These -+ * artificial frames will automatically be pruned should the stack() or -+ * stackdepth() functions be called as part of one of the probe's ECBs. If -+ * the provider doesn't add an artificial frame, this parameter should be -+ * zero. -+ * -+ * The final argument is a probe argument that will be passed back to the -+ * provider when a probe-specific operation is called (e.g., via -+ * dtps_enable(), dtps_disable(), etc.). -+ * -+ * Note that it is up to the provider to be sure that the probe that it -+ * creates does not already exist -- if the provider is unsure of the probe's -+ * existence, it should assure its absence with dtrace_probe_lookup() before -+ * calling dtrace_probe_create(). -+ * -+ * 2.7.3 Return value -+ * -+ * dtrace_probe_create() always succeeds, and always returns the identifier -+ * of the newly-created probe. -+ * -+ * 2.7.4 Caller's context -+ * -+ * While dtrace_probe_create() is generally expected to be called from -+ * dtps_provide() and/or dtps_provide_module(), it may be called from other -+ * non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. -+ * -+ * 2.8 dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t id, -+ * const char *mod, const char *func, const char *name) -+ * -+ * 2.8.1 Overview -+ * -+ * Looks up a probe based on provider and one or more of module name, -+ * function name and probe name. -+ * -+ * 2.8.2 Arguments and Notes -+ * -+ * The first argument is the provider identifier, as returned from a -+ * successful call to dtrace_register(). 
The second, third, and fourth -+ * arguments are the module name, function name, and probe name, -+ * respectively. Any of these may be NULL; dtrace_probe_lookup() will return -+ * the identifier of the first probe that is provided by the specified -+ * provider and matches all of the non-NULL matching criteria. -+ * dtrace_probe_lookup() is generally used by a provider to check the -+ * existence of a probe before creating it with dtrace_probe_create(). -+ * -+ * 2.8.3 Return value -+ * -+ * If the probe exists, returns its identifier. If the probe does not exist, -+ * returns DTRACE_IDNONE. -+ * -+ * 2.8.4 Caller's context -+ * -+ * While dtrace_probe_lookup() is generally expected to be called from -+ * dtps_provide() and/or dtps_provide_module(), it may also be called from -+ * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. -+ * -+ * 2.9 void *dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t probe) -+ * -+ * 2.9.1 Overview -+ * -+ * Returns the probe argument associated with the specified probe. -+ * -+ * 2.9.2 Arguments and Notes -+ * -+ * The first argument is the provider identifier, as returned from a -+ * successful call to dtrace_register(). The second argument is a probe -+ * identifier, as returned from dtrace_probe_lookup() or -+ * dtrace_probe_create(). This is useful if a probe has multiple -+ * provider-specific components to it: the provider can create the probe -+ * once with provider-specific state, and then add to the state by looking -+ * up the probe based on probe identifier. -+ * -+ * 2.9.3 Return value -+ * -+ * Returns the argument associated with the specified probe. If the -+ * specified probe does not exist, or if the specified probe is not provided -+ * by the specified provider, NULL is returned. 
-+ * -+ * 2.9.4 Caller's context -+ * -+ * While dtrace_probe_arg() is generally expected to be called from -+ * dtps_provide() and/or dtps_provide_module(), it may also be called from -+ * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. -+ * -+ * 2.10 void dtrace_probe(dtrace_id_t probe, uintptr_t arg0, uintptr_t arg1, -+ * uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) -+ * -+ * 2.10.1 Overview -+ * -+ * The epicenter of DTrace: fires the specified probe with the specified -+ * arguments. -+ * -+ * 2.10.2 Arguments and Notes -+ * -+ * The first argument is a probe identifier as returned by -+ * dtrace_probe_create() or dtrace_probe_lookup(). The second through sixth -+ * arguments are the values to which the D variables "arg0" through "arg4" -+ * will be mapped. -+ * -+ * dtrace_probe() should be called whenever the specified probe has fired -- -+ * however the provider defines it. -+ * -+ * 2.10.3 Return value -+ * -+ * None. -+ * -+ * 2.10.4 Caller's context -+ * -+ * dtrace_probe() may be called in virtually any context: kernel, user, -+ * interrupt, high-level interrupt, with arbitrary adaptive locks held, with -+ * dispatcher locks held, with interrupts disabled, etc. The only latitude -+ * that must be afforded to DTrace is the ability to make calls within -+ * itself (and to its in-kernel subroutines) and the ability to access -+ * arbitrary (but mapped) memory. On some platforms, this constrains -+ * context. For example, on UltraSPARC, dtrace_probe() cannot be called -+ * from any context in which TL is greater than zero. dtrace_probe() may -+ * also not be called from any routine which may be called by dtrace_probe() -+ * -- which includes functions in the DTrace framework and some in-kernel -+ * DTrace subroutines. All such functions are prefixed with "dtrace_"; providers that -+ * instrument the kernel arbitrarily should be sure to not instrument these -+ * routines. 
-+ */ -+ -+#include <dtrace/types.h> -+#include <linux/cred.h> -+#include <linux/module.h> -+#include <linux/dtrace/enabling_defines.h> -+#include <linux/dtrace/arg_defines.h> -+#include <dtrace/provider_defines.h> -+#include <linux/dtrace/stability.h> -+ -+struct dtrace_pops { -+ void (*dtps_provide)(void *, const struct dtrace_probedesc *); -+ void (*dtps_provide_module)(void *, struct module *); -+ int (*dtps_enable)(void *, dtrace_id_t, void *); -+ void (*dtps_disable)(void *, dtrace_id_t, void *); -+ void (*dtps_suspend)(void *, dtrace_id_t, void *); -+ void (*dtps_resume)(void *, dtrace_id_t, void *); -+ void (*dtps_getargdesc)(void *, dtrace_id_t, void *, -+ struct dtrace_argdesc *); -+ uint64_t (*dtps_getargval)(void *, dtrace_id_t, void *, int, int); -+ int (*dtps_usermode)(void *, dtrace_id_t, void *); -+ void (*dtps_destroy)(void *, dtrace_id_t, void *); -+ void (*dtps_destroy_module)(void *, struct module *); -+}; -+ -+struct dtrace_helper_probedesc { -+ char *dthpb_mod; -+ char *dthpb_func; -+ char *dthpb_name; -+ uint64_t dthpb_base; -+ uint32_t *dthpb_offs; -+ uint32_t *dthpb_enoffs; -+ uint32_t dthpb_noffs; -+ uint32_t dthpb_nenoffs; -+ uint8_t *dthpb_args; -+ uint8_t dthpb_xargc; -+ uint8_t dthpb_nargc; -+ char *dthpb_xtypes; -+ char *dthpb_ntypes; -+}; -+ -+struct dtrace_helper_provdesc { -+ char *dthpv_provname; -+ struct dtrace_pattr dthpv_pattr; -+}; -+ -+struct dtrace_mops { -+ void (*dtms_create_probe)(void *, void *, -+ struct dtrace_helper_probedesc *); -+ void *(*dtms_provide_pid)(void *, struct dtrace_helper_provdesc *, -+ pid_t); -+ void (*dtms_remove_pid)(void *, struct dtrace_helper_provdesc *, -+ pid_t); -+}; -+ -+/* -+ * DTrace Provider-to-Framework API Functions -+ */ -+ -+struct dtrace_meta { -+ struct dtrace_mops dtm_mops; -+ char *dtm_name; -+ void *dtm_arg; -+ uint64_t dtm_count; -+}; -+ -+struct dtrace_mprovider { -+ char *dtmp_name; -+ char *dtmp_pref; -+ struct dtrace_pattr *dtmp_attr; -+ uint32_t dtmp_priv; -+ struct 
dtrace_pops *dtmp_pops; -+ dtrace_provider_id_t dtmp_id; -+}; -+ -+struct dtrace_pmod { -+ struct module *mod; -+ struct list_head list; -+}; -+ -+extern int dtrace_register(const char *, const struct dtrace_pattr *, -+ uint32_t, const struct cred *, -+ const struct dtrace_pops *, void *, -+ dtrace_provider_id_t *); -+extern int dtrace_unregister(dtrace_provider_id_t); -+extern void dtrace_invalidate(dtrace_provider_id_t); -+extern int dtrace_condense(dtrace_provider_id_t); -+extern int dtrace_attached(void); -+ -+extern int dtrace_meta_register(const char *, const struct dtrace_mops *, -+ void *, dtrace_meta_provider_id_t *); -+extern int dtrace_meta_unregister(dtrace_meta_provider_id_t); -+ -+extern dtrace_id_t dtrace_probe_create(dtrace_provider_id_t, const char *, -+ const char *, const char *, int, -+ void *); -+extern void *dtrace_probe_arg(dtrace_provider_id_t, dtrace_id_t); -+extern dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t, const char *, -+ const char *, const char *); -+extern void dtrace_probe(dtrace_id_t, uintptr_t, uintptr_t, uintptr_t, -+ uintptr_t, uintptr_t, uintptr_t, uintptr_t); -+ -+/* -+ * Provider creation. 
-+ */ -+#ifdef DTRACE_HAVE_PROV_EXIT -+# define DT_PROV_EXIT(name) \ -+ extern int name##_prov_exit(void); -+#else -+# define DT_PROV_EXIT(name) \ -+ static int name##_prov_exit(void) \ -+ { \ -+ return (dtrace_unregister(name##_id) == 0); \ -+ } -+#endif -+ -+#define DT_PROVIDER_MODULE(name, priv) \ -+ dtrace_provider_id_t name##_id = DTRACE_PROVNONE; \ -+ \ -+ DT_PROV_EXIT(name) \ -+ \ -+ static int __init name##_init(void) \ -+ { \ -+ int ret = -ENOMEM; \ -+ struct dtrace_module *pdata = THIS_MODULE->pdata; \ -+ \ -+ if (pdata == NULL) \ -+ goto failed; \ -+ \ -+ ret = name##_dev_init(); \ -+ if (ret) \ -+ goto failed; \ -+ \ -+ ret = dtrace_register(__stringify(name), &name##_attr, priv, \ -+ NULL, &name##_pops, NULL, &name##_id); \ -+ if (ret) \ -+ goto failed; \ -+ \ -+ pdata->prov_exit = name##_prov_exit; \ -+ \ -+ return 0; \ -+ \ -+ failed: \ -+ return ret; \ -+ } \ -+ \ -+ static void __exit name##_exit(void) \ -+ { \ -+ name##_dev_exit(); \ -+ } \ -+ \ -+ module_init(name##_init); \ -+ module_exit(name##_exit); -+ -+#ifdef DTRACE_HAVE_PROV_EXIT -+# define DT_META_PROV_EXIT(name) \ -+ extern int name##_prov_exit(void); -+#else -+# define DT_META_PROV_EXIT(name) \ -+ static int name##_prov_exit(void) \ -+ { \ -+ return (dtrace_meta_unregister(name##_id) == 0); \ -+ } -+#endif -+ -+#define DT_META_PROVIDER_MODULE(name) \ -+ dtrace_meta_provider_id_t name##_id = DTRACE_METAPROVNONE; \ -+ \ -+ DT_META_PROV_EXIT(name) \ -+ \ -+ static int __init name##_init(void) \ -+ { \ -+ int ret = -ENOMEM; \ -+ struct dtrace_module *pdata = THIS_MODULE->pdata; \ -+ \ -+ if (pdata == NULL) \ -+ goto failed; \ -+ \ -+ ret = name##_dev_init(); \ -+ if (ret) \ -+ goto failed; \ -+ \ -+ ret = dtrace_meta_register(__stringify(name), &name##_mops, \ -+ NULL, &name##_id); \ -+ if (ret) \ -+ goto failed; \ -+ \ -+ pdata->prov_exit = name##_prov_exit; \ -+ \ -+ return 0; \ -+ \ -+ failed: \ -+ return ret; \ -+ } \ -+ \ -+ static void __exit name##_exit(void) \ -+ { \ -+ 
name##_dev_exit(); \ -+ } \ -+ \ -+ module_init(name##_init); \ -+ module_exit(name##_exit); -+ -+#define DT_MULTI_PROVIDER_MODULE(name, plist) \ -+ static int name##_prov_exit(void) \ -+ { \ -+ int ret = 0; \ -+ struct dtrace_mprovider *prov; \ -+ \ -+ for (prov = plist; prov->dtmp_name != NULL; prov++) { \ -+ if (prov->dtmp_id != DTRACE_PROVNONE) { \ -+ ret = dtrace_unregister(prov->dtmp_id); \ -+ if (ret != 0) { \ -+ pr_warn("Failed to unregister " \ -+ "provider %s: %d", \ -+ prov->dtmp_name, ret); \ -+ break; \ -+ } \ -+ \ -+ prov->dtmp_id = DTRACE_PROVNONE; \ -+ } \ -+ } \ -+ \ -+ return (ret == 0); \ -+ } \ -+ \ -+ static int __init name##_init(void) \ -+ { \ -+ int ret = -ENOMEM; \ -+ struct dtrace_mprovider *prov; \ -+ struct dtrace_module *pdata = THIS_MODULE->pdata; \ -+ \ -+ if (pdata == NULL) \ -+ goto failed; \ -+ \ -+ ret = name##_dev_init(); \ -+ if (ret) \ -+ goto failed; \ -+ \ -+ for (prov = plist; prov->dtmp_name != NULL; prov++) { \ -+ if (dtrace_register(prov->dtmp_name, prov->dtmp_attr, \ -+ prov->dtmp_priv, NULL, \ -+ prov->dtmp_pops, prov, \ -+ &prov->dtmp_id) != 0) \ -+ pr_warn("Failed to register provider %s", \ -+ prov->dtmp_name); \ -+ } \ -+ \ -+ pdata->prov_exit = name##_prov_exit; \ -+ \ -+ return 0; \ -+ \ -+ failed: \ -+ return ret; \ -+ } \ -+ \ -+ static void __exit name##_exit(void) \ -+ { \ -+ name##_dev_exit(); \ -+ } \ -+ \ -+ module_init(name##_init); \ -+ module_exit(name##_exit); -+ -+ -+#endif /* _DTRACE_PROVIDER_H */ -diff --git a/include/dtrace/provider_defines.h b/include/dtrace/provider_defines.h -new file mode 100644 -index 000000000000..104514e1261b ---- /dev/null -+++ b/include/dtrace/provider_defines.h -@@ -0,0 +1,41 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - Provider defines -+ * -+ * Copyright (c) 2009, 2017, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _DTRACE_PROVIDER_DEFINES_H -+#define _DTRACE_PROVIDER_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/in6.h> -+ -+typedef uintptr_t dtrace_provider_id_t; -+typedef uintptr_t dtrace_meta_provider_id_t; -+typedef __be32 ipaddr_t; -+typedef ipaddr_t *ipaddr_t_p; -+typedef struct in6_addr in6_addr_t; -+ -+struct dtrace_pops; -+struct dtrace_helper_probedesc; -+struct dtrace_helper_provdesc; -+struct dtrace_mops; -+struct dtrace_meta; -+ -+#endif /* _DTRACE_PROVIDER_DEFINES_H */ -diff --git a/include/dtrace/types.h b/include/dtrace/types.h -new file mode 100644 -index 000000000000..7f8d0d7efcc7 ---- /dev/null -+++ b/include/dtrace/types.h -@@ -0,0 +1,131 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - Kernel Types -+ * -+ * Copyright (c) 2009, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _DTRACE_TYPES_H -+#define _DTRACE_TYPES_H -+ -+/* -+ * This file contains types needed to parse the DTrace shared userspace/ kernel -+ * headers, and a few others (it has not been gardened to remove constants used -+ * only by the DTrace core). Userspace has its own version of these types -+ * (mostly from <unistd.h>). -+ * -+ * This file is compiled both in a normal kernel environment and in a peculiar -+ * halfway-house environment used for headers_checking of <ioctl.h>, in which -+ * among other things, no config.h symbols are available. As a result, you -+ * should be careful about #including kernel headers here: many will break -+ * headers_check if added. So far, it has always been sufficient to add them to -+ * dtrace/dtrace.h instead; if this turns out to be insufficient later (perhaps -+ * because DTrace core files cease to #include all of <dtrace.h>), the -+ * HEADERS_CHECK #define may prove useful to disable kernel-only portions of -+ * this file. 
-+ */ -+ -+#include <asm/bitsperlong.h> -+#include <linux/dtrace_os.h> -+ -+typedef unsigned char uchar_t; -+typedef unsigned int uint_t; -+typedef unsigned long ulong_t; -+ -+typedef long intptr_t; -+ -+#define UINT8_MAX (0xff) -+#define UINT8_MIN 0 -+#define UINT16_MAX (0xffff) -+#define UINT16_MIN 0 -+#define UINT32_MAX (0xffffffff) -+#define UINT32_MIN 0 -+#define UINT64_MAX (~0ULL) -+#define UINT64_MIN (0) -+#define INT64_MAX ((long long)(~0ULL>>1)) -+#define INT64_MIN (-INT64_MAX - 1LL) -+ -+#define NBBY (__BITS_PER_LONG / sizeof(long)) -+ -+/* -+ * This is a bit unusual, but OpenSolaris seems to like it. Basically, the -+ * values below are the number of time units (sec, milli, micro, nano) that -+ * comprise 1 second. As such, it is the value of the respective multiplier. -+ */ -+#define SEC 1 -+#define MILLISEC 1000 -+#define MICROSEC 1000000 -+#define NANOSEC 1000000000 -+ -+typedef enum { -+ TRUE = -1, -+ FALSE = 0 -+} boolean_t; -+ -+ -+#define DTRACE_ACCESS_KERNEL 0x1 -+ -+#define DTRACE_CRA_PROC 0x0001 -+#define DTRACE_CRA_PROC_CONTROL 0x0002 -+#define DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER 0x0004 -+#define DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG 0x0010 -+#define DTRACE_CRA_KERNEL 0x0020 -+#define DTRACE_CRA_KERNEL_DESTRUCTIVE 0x0040 -+ -+#define DTRACE_CRA_ALL (DTRACE_CRA_PROC | \ -+ DTRACE_CRA_PROC_CONTROL | \ -+ DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER | \ -+ DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG | \ -+ DTRACE_CRA_KERNEL | \ -+ DTRACE_CRA_KERNEL_DESTRUCTIVE) -+ -+#define DTRACE_CRV_ALLPROC 0x01 -+#define DTRACE_CRV_KERNEL 0x02 -+#define DTRACE_CRV_ALL (DTRACE_CRV_ALLPROC | DTRACE_CRV_KERNEL) -+ -+#define DTRACE_MATCH_FAIL -1 -+#define DTRACE_MATCH_NEXT 0 -+#define DTRACE_MATCH_DONE 1 -+ -+#define DTRACE_COND_OWNER 0x01 -+#define DTRACE_COND_USERMODE 0x02 -+ -+#define P2ROUNDUP(x, a) (-(-(x) & -(a))) -+ -+#if (BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR) -+#define KTIME_INIT(s, ns) ((s64)(s) * NSEC_PER_SEC + (s64)(ns)) -+#else -+# define KTIME_INIT(n, ns) 
{ .sec = (s), .nsec = (ns) } -+#endif -+#define ktime_lt(t0, t1) (t0 < t1) -+#define ktime_le(t0, t1) (t0 <= t1) -+#define ktime_ge(t0, t1) (t0 >= t1) -+#define ktime_gt(t0, t1) (t0 > t1) -+#define ktime_cp(t0, t1) (t0 = t1) -+ -+/* -+ * Translate between kernel config options and userspace-compatible definitions. -+ */ -+#ifdef CONFIG_64BIT -+#define _LP64 1 -+#endif -+#ifdef __LITTLE_ENDIAN -+#define _LITTLE_ENDIAN 1 -+#endif -+ -+#endif /* _DTRACE_TYPES_H */ -diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h -index d37c17e68268..4e35234824a6 100644 ---- a/include/linux/cpuhotplug.h -+++ b/include/linux/cpuhotplug.h -@@ -104,6 +104,7 @@ enum cpuhp_state { - CPUHP_AP_IRQ_MIPS_GIC_STARTING, - CPUHP_AP_ARM_MVEBU_COHERENCY, - CPUHP_AP_MICROCODE_LOADER, -+ CPUHP_AP_CYCLIC_STARTING, - CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, - CPUHP_AP_PERF_X86_STARTING, - CPUHP_AP_PERF_X86_AMD_IBS_STARTING, -diff --git a/include/linux/cyclic.h b/include/linux/cyclic.h -new file mode 100644 -index 000000000000..12ab85dc185b ---- /dev/null -+++ b/include/linux/cyclic.h -@@ -0,0 +1,49 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. 
-+ */ -+ -+#ifndef _CYCLIC_H_ -+#define _CYCLIC_H_ -+ -+#include <linux/ktime.h> -+#include <linux/types.h> -+ -+#define CY_LOW_LEVEL 0 -+#define CY_LOCK_LEVEL 1 -+#define CY_HIGH_LEVEL 2 -+#define CY_SOFT_LEVELS 2 -+#define CY_LEVELS 3 -+ -+typedef uintptr_t cyclic_id_t; -+typedef uint16_t cyc_level_t; -+typedef void (*cyc_func_t)(uintptr_t); -+ -+#define CYCLIC_NONE ((cyclic_id_t)0) -+ -+struct cyc_handler { -+ cyc_func_t cyh_func; -+ uintptr_t cyh_arg; -+ cyc_level_t cyh_level; -+}; -+ -+#define CY_INTERVAL_INF (-1) -+ -+struct cyc_time { -+ ktime_t cyt_when; -+ ktime_t cyt_interval; -+}; -+ -+struct cyc_omni_handler { -+ void (*cyo_online)(void *, uint32_t, struct cyc_handler *, -+ struct cyc_time *); -+ void (*cyo_offline)(void *, uint32_t, void *); -+ void *cyo_arg; -+}; -+ -+extern cyclic_id_t cyclic_add(struct cyc_handler *, struct cyc_time *); -+extern cyclic_id_t cyclic_add_omni(struct cyc_omni_handler *); -+extern void cyclic_remove(cyclic_id_t); -+extern void cyclic_reprogram(cyclic_id_t, ktime_t); -+ -+#endif /* _CYCLIC_H_ */ -diff --git a/include/linux/dtrace/cpu_defines.h b/include/linux/dtrace/cpu_defines.h -new file mode 100644 -index 000000000000..c8719378da80 ---- /dev/null -+++ b/include/linux/dtrace/cpu_defines.h -@@ -0,0 +1,61 @@ -+/* Copyright (C) 2011-2014 Oracle, Inc. 
*/ -+ -+#ifndef _LINUX_DTRACE_CPU_DEFINES_H_ -+#define _LINUX_DTRACE_CPU_DEFINES_H_ -+ -+#include <linux/percpu.h> -+ -+#define CPUC_SIZE (sizeof (uint16_t) + sizeof(uint8_t) + \ -+ sizeof(uintptr_t) + sizeof(struct mutex)) -+#define CPUC_PADSIZE (192 - CPUC_SIZE) -+ -+#define per_cpu_core(cpu) (&per_cpu(dtrace_cpu_core, (cpu))) -+#if 0 -+# define this_cpu_core (this_cpu_ptr(&dtrace_cpu_core)) -+#else -+# define this_cpu_core (per_cpu_core(smp_processor_id())) -+#endif -+ -+#define DTRACE_CPUFLAG_ISSET(flag) \ -+ (this_cpu_core->cpuc_dtrace_flags & (flag)) -+ -+#define DTRACE_CPUFLAG_SET(flag) \ -+ (this_cpu_core->cpuc_dtrace_flags |= (flag)) -+ -+#define DTRACE_CPUFLAG_CLEAR(flag) \ -+ (this_cpu_core->cpuc_dtrace_flags &= ~(flag)) -+ -+#define CPU_DTRACE_NOFAULT 0x0001 -+#define CPU_DTRACE_DROP 0x0002 -+#define CPU_DTRACE_BADADDR 0x0004 -+#define CPU_DTRACE_BADALIGN 0x0008 -+#define CPU_DTRACE_DIVZERO 0x0010 -+#define CPU_DTRACE_ILLOP 0x0020 -+#define CPU_DTRACE_NOSCRATCH 0x0040 -+#define CPU_DTRACE_KPRIV 0x0080 -+#define CPU_DTRACE_UPRIV 0x0100 -+#define CPU_DTRACE_TUPOFLOW 0x0200 -+#define CPU_DTRACE_ENTRY 0x0800 -+#define CPU_DTRACE_BADSTACK 0x1000 -+#define CPU_DTRACE_NOPF 0x2000 -+#define CPU_DTRACE_PF_TRAPPED 0x4000 -+ -+#define CPU_DTRACE_FAULT (CPU_DTRACE_BADADDR | CPU_DTRACE_BADALIGN | \ -+ CPU_DTRACE_DIVZERO | CPU_DTRACE_ILLOP | \ -+ CPU_DTRACE_NOSCRATCH | CPU_DTRACE_KPRIV | \ -+ CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW | \ -+ CPU_DTRACE_BADSTACK | CPU_DTRACE_PF_TRAPPED) -+#define CPU_DTRACE_ERROR (CPU_DTRACE_FAULT | CPU_DTRACE_DROP) -+ -+typedef uint32_t processorid_t; -+typedef uint32_t psetid_t; -+typedef uint32_t chipid_t; -+typedef uint32_t lgrp_id_t; -+ -+struct cpu_core; -+struct cpuinfo; -+ -+#define per_cpu_info(cpu) (&per_cpu(dtrace_cpu_info, (cpu))) -+#define this_cpu_info (this_cpu_ptr(&dtrace_cpu_info)) -+ -+#endif /* _LINUX_DTRACE_CPU_DEFINES_H_ */ -diff --git a/include/linux/dtrace_cpu.h b/include/linux/dtrace_cpu.h -new file mode 100644 
-index 000000000000..d9c7b2da263c ---- /dev/null -+++ b/include/linux/dtrace_cpu.h -@@ -0,0 +1,53 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2004, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _LINUX_DTRACE_CPU_H_ -+#define _LINUX_DTRACE_CPU_H_ -+ -+#ifdef CONFIG_DTRACE -+ -+#include <linux/ktime.h> -+#include <linux/mutex.h> -+#include <linux/rwlock.h> -+#include <linux/dtrace_types.h> -+#include <linux/dtrace_cpu_defines.h> -+#include <asm/dtrace_cpuinfo.h> -+ -+struct cpu_core { -+ uint16_t cpuc_dtrace_flags; -+ uint8_t cpuc_dcpc_intr_state; -+ uint8_t cpuc_pad[CPUC_PADSIZE]; -+ uintptr_t cpuc_dtrace_illval; -+ struct mutex cpuc_pid_lock; -+ -+ uintptr_t cpu_dtrace_caller; -+ struct pt_regs *cpu_dtrace_regs; -+ ktime_t cpu_dtrace_chillmark; -+ ktime_t cpu_dtrace_chilled; -+ rwlock_t cpu_ft_lock; -+ atomic64_t cpuc_sync_requests; -+ atomic64_t cpuc_in_probe_ctx; -+ dtrace_id_t cpuc_current_probe; -+}; -+ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct cpu_core, dtrace_cpu_core); -+ -+struct cpuinfo { -+ processorid_t cpu_id; -+ psetid_t cpu_pset; -+ chipid_t cpu_chip; -+ lgrp_id_t cpu_lgrp; -+ cpuinfo_arch_t *cpu_info; -+}; -+ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo, dtrace_cpu_info); -+ -+/* ABI requirement: type names compiled into DTrace userspace. 
*/ -+typedef struct cpuinfo cpuinfo_t; -+ -+extern void dtrace_cpu_init(void); -+ -+#endif /* CONFIG_DTRACE */ -+#endif /* _LINUX_DTRACE_CPU_H_ */ -diff --git a/include/linux/dtrace_cpu_defines.h b/include/linux/dtrace_cpu_defines.h -new file mode 100644 -index 000000000000..f5866a6e95b8 ---- /dev/null -+++ b/include/linux/dtrace_cpu_defines.h -@@ -0,0 +1,2 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#include <linux/dtrace/cpu_defines.h> -diff --git a/include/linux/dtrace_os.h b/include/linux/dtrace_os.h -new file mode 100644 -index 000000000000..5bcd77e08a14 ---- /dev/null -+++ b/include/linux/dtrace_os.h -@@ -0,0 +1,120 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _LINUX_DTRACE_OS_H_ -+#define _LINUX_DTRACE_OS_H_ -+ -+#ifndef HEADERS_CHECK -+ -+#ifdef CONFIG_DTRACE -+ -+#include <linux/ktime.h> -+#include <linux/mm.h> -+#include <linux/notifier.h> -+#include <linux/timekeeper_internal.h> -+#include <asm/unistd.h> -+#include <linux/dtrace_cpu.h> -+#include <linux/dtrace_task.h> -+#include <linux/dtrace_psinfo.h> -+ -+extern struct module *dtrace_kmod; -+ -+extern void __init dtrace_os_init(void); -+extern void __init dtrace_psinfo_os_init(void); -+extern void __init dtrace_task_os_init(void); -+ -+extern void dtrace_mod_pdata_alloc(struct module *); -+extern void dtrace_mod_pdata_free(struct module *); -+extern int dtrace_destroy_prov(struct module *); -+ -+extern int dtrace_enable(void); -+extern void dtrace_disable(void); -+ -+extern ktime_t dtrace_gethrtime(void); -+extern ktime_t dtrace_getwalltime(void); -+ -+enum dtrace_vtime_state { -+ DTRACE_VTIME_INACTIVE = 0, -+ DTRACE_VTIME_ACTIVE -+}; -+ -+extern enum dtrace_vtime_state dtrace_vtime_active; -+ -+typedef void for_each_module_fn(void *, struct module *); -+extern void dtrace_for_each_module(for_each_module_fn *fn, void *arg); -+ -+extern void dtrace_update_time(struct timekeeper *); -+extern ktime_t 
dtrace_get_walltime(void); -+ -+extern void dtrace_vtime_enable(void); -+extern void dtrace_vtime_disable(void); -+extern void dtrace_vtime_switch(struct task_struct *, struct task_struct *); -+ -+#include <asm/dtrace_util.h> -+ -+extern int dtrace_instr_size(const asm_instr_t *); -+ -+extern int dtrace_die_notifier(struct notifier_block *, unsigned long, void *); -+ -+#define STACKTRACE_KERNEL 0x01 -+#define STACKTRACE_USER 0x02 -+#define STACKTRACE_TYPE 0x0f -+ -+struct stacktrace_state { -+ uint64_t *pcs; -+ uint64_t *fps; -+ int limit; -+ int depth; -+ int flags; -+}; -+ -+extern void dtrace_stacktrace(struct stacktrace_state *); -+extern void dtrace_user_stacktrace(struct stacktrace_state *); -+extern void dtrace_handle_badaddr(struct pt_regs *); -+extern void dtrace_mod_pdata_init(struct dtrace_module *pdata); -+extern void dtrace_mod_pdata_cleanup(struct dtrace_module *pdata); -+ -+/* -+ * This is only safe to call if we know this is a userspace fault -+ * or that the call happens after early boot. -+ */ -+static inline int dtrace_no_pf(struct pt_regs *regs) -+{ -+ if (unlikely(DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT))) { -+ dtrace_handle_badaddr(regs); -+ return 1; -+ } -+ -+ return 0; -+} -+ -+extern void (*dtrace_helpers_cleanup)(struct task_struct *); -+extern void (*dtrace_helpers_fork)(struct task_struct *, struct task_struct *); -+ -+#else -+ -+/* -+ * See arch/x86/mm/fault.c. 
-+ */ -+ -+#define dtrace_no_pf(ignore) 0 -+ -+/* -+ * See kernel/timekeeper.c -+ */ -+#define dtrace_update_time(ignore) -+ -+/* -+ * See kernel/dtrace/dtrace_os.c -+ */ -+#define dtrace_mod_pdata_alloc(ignore) -+#define dtrace_mod_pdata_free(ignore) -+#define dtrace_destroy_prov(ignore) 1 -+ -+#endif /* CONFIG_DTRACE */ -+ -+#endif /* !HEADERS_CHECK */ -+ -+#endif /* _LINUX_DTRACE_OS_H_ */ -diff --git a/include/linux/dtrace_psinfo.h b/include/linux/dtrace_psinfo.h -new file mode 100644 -index 000000000000..53a9c317a8a3 ---- /dev/null -+++ b/include/linux/dtrace_psinfo.h -@@ -0,0 +1,59 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _LINUX_DTRACE_PSINFO_H_ -+#define _LINUX_DTRACE_PSINFO_H_ -+ -+#ifdef CONFIG_DTRACE -+ -+#define PR_PSARGS_SZ 80 -+#define PR_ARGV_SZ 512 -+#define PR_ENVP_SZ 512 -+ -+/* -+ * DTrace's per-process info (per-tgid). -+ * -+ * All threads in a process share the same structure instance. -+ */ -+struct dtrace_psinfo { -+ atomic_t dtps_usage; -+ unsigned long dtps_argc; -+ char **dtps_argv; -+ unsigned long dtps_envc; -+ char **dtps_envp; -+ char dtps_psargs[PR_PSARGS_SZ]; -+}; -+ -+/* -+ * DTrace psinfo API. Requires struct dtrace_task as its argument. 
-+ */ -+ -+extern void dtrace_psinfo_alloc(struct task_struct *); -+extern void dtrace_psinfo_free(struct dtrace_psinfo *); -+ -+static inline void dtrace_psinfo_get(struct dtrace_psinfo *psinfo) -+{ -+ if (likely(psinfo)) -+ atomic_inc(&(psinfo)->dtps_usage); -+} -+ -+static inline void dtrace_psinfo_put(struct dtrace_psinfo *psinfo) -+{ -+ if (likely((psinfo))) { -+ if (atomic_dec_and_test(&(psinfo)->dtps_usage)) -+ dtrace_psinfo_free(psinfo); -+ } -+} -+ -+#else /* CONFIG_DTRACE */ -+ -+#define dtrace_psinfo_alloc(ignore) -+#define dtrace_psinfo_free(ignore) -+#define dtrace_psinfo_get(ignore) -+#define dtrace_psinfo_put(ignore) -+ -+#endif /* CONFIG_DTRACE */ -+ -+#endif /* _LINUX_DTRACE_PSINFO_H_ */ -diff --git a/include/linux/dtrace_task.h b/include/linux/dtrace_task.h -new file mode 100644 -index 000000000000..ce7111223788 ---- /dev/null -+++ b/include/linux/dtrace_task.h -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _LINUX_DTRACE_TASK_H_ -+#define _LINUX_DTRACE_TASK_H_ -+ -+#ifdef CONFIG_DTRACE -+ -+#include <linux/sched.h> -+ -+/* -+ * Opaque handle for per-task data. -+ */ -+struct dtrace_task; -+ -+/* -+ * DTrace's kernel API for per-task data manipulation. 
-+ */ -+ -+extern void dtrace_task_init(struct task_struct *); -+extern void dtrace_task_exec(struct task_struct *); -+extern void dtrace_task_copy(struct task_struct *, struct task_struct *); -+extern void dtrace_task_free(struct task_struct *); -+extern void dtrace_task_dup(struct task_struct *, struct task_struct *); -+ -+#else /* CONFIG_DTRACE */ -+ -+#define dtrace_task_init(ignore) -+#define dtrace_task_exec(ignore) -+#define dtrace_task_copy(ignore1, ignore2) -+#define dtrace_task_free(ignore) -+#define dtrace_task_dup(ignore1, ignore2) -+ -+#endif /* CONFIG_DTRACE */ -+ -+#endif /* _LINUX_DTRACE_TASK_H_ */ -diff --git a/include/linux/dtrace_task_impl.h b/include/linux/dtrace_task_impl.h -new file mode 100644 -index 000000000000..2f76b475c2f8 ---- /dev/null -+++ b/include/linux/dtrace_task_impl.h -@@ -0,0 +1,28 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+#ifndef _LINUX_DTRACE_TASK_IMPL_H_ -+#define _LINUX_DTRACE_TASK_IMPL_H_ -+ -+#ifdef CONFIG_DTRACE -+ -+#include <linux/dtrace_task.h> -+#include <linux/dtrace_psinfo.h> -+ -+struct dtrace_task { -+ uint32_t dt_predcache; -+ ktime_t dt_vtime; -+ ktime_t dt_start; -+ uint8_t dt_stop; -+ uint8_t dt_sig; -+ struct dtrace_psinfo *dt_psinfo; -+ void *dt_helpers; -+ uint32_t dt_probes; -+ uint64_t dt_tp_count; -+ void *dt_ustack; -+}; -+ -+#endif /* CONFIG_DTRACE */ -+#endif /* _LINUX_DTRACE_TASK_IMPL_H_ */ -+ -diff --git a/include/linux/dtrace_types.h b/include/linux/dtrace_types.h -new file mode 100644 -index 000000000000..4484dc58e188 ---- /dev/null -+++ b/include/linux/dtrace_types.h -@@ -0,0 +1,13 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 
-+ */ -+ -+#ifndef _DTRACE_TYPES_H_ -+#define _DTRACE_TYPES_H_ -+ -+typedef uint32_t dtrace_id_t; -+ -+#define DTRACE_IDNONE 0 -+ -+#endif /* _DTRACE_TYPES_H_ */ -diff --git a/include/linux/ktime.h b/include/linux/ktime.h -index d1fb05135665..c8c015bbaeaf 100644 ---- a/include/linux/ktime.h -+++ b/include/linux/ktime.h -@@ -155,6 +155,14 @@ static inline s64 ktime_divns(const ktime_t kt, s64 div) - } - #endif - -+/* -+ * ktime_nz - Check whether a ktime_v variable is non-zero -+ */ -+static inline int ktime_nz(const ktime_t kt) -+{ -+ return kt != 0LL; -+} -+ - static inline s64 ktime_to_us(const ktime_t kt) - { - return ktime_divns(kt, NSEC_PER_USEC); -diff --git a/include/linux/module.h b/include/linux/module.h -index 0d9777ecee92..1411e8cc803c 100644 ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -498,6 +498,9 @@ struct module { - struct klp_modinfo *klp_info; - #endif - -+#ifdef CONFIG_DTRACE -+ void *pdata; -+#endif - #ifdef CONFIG_MODULE_UNLOAD - /* What modules depend on me? 
*/ - struct list_head source_list; -diff --git a/include/linux/mutex.h b/include/linux/mutex.h -index aca8f36dfac9..f2861d68ade1 100644 ---- a/include/linux/mutex.h -+++ b/include/linux/mutex.h -@@ -20,6 +20,10 @@ - #include <linux/osq_lock.h> - #include <linux/debug_locks.h> - -+#ifdef CONFIG_SMP -+# include <asm/current.h> -+#endif -+ - struct ww_acquire_ctx; - - /* -@@ -210,4 +214,16 @@ enum mutex_trylock_recursive_enum { - extern /* __deprecated */ __must_check enum mutex_trylock_recursive_enum - mutex_trylock_recursive(struct mutex *lock); - -+#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) -+static inline int mutex_owned(struct mutex *lock) -+{ -+ return mutex_is_locked(lock) && __mutex_owner(lock) == current; -+} -+#else -+static inline int mutex_owned(struct mutex *lock) -+{ -+ return mutex_is_locked(lock); -+} -+#endif -+ - #endif /* __LINUX_MUTEX_H */ -diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h -index 3dcd617e65ae..a7e72774f17e 100644 ---- a/include/linux/rwlock.h -+++ b/include/linux/rwlock.h -@@ -59,6 +59,13 @@ do { \ - # define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) - #endif - -+#ifdef CONFIG_DTRACE -+#define peek_read_can_lock(rwlock) \ -+ arch_peek_read_can_lock(&(rwlock)->raw_lock) -+#define peek_write_can_lock(rwlock) \ -+ arch_peek_write_can_lock(&(rwlock)->raw_lock) -+#endif /* CONFIG_DTRACE */ -+ - /* - * Define the various rw_lock methods. Note we define these - * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. 
The various -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 0323e4f0982a..b3294ca52457 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -31,6 +31,7 @@ - #include <linux/task_io_accounting.h> - #include <linux/posix-timers.h> - #include <linux/rseq.h> -+#include <linux/dtrace_task.h> - - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; -@@ -1247,6 +1248,9 @@ struct task_struct { - struct request_queue *throttle_queue; - #endif - -+#ifdef CONFIG_DTRACE -+ struct dtrace_task *dt_task; -+#endif - #ifdef CONFIG_UPROBES - struct uprobe_task *utask; - #endif -diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h -index 0ac9112c1bbe..cfd00c13f2aa 100644 ---- a/include/linux/spinlock_up.h -+++ b/include/linux/spinlock_up.h -@@ -69,4 +69,9 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) - - #define arch_spin_is_contended(lock) (((void)(lock), 0)) - -+#ifdef CONFIG_DTRACE -+#define arch_peek_read_can_lock(lock) (((void)(lock), 1)) -+#define arch_peek_write_can_lock(lock) (((void)(lock), 1)) -+#endif /* CONFIG_DTRACE */ -+ - #endif /* __LINUX_SPINLOCK_UP_H */ -diff --git a/include/uapi/linux/dtrace/Kbuild b/include/uapi/linux/dtrace/Kbuild -new file mode 100644 -index 000000000000..0cb5b941b72b ---- /dev/null -+++ b/include/uapi/linux/dtrace/Kbuild -@@ -0,0 +1,35 @@ -+# UAPI Header export list -+header-y += actions_defines.h -+header-y += actions.h -+header-y += arg_defines.h -+header-y += arg.h -+header-y += buffer_defines.h -+header-y += buffer.h -+header-y += conf_defines.h -+header-y += conf.h -+header-y += cpu_defines.h -+header-y += dif_defines.h -+header-y += dif.h -+header-y += difo_defines.h -+header-y += difo.h -+header-y += dof_defines.h -+header-y += dof.h -+header-y += dtrace.h -+header-y += enabling_defines.h -+header-y += enabling.h -+header-y += fasttrap_defines.h -+header-y += fasttrap.h -+header-y += fasttrap_ioctl.h -+header-y += faults_defines.h 
-+header-y += faults.h -+header-y += helpers_defines.h -+header-y += helpers.h -+header-y += ioctl.h -+header-y += metadesc_defines.h -+header-y += metadesc.h -+header-y += options_defines.h -+header-y += options.h -+header-y += stability_defines.h -+header-y += stability.h -+header-y += status.h -+header-y += universal.h -diff --git a/include/uapi/linux/dtrace/actions.h b/include/uapi/linux/dtrace/actions.h -new file mode 100644 -index 000000000000..9b47343271ba ---- /dev/null -+++ b/include/uapi/linux/dtrace/actions.h -@@ -0,0 +1,14 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _LINUX_DTRACE_ACTIONS_H -+#define _LINUX_DTRACE_ACTIONS_H -+ -+#include <linux/dtrace/actions_defines.h> -+ -+#endif /* _LINUX_DTRACE_ACTIONS_H */ -diff --git a/include/uapi/linux/dtrace/actions_defines.h b/include/uapi/linux/dtrace/actions_defines.h -new file mode 100644 -index 000000000000..4512c291f58a ---- /dev/null -+++ b/include/uapi/linux/dtrace/actions_defines.h -@@ -0,0 +1,181 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_ACTIONS_DEFINES_H -+#define _LINUX_DTRACE_ACTIONS_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+/* -+ * The upper byte determines the class of the action; the low bytes determines -+ * the specific action within that class. 
The classes of actions are as -+ * follows: -+ * -+ * [ no class ] <= May record process- or kernel-related data -+ * DTRACEACT_PROC <= Only records process-related data -+ * DTRACEACT_PROC_DESTRUCTIVE <= Potentially destructive to processes -+ * DTRACEACT_KERNEL <= Only records kernel-related data -+ * DTRACEACT_KERNEL_DESTRUCTIVE <= Potentially destructive to the kernel -+ * DTRACEACT_SPECULATIVE <= Speculation-related action -+ * DTRACEACT_AGGREGATION <= Aggregating action -+ */ -+#define DTRACEACT_NONE 0 /* no action */ -+#define DTRACEACT_DIFEXPR 1 /* action is DIF expression */ -+#define DTRACEACT_EXIT 2 /* exit() action */ -+#define DTRACEACT_PRINTF 3 /* printf() action */ -+#define DTRACEACT_PRINTA 4 /* printa() action */ -+#define DTRACEACT_LIBACT 5 /* library-controlled action */ -+#define DTRACEACT_TRACEMEM 6 /* tracemem() action */ -+#define DTRACEACT_PCAP 7 /* pcap() action */ -+ -+#define DTRACEACT_PROC 0x0100 -+#define DTRACEACT_USTACK (DTRACEACT_PROC + 1) -+#define DTRACEACT_JSTACK (DTRACEACT_PROC + 2) -+#define DTRACEACT_USYM (DTRACEACT_PROC + 3) -+#define DTRACEACT_UMOD (DTRACEACT_PROC + 4) -+#define DTRACEACT_UADDR (DTRACEACT_PROC + 5) -+ -+#define DTRACEACT_PROC_DESTRUCTIVE 0x0200 -+#define DTRACEACT_STOP (DTRACEACT_PROC_DESTRUCTIVE + 1) -+#define DTRACEACT_RAISE (DTRACEACT_PROC_DESTRUCTIVE + 2) -+#define DTRACEACT_SYSTEM (DTRACEACT_PROC_DESTRUCTIVE + 3) -+#define DTRACEACT_FREOPEN (DTRACEACT_PROC_DESTRUCTIVE + 4) -+ -+#define DTRACEACT_PROC_CONTROL 0x0300 -+ -+#define DTRACEACT_KERNEL 0x0400 -+#define DTRACEACT_STACK (DTRACEACT_KERNEL + 1) -+#define DTRACEACT_SYM (DTRACEACT_KERNEL + 2) -+#define DTRACEACT_MOD (DTRACEACT_KERNEL + 3) -+ -+#define DTRACEACT_KERNEL_DESTRUCTIVE 0x0500 -+#define DTRACEACT_BREAKPOINT (DTRACEACT_KERNEL_DESTRUCTIVE + 1) -+#define DTRACEACT_PANIC (DTRACEACT_KERNEL_DESTRUCTIVE + 2) -+#define DTRACEACT_CHILL (DTRACEACT_KERNEL_DESTRUCTIVE + 3) -+ -+#define DTRACEACT_SPECULATIVE 0x0600 -+#define DTRACEACT_SPECULATE 
(DTRACEACT_SPECULATIVE + 1) -+#define DTRACEACT_COMMIT (DTRACEACT_SPECULATIVE + 2) -+#define DTRACEACT_DISCARD (DTRACEACT_SPECULATIVE + 3) -+ -+#define DTRACEACT_CLASS(x) ((x) & 0xff00) -+ -+#define DTRACEACT_ISAGG(x) \ -+ (DTRACEACT_CLASS(x) == DTRACEACT_AGGREGATION) -+ -+#define DTRACEACT_ISDESTRUCTIVE(x) \ -+ (DTRACEACT_CLASS(x) == DTRACEACT_PROC_DESTRUCTIVE || \ -+ DTRACEACT_CLASS(x) == DTRACEACT_KERNEL_DESTRUCTIVE) -+ -+#define DTRACEACT_ISSPECULATIVE(x) \ -+ (DTRACEACT_CLASS(x) == DTRACEACT_SPECULATIVE) -+ -+#define DTRACEACT_ISPRINTFLIKE(x) \ -+ ((x) == DTRACEACT_PRINTF || (x) == DTRACEACT_PRINTA || \ -+ (x) == DTRACEACT_SYSTEM || (x) == DTRACEACT_FREOPEN) -+ -+/* -+ * DTrace Aggregating Actions -+ * -+ * These are functions f(x) for which the following is true: -+ * -+ * f(f(x_0) U f(x_1) U ... U f(x_n)) = f(x_0 U x_1 U ... U x_n) -+ * -+ * where x_n is a set of arbitrary data. Aggregating actions are in their own -+ * DTrace action class, DTTRACEACT_AGGREGATION. The macros provided here allow -+ * for easier processing of the aggregation argument and data payload for a few -+ * aggregating actions (notably: quantize(), lquantize(), and ustack()). -+ */ -+ -+#define DTRACEACT_AGGREGATION 0x0700 -+#define DTRACEAGG_COUNT (DTRACEACT_AGGREGATION + 1) -+#define DTRACEAGG_MIN (DTRACEACT_AGGREGATION + 2) -+#define DTRACEAGG_MAX (DTRACEACT_AGGREGATION + 3) -+#define DTRACEAGG_AVG (DTRACEACT_AGGREGATION + 4) -+#define DTRACEAGG_SUM (DTRACEACT_AGGREGATION + 5) -+#define DTRACEAGG_STDDEV (DTRACEACT_AGGREGATION + 6) -+#define DTRACEAGG_QUANTIZE (DTRACEACT_AGGREGATION + 7) -+#define DTRACEAGG_LQUANTIZE (DTRACEACT_AGGREGATION + 8) -+#define DTRACEAGG_LLQUANTIZE (DTRACEACT_AGGREGATION + 9) -+ -+#define DTRACE_QUANTIZE_NBUCKETS \ -+ (((sizeof(uint64_t) * NBBY) - 1) * 2 + 1) -+ -+#define DTRACE_QUANTIZE_ZEROBUCKET ((sizeof(uint64_t) * NBBY) - 1) -+ -+#define DTRACE_QUANTIZE_BUCKETVAL(buck) \ -+ (int64_t)((buck) < DTRACE_QUANTIZE_ZEROBUCKET ? 
\ -+ -(1LL << (DTRACE_QUANTIZE_ZEROBUCKET - 1 - (buck))) : \ -+ (buck) == DTRACE_QUANTIZE_ZEROBUCKET ? 0 : \ -+ 1LL << ((buck) - DTRACE_QUANTIZE_ZEROBUCKET - 1)) -+ -+#define DTRACE_LQUANTIZE_STEPSHIFT 48 -+#define DTRACE_LQUANTIZE_STEPMASK ((uint64_t)UINT16_MAX << 48) -+#define DTRACE_LQUANTIZE_LEVELSHIFT 32 -+#define DTRACE_LQUANTIZE_LEVELMASK ((uint64_t)UINT16_MAX << 32) -+#define DTRACE_LQUANTIZE_BASESHIFT 0 -+#define DTRACE_LQUANTIZE_BASEMASK UINT32_MAX -+ -+#define DTRACE_LQUANTIZE_STEP(x) \ -+ (uint16_t)(((x) & DTRACE_LQUANTIZE_STEPMASK) >> \ -+ DTRACE_LQUANTIZE_STEPSHIFT) -+ -+#define DTRACE_LQUANTIZE_LEVELS(x) \ -+ (uint16_t)(((x) & DTRACE_LQUANTIZE_LEVELMASK) >> \ -+ DTRACE_LQUANTIZE_LEVELSHIFT) -+ -+#define DTRACE_LQUANTIZE_BASE(x) \ -+ (int32_t)(((x) & DTRACE_LQUANTIZE_BASEMASK) >> \ -+ DTRACE_LQUANTIZE_BASESHIFT) -+ -+#define DTRACE_LLQUANTIZE_STEPSSHIFT 48 -+#define DTRACE_LLQUANTIZE_STEPSMASK ((uint64_t)UINT16_MAX << 48) -+#define DTRACE_LLQUANTIZE_HMAGSHIFT 32 -+#define DTRACE_LLQUANTIZE_HMAGMASK ((uint64_t)UINT16_MAX << 32) -+#define DTRACE_LLQUANTIZE_LMAGSHIFT 16 -+#define DTRACE_LLQUANTIZE_LMAGMASK ((uint64_t)UINT16_MAX << 16) -+#define DTRACE_LLQUANTIZE_FACTORSHIFT 0 -+#define DTRACE_LLQUANTIZE_FACTORMASK UINT16_MAX -+ -+#define DTRACE_LLQUANTIZE_STEPS(x) \ -+ (uint16_t)(((x) & DTRACE_LLQUANTIZE_STEPSMASK) >> \ -+ DTRACE_LLQUANTIZE_STEPSSHIFT) -+ -+#define DTRACE_LLQUANTIZE_HMAG(x) \ -+ (uint16_t)(((x) & DTRACE_LLQUANTIZE_HMAGMASK) >> \ -+ DTRACE_LLQUANTIZE_HMAGSHIFT) -+ -+#define DTRACE_LLQUANTIZE_LMAG(x) \ -+ (uint16_t)(((x) & DTRACE_LLQUANTIZE_LMAGMASK) >> \ -+ DTRACE_LLQUANTIZE_LMAGSHIFT) -+ -+#define DTRACE_LLQUANTIZE_FACTOR(x) \ -+ (uint16_t)(((x) & DTRACE_LLQUANTIZE_FACTORMASK) >> \ -+ DTRACE_LLQUANTIZE_FACTORSHIFT) -+ -+#define DTRACE_USTACK_NFRAMES(x) (uint32_t)((x) & UINT32_MAX) -+#define DTRACE_USTACK_STRSIZE(x) (uint32_t)((x) >> 32) -+#define DTRACE_USTACK_ARG(x, y) \ -+ ((((uint64_t)(y)) << 32) | ((x) & UINT32_MAX)) -+ -+#ifndef 
_LP64 -+# ifndef _LITTLE_ENDIAN -+# define DTRACE_PTR(type, name) uint32_t name##pad; type *name -+# else -+# define DTRACE_PTR(type, name) type *name; uint32_t name##pad -+# endif -+#else -+# define DTRACE_PTR(type, name) type *name -+#endif -+ -+#endif /* _LINUX_DTRACE_ACTIONS_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/arg.h b/include/uapi/linux/dtrace/arg.h -new file mode 100644 -index 000000000000..4a9099a816e6 ---- /dev/null -+++ b/include/uapi/linux/dtrace/arg.h -@@ -0,0 +1,42 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_ARG_H -+#define _LINUX_DTRACE_ARG_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/arg_defines.h> -+ -+/* -+ * Because it would waste both space and time, argument types do not reside -+ * with the probe. In order to determine argument types for args[X] -+ * variables, the D compiler queries for argument types on a probe-by-probe -+ * basis. (This optimizes for the common case that arguments are either not -+ * used or used in an untyped fashion.) Typed arguments are specified with a -+ * string of the type name in the dtragd_native member of the argument -+ * description structure. Typed arguments may be further translated to types -+ * of greater stability; the provider indicates such a translated argument by -+ * filling in the dtargd_xlate member with the string of the translated type. -+ * Finally, the provider may indicate which argument value a given argument -+ * maps to by setting the dtargd_mapping member -- allowing a single argument -+ * to map to multiple args[X] variables. 
-+ */ -+typedef struct dtrace_argdesc { -+ dtrace_id_t dtargd_id; -+ int dtargd_ndx; -+ int dtargd_mapping; -+ char dtargd_native[DTRACE_ARGTYPELEN]; -+ char dtargd_xlate[DTRACE_ARGTYPELEN]; -+} dtrace_argdesc_t; -+ -+#endif /* _LINUX_DTRACE_ARG_H */ -diff --git a/include/uapi/linux/dtrace/arg_defines.h b/include/uapi/linux/dtrace/arg_defines.h -new file mode 100644 -index 000000000000..72862cd1b8e6 ---- /dev/null -+++ b/include/uapi/linux/dtrace/arg_defines.h -@@ -0,0 +1,21 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_ARG_DEFINES_H -+#define _LINUX_DTRACE_ARG_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+struct dtrace_argdesc; -+ -+#endif /* _LINUX_DTRACE_ARG_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/buffer.h b/include/uapi/linux/dtrace/buffer.h -new file mode 100644 -index 000000000000..9bbbc4f1f14b ---- /dev/null -+++ b/include/uapi/linux/dtrace/buffer.h -@@ -0,0 +1,43 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_BUFFER_H -+#define _LINUX_DTRACE_BUFFER_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/actions_defines.h> -+#include <linux/dtrace/buffer_defines.h> -+ -+/* -+ * In order to get a snapshot of the principal or aggregation buffer, -+ * user-level passes a buffer description to the kernel with the dtrace_bufdesc -+ * structure. This describes which CPU user-level is interested in, and -+ * where user-level wishes the kernel to snapshot the buffer to (the -+ * dtbd_data field). The kernel uses the same structure to pass back some -+ * information regarding the buffer: the size of data actually copied out, the -+ * number of drops, the number of errors, and the offset of the oldest record. -+ * If the buffer policy is a "switch" policy, taking a snapshot of the -+ * principal buffer has the additional effect of switching the active and -+ * inactive buffers. Taking a snapshot of the aggregation buffer _always_ has -+ * the additional effect of switching the active and inactive buffers. -+ */ -+typedef struct dtrace_bufdesc { -+ uint64_t dtbd_size; /* size of buffer */ -+ uint32_t dtbd_cpu; /* CPU or DTRACE_CPUALL */ -+ uint32_t dtbd_errors; /* number of errors */ -+ uint64_t dtbd_drops; /* number of drops */ -+ DTRACE_PTR(char, dtbd_data); /* data */ -+ uint64_t dtbd_oldest; /* offset of oldest record */ -+} dtrace_bufdesc_t; -+ -+#endif /* _LINUX_DTRACE_BUFFER_H */ -diff --git a/include/uapi/linux/dtrace/buffer_defines.h b/include/uapi/linux/dtrace/buffer_defines.h -new file mode 100644 -index 000000000000..16c3c193618a ---- /dev/null -+++ b/include/uapi/linux/dtrace/buffer_defines.h -@@ -0,0 +1,21 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. 
-+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_BUFFER_DEFINES_H -+#define _LINUX_DTRACE_BUFFER_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+struct dtrace_bufdesc; -+ -+#endif /* _LINUX_DTRACE_BUFFER_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/conf.h b/include/uapi/linux/dtrace/conf.h -new file mode 100644 -index 000000000000..95b201958f4c ---- /dev/null -+++ b/include/uapi/linux/dtrace/conf.h -@@ -0,0 +1,35 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_CONF_H -+#define _LINUX_DTRACE_CONF_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/conf_defines.h> -+ -+/* -+ * User-level may need to understand some elements of the kernel DTrace -+ * configuration in order to generate correct DIF. This information is -+ * conveyed via the dtrace_conf structure. -+ */ -+typedef struct dtrace_conf { -+ uint_t dtc_difversion; /* supported DIF version */ -+ uint_t dtc_difintregs; /* # of DIF integer registers */ -+ uint_t dtc_diftupregs; /* # of DIF tuple registers */ -+ uint_t dtc_ctfmodel; /* CTF data model */ -+ /* Deviation from Solaris... Used to just be 8 padding entries. 
*/ -+ uint_t dtc_maxbufs; /* max # of buffers */ -+ uint_t dtc_pad[7]; /* reserved for future use */ -+} dtrace_conf_t; -+ -+#endif /* _LINUX_DTRACE_CONF_H */ -diff --git a/include/uapi/linux/dtrace/conf_defines.h b/include/uapi/linux/dtrace/conf_defines.h -new file mode 100644 -index 000000000000..5c4a1cb5d37c ---- /dev/null -+++ b/include/uapi/linux/dtrace/conf_defines.h -@@ -0,0 +1,21 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_CONF_DEFINES_H -+#define _LINUX_DTRACE_CONF_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+struct dtrace_conf; -+ -+#endif /* _LINUX_DTRACE_CONF_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/cpu_defines.h b/include/uapi/linux/dtrace/cpu_defines.h -new file mode 100644 -index 000000000000..a1cd3e410ccc ---- /dev/null -+++ b/include/uapi/linux/dtrace/cpu_defines.h -@@ -0,0 +1,17 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_CPU_DEFINES_H_ -+#define _LINUX_DTRACE_CPU_DEFINES_H_ -+ -+typedef uint32_t processorid_t; -+typedef uint32_t psetid_t; -+typedef uint32_t chipid_t; -+typedef uint32_t lgrp_id_t; -+ -+#endif /* _LINUX_DTRACE_CPU_DEFINES_H_ */ -diff --git a/include/uapi/linux/dtrace/dif.h b/include/uapi/linux/dtrace/dif.h -new file mode 100644 -index 000000000000..92daea17a1f1 ---- /dev/null -+++ b/include/uapi/linux/dtrace/dif.h -@@ -0,0 +1,60 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_DIF_H -+#define _LINUX_DTRACE_DIF_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/dif_defines.h> -+ -+/* -+ * The following definitions describe the DTrace Intermediate Format (DIF), a a -+ * RISC-like instruction set and program encoding used to represent predicates -+ * and actions that can be bound to DTrace probes. The constants below defining -+ * the number of available registers are suggested minimums; the compiler should -+ * use DTRACEIOC_CONF to dynamically obtain the number of registers provided by -+ * the current DTrace implementation. -+ */ -+ -+/* -+ * A DTrace Intermediate Format Type (DIF Type) is used to represent the types -+ * of variables, function and associative array arguments, and the return type -+ * for each DIF object (shown below). It contains a description of the type, -+ * its size in bytes, and a module identifier. 
-+ */ -+ -+typedef struct dtrace_diftype { -+ uint8_t dtdt_kind; -+ uint8_t dtdt_ckind; -+ uint8_t dtdt_flags; -+ uint8_t dtdt_pad; -+ uint32_t dtdt_size; -+} dtrace_diftype_t; -+ -+/* -+ * A DTrace Intermediate Format variable record is used to describe each of the -+ * variables referenced by a given DIF object. It contains an integer variable -+ * identifier along with variable scope and properties, as shown below. The -+ * size of this structure must be sizeof (int) aligned. -+ */ -+ -+typedef struct dtrace_difv { -+ uint32_t dtdv_name; -+ uint32_t dtdv_id; -+ uint8_t dtdv_kind; -+ uint8_t dtdv_scope; -+ uint16_t dtdv_flags; -+ struct dtrace_diftype dtdv_type; -+} dtrace_difv_t; -+ -+#endif /* _LINUX_DTRACE_DIF_H */ -diff --git a/include/uapi/linux/dtrace/dif_defines.h b/include/uapi/linux/dtrace/dif_defines.h -new file mode 100644 -index 000000000000..80b913f097a2 ---- /dev/null -+++ b/include/uapi/linux/dtrace/dif_defines.h -@@ -0,0 +1,288 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2017, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_DIF_DEFINES_H -+#define _LINUX_DTRACE_DIF_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+/* -+ * The following definitions describe the DTrace Intermediate Format (DIF), a a -+ * RISC-like instruction set and program encoding used to represent predicates -+ * and actions that can be bound to DTrace probes. The constants below defining -+ * the number of available registers are suggested minimums; the compiler should -+ * use DTRACEIOC_CONF to dynamically obtain the number of registers provided by -+ * the current DTrace implementation. 
-+ */ -+ -+#define DIF_VERSION_1 1 -+#define DIF_VERSION_2 2 -+#define DIF_VERSION DIF_VERSION_2 -+#define DIF_DIR_NREGS 8 /* number of DIF integer registers */ -+#define DIF_DTR_NREGS 8 /* number of DIF tuple registers */ -+ -+#define DIF_OP_OR 1 /* or r1, r2, rd */ -+#define DIF_OP_XOR 2 /* xor r1, r2, rd */ -+#define DIF_OP_AND 3 /* and r1, r2, rd */ -+#define DIF_OP_SLL 4 /* sll r1, r2, rd */ -+#define DIF_OP_SRL 5 /* srl r1, r2, rd */ -+#define DIF_OP_SUB 6 /* sub r1, r2, rd */ -+#define DIF_OP_ADD 7 /* add r1, r2, rd */ -+#define DIF_OP_MUL 8 /* mul r1, r2, rd */ -+#define DIF_OP_SDIV 9 /* sdiv r1, r2, rd */ -+#define DIF_OP_UDIV 10 /* udiv r1, r2, rd */ -+#define DIF_OP_SREM 11 /* srem r1, r2, rd */ -+#define DIF_OP_UREM 12 /* urem r1, r2, rd */ -+#define DIF_OP_NOT 13 /* not r1, rd */ -+#define DIF_OP_MOV 14 /* mov r1, rd */ -+#define DIF_OP_CMP 15 /* cmp r1, r2 */ -+#define DIF_OP_TST 16 /* tst r1 */ -+#define DIF_OP_BA 17 /* ba label */ -+#define DIF_OP_BE 18 /* be label */ -+#define DIF_OP_BNE 19 /* bne label */ -+#define DIF_OP_BG 20 /* bg label */ -+#define DIF_OP_BGU 21 /* bgu label */ -+#define DIF_OP_BGE 22 /* bge label */ -+#define DIF_OP_BGEU 23 /* bgeu label */ -+#define DIF_OP_BL 24 /* bl label */ -+#define DIF_OP_BLU 25 /* blu label */ -+#define DIF_OP_BLE 26 /* ble label */ -+#define DIF_OP_BLEU 27 /* bleu label */ -+#define DIF_OP_LDSB 28 /* ldsb [r1], rd */ -+#define DIF_OP_LDSH 29 /* ldsh [r1], rd */ -+#define DIF_OP_LDSW 30 /* ldsw [r1], rd */ -+#define DIF_OP_LDUB 31 /* ldub [r1], rd */ -+#define DIF_OP_LDUH 32 /* lduh [r1], rd */ -+#define DIF_OP_LDUW 33 /* lduw [r1], rd */ -+#define DIF_OP_LDX 34 /* ldx [r1], rd */ -+#define DIF_OP_RET 35 /* ret rd */ -+#define DIF_OP_NOP 36 /* nop */ -+#define DIF_OP_SETX 37 /* setx intindex, rd */ -+#define DIF_OP_SETS 38 /* sets strindex, rd */ -+#define DIF_OP_SCMP 39 /* scmp r1, r2 */ -+#define DIF_OP_LDGA 40 /* ldga var, ri, rd */ -+#define DIF_OP_LDGS 41 /* ldgs var, rd */ -+#define DIF_OP_STGS 
42 /* stgs var, rs */ -+#define DIF_OP_LDTA 43 /* ldta var, ri, rd */ -+#define DIF_OP_LDTS 44 /* ldts var, rd */ -+#define DIF_OP_STTS 45 /* stts var, rs */ -+#define DIF_OP_SRA 46 /* sra r1, r2, rd */ -+#define DIF_OP_CALL 47 /* call subr, rd */ -+#define DIF_OP_PUSHTR 48 /* pushtr type, rs, rr */ -+#define DIF_OP_PUSHTV 49 /* pushtv type, rs, rv */ -+#define DIF_OP_POPTS 50 /* popts */ -+#define DIF_OP_FLUSHTS 51 /* flushts */ -+#define DIF_OP_LDGAA 52 /* ldgaa var, rd */ -+#define DIF_OP_LDTAA 53 /* ldtaa var, rd */ -+#define DIF_OP_STGAA 54 /* stgaa var, rs */ -+#define DIF_OP_STTAA 55 /* sttaa var, rs */ -+#define DIF_OP_LDLS 56 /* ldls var, rd */ -+#define DIF_OP_STLS 57 /* stls var, rs */ -+#define DIF_OP_ALLOCS 58 /* allocs r1, rd */ -+#define DIF_OP_COPYS 59 /* copys r1, r2, rd */ -+#define DIF_OP_STB 60 /* stb r1, [rd] */ -+#define DIF_OP_STH 61 /* sth r1, [rd] */ -+#define DIF_OP_STW 62 /* stw r1, [rd] */ -+#define DIF_OP_STX 63 /* stx r1, [rd] */ -+#define DIF_OP_ULDSB 64 /* uldsb [r1], rd */ -+#define DIF_OP_ULDSH 65 /* uldsh [r1], rd */ -+#define DIF_OP_ULDSW 66 /* uldsw [r1], rd */ -+#define DIF_OP_ULDUB 67 /* uldub [r1], rd */ -+#define DIF_OP_ULDUH 68 /* ulduh [r1], rd */ -+#define DIF_OP_ULDUW 69 /* ulduw [r1], rd */ -+#define DIF_OP_ULDX 70 /* uldx [r1], rd */ -+#define DIF_OP_RLDSB 71 /* rldsb [r1], rd */ -+#define DIF_OP_RLDSH 72 /* rldsh [r1], rd */ -+#define DIF_OP_RLDSW 73 /* rldsw [r1], rd */ -+#define DIF_OP_RLDUB 74 /* rldub [r1], rd */ -+#define DIF_OP_RLDUH 75 /* rlduh [r1], rd */ -+#define DIF_OP_RLDUW 76 /* rlduw [r1], rd */ -+#define DIF_OP_RLDX 77 /* rldx [r1], rd */ -+#define DIF_OP_XLATE 78 /* xlate xlrindex, rd */ -+#define DIF_OP_XLARG 79 /* xlarg xlrindex, rd */ -+ -+#define DIF_INTOFF_MAX 0xffff /* highest integer table offset */ -+#define DIF_STROFF_MAX 0xffff /* highest string table offset */ -+#define DIF_REGISTER_MAX 0xff /* highest register number */ -+#define DIF_VARIABLE_MAX 0xffff /* highest variable identifier */ 
-+#define DIF_SUBROUTINE_MAX 0xffff /* highest subroutine code */ -+ -+#define DIF_VAR_ARRAY_MIN 0x0000 /* lowest numbered array variable */ -+#define DIF_VAR_ARRAY_UBASE 0x0080 /* lowest user-defined array */ -+#define DIF_VAR_ARRAY_MAX 0x00ff /* highest numbered array variable */ -+ -+#define DIF_VAR_OTHER_MIN 0x0100 /* lowest numbered scalar or assc */ -+#define DIF_VAR_OTHER_UBASE 0x0500 /* lowest user-defined scalar or assc */ -+#define DIF_VAR_OTHER_MAX 0xffff /* highest numbered scalar or assc */ -+ -+#define DIF_VAR_ARGS 0x0000 -+#define DIF_VAR_REGS 0x0001 -+#define DIF_VAR_UREGS 0x0002 -+#define DIF_VAR_CURTHREAD 0x0100 -+#define DIF_VAR_TIMESTAMP 0x0101 -+#define DIF_VAR_VTIMESTAMP 0x0102 -+#define DIF_VAR_IPL 0x0103 -+#define DIF_VAR_EPID 0x0104 -+#define DIF_VAR_ID 0x0105 -+#define DIF_VAR_ARG0 0x0106 -+#define DIF_VAR_ARG1 0x0107 -+#define DIF_VAR_ARG2 0x0108 -+#define DIF_VAR_ARG3 0x0109 -+#define DIF_VAR_ARG4 0x010a -+#define DIF_VAR_ARG5 0x010b -+#define DIF_VAR_ARG6 0x010c -+#define DIF_VAR_ARG7 0x010d -+#define DIF_VAR_ARG8 0x010e -+#define DIF_VAR_ARG9 0x010f -+#define DIF_VAR_STACKDEPTH 0x0110 -+#define DIF_VAR_CALLER 0x0111 -+#define DIF_VAR_PROBEPROV 0x0112 -+#define DIF_VAR_PROBEMOD 0x0113 -+#define DIF_VAR_PROBEFUNC 0x0114 -+#define DIF_VAR_PROBENAME 0x0115 -+#define DIF_VAR_PID 0x0116 -+#define DIF_VAR_TID 0x0117 -+#define DIF_VAR_EXECNAME 0x0118 -+#define DIF_VAR_ZONENAME 0x0119 -+#define DIF_VAR_WALLTIMESTAMP 0x011a -+#define DIF_VAR_USTACKDEPTH 0x011b -+#define DIF_VAR_UCALLER 0x011c -+#define DIF_VAR_PPID 0x011d -+#define DIF_VAR_UID 0x011e -+#define DIF_VAR_GID 0x011f -+#define DIF_VAR_ERRNO 0x0120 -+#define DIF_VAR_CURCPU 0x0121 -+ -+#define DIF_SUBR_RAND 0 -+#define DIF_SUBR_MUTEX_OWNED 1 -+#define DIF_SUBR_MUTEX_OWNER 2 -+#define DIF_SUBR_MUTEX_TYPE_ADAPTIVE 3 -+#define DIF_SUBR_MUTEX_TYPE_SPIN 4 -+#define DIF_SUBR_RW_READ_HELD 5 -+#define DIF_SUBR_RW_WRITE_HELD 6 -+#define DIF_SUBR_RW_ISWRITER 7 -+#define DIF_SUBR_COPYIN 8 
-+#define DIF_SUBR_COPYINSTR 9 -+#define DIF_SUBR_SPECULATION 10 -+#define DIF_SUBR_PROGENYOF 11 -+#define DIF_SUBR_STRLEN 12 -+#define DIF_SUBR_COPYOUT 13 -+#define DIF_SUBR_COPYOUTSTR 14 -+#define DIF_SUBR_ALLOCA 15 -+#define DIF_SUBR_BCOPY 16 -+#define DIF_SUBR_COPYINTO 17 -+#define DIF_SUBR_MSGDSIZE 18 -+#define DIF_SUBR_MSGSIZE 19 -+#define DIF_SUBR_GETMAJOR 20 -+#define DIF_SUBR_GETMINOR 21 -+#define DIF_SUBR_DDI_PATHNAME 22 -+#define DIF_SUBR_STRJOIN 23 -+#define DIF_SUBR_LLTOSTR 24 -+#define DIF_SUBR_BASENAME 25 -+#define DIF_SUBR_DIRNAME 26 -+#define DIF_SUBR_CLEANPATH 27 -+#define DIF_SUBR_STRCHR 28 -+#define DIF_SUBR_STRRCHR 29 -+#define DIF_SUBR_STRSTR 30 -+#define DIF_SUBR_STRTOK 31 -+#define DIF_SUBR_SUBSTR 32 -+#define DIF_SUBR_INDEX 33 -+#define DIF_SUBR_RINDEX 34 -+#define DIF_SUBR_HTONS 35 -+#define DIF_SUBR_HTONL 36 -+#define DIF_SUBR_HTONLL 37 -+#define DIF_SUBR_NTOHS 38 -+#define DIF_SUBR_NTOHL 39 -+#define DIF_SUBR_NTOHLL 40 -+#define DIF_SUBR_INET_NTOP 41 -+#define DIF_SUBR_INET_NTOA 42 -+#define DIF_SUBR_INET_NTOA6 43 -+#define DIF_SUBR_D_PATH 44 -+#define DIF_SUBR_LINK_NTOP 45 -+ -+#define DIF_SUBR_MAX 45 -+ -+typedef uint32_t dif_instr_t; -+ -+#define DIF_INSTR_OP(i) (((i) >> 24) & 0xff) -+#define DIF_INSTR_R1(i) (((i) >> 16) & 0xff) -+#define DIF_INSTR_R2(i) (((i) >> 8) & 0xff) -+#define DIF_INSTR_RD(i) ((i) & 0xff) -+#define DIF_INSTR_RS(i) ((i) & 0xff) -+#define DIF_INSTR_LABEL(i) ((i) & 0xffffff) -+#define DIF_INSTR_VAR(i) (((i) >> 8) & 0xffff) -+#define DIF_INSTR_INTEGER(i) (((i) >> 8) & 0xffff) -+#define DIF_INSTR_STRING(i) (((i) >> 8) & 0xffff) -+#define DIF_INSTR_SUBR(i) (((i) >> 8) & 0xffff) -+#define DIF_INSTR_TYPE(i) (((i) >> 16) & 0xff) -+#define DIF_INSTR_XLREF(i) (((i) >> 8) & 0xffff) -+#define DIF_INSTR_FMT(op, r1, r2, d) \ -+ (((op) << 24) | ((r1) << 16) | ((r2) << 8) | (d)) -+ -+#define DIF_INSTR_NOT(r1, d) (DIF_INSTR_FMT(DIF_OP_NOT, r1, 0, d)) -+#define DIF_INSTR_MOV(r1, d) (DIF_INSTR_FMT(DIF_OP_MOV, r1, 0, d)) -+#define 
DIF_INSTR_CMP(op, r1, r2) (DIF_INSTR_FMT(op, r1, r2, 0)) -+#define DIF_INSTR_TST(r1) (DIF_INSTR_FMT(DIF_OP_TST, r1, 0, 0)) -+#define DIF_INSTR_BRANCH(op, label) (((op) << 24) | (label)) -+#define DIF_INSTR_LOAD(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) -+#define DIF_INSTR_STORE(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) -+#define DIF_INSTR_SETX(i, d) ((DIF_OP_SETX << 24) | ((i) << 8) | (d)) -+#define DIF_INSTR_SETS(s, d) ((DIF_OP_SETS << 24) | ((s) << 8) | (d)) -+#define DIF_INSTR_RET(d) (DIF_INSTR_FMT(DIF_OP_RET, 0, 0, d)) -+#define DIF_INSTR_NOP (DIF_OP_NOP << 24) -+#define DIF_INSTR_LDA(op, v, r, d) (DIF_INSTR_FMT(op, v, r, d)) -+#define DIF_INSTR_LDV(op, v, d) (((op) << 24) | ((v) << 8) | (d)) -+#define DIF_INSTR_STV(op, v, rs) (((op) << 24) | ((v) << 8) | (rs)) -+#define DIF_INSTR_CALL(s, d) ((DIF_OP_CALL << 24) | ((s) << 8) | (d)) -+#define DIF_INSTR_PUSHTS(op, t, r2, rs) (DIF_INSTR_FMT(op, t, r2, rs)) -+#define DIF_INSTR_POPTS (DIF_OP_POPTS << 24) -+#define DIF_INSTR_FLUSHTS (DIF_OP_FLUSHTS << 24) -+#define DIF_INSTR_ALLOCS(r1, d) (DIF_INSTR_FMT(DIF_OP_ALLOCS, r1, 0, d)) -+#define DIF_INSTR_COPYS(r1, r2, d) (DIF_INSTR_FMT(DIF_OP_COPYS, r1, r2, d)) -+#define DIF_INSTR_XLATE(op, r, d) (((op) << 24) | ((r) << 8) | (d)) -+ -+#define DIF_REG_R0 0 -+ -+/* -+ * A DTrace Intermediate Format Type (DIF Type) is used to represent the types -+ * of variables, function and associative array arguments, and the return type -+ * for each DIF object (shown below). It contains a description of the type, -+ * its size in bytes, and a module identifier. -+ */ -+ -+#define DIF_TYPE_CTF 0 -+#define DIF_TYPE_STRING 1 -+ -+#define DIF_TF_BYREF 0x1 -+ -+/* -+ * A DTrace Intermediate Format variable record is used to describe each of the -+ * variables referenced by a given DIF object. It contains an integer variable -+ * identifier along with variable scope and properties, as shown below. The -+ * size of this structure must be sizeof (int) aligned. 
-+ */ -+ -+#define DIFV_KIND_ARRAY 0 -+#define DIFV_KIND_SCALAR 1 -+ -+#define DIFV_SCOPE_GLOBAL 0 -+#define DIFV_SCOPE_THREAD 1 -+#define DIFV_SCOPE_LOCAL 2 -+ -+#define DIFV_F_REF 0x1 -+#define DIFV_F_MOD 0x2 -+ -+struct dtrace_diftype; -+struct dtrace_difv; -+ -+#endif /* _LINUX_DTRACE_DIF_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/difo.h b/include/uapi/linux/dtrace/difo.h -new file mode 100644 -index 000000000000..6e9efd7f0a43 ---- /dev/null -+++ b/include/uapi/linux/dtrace/difo.h -@@ -0,0 +1,57 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_DIFO_H -+#define _LINUX_DTRACE_DIFO_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/dif.h> -+#include <linux/dtrace/dof_defines.h> -+ -+/* -+ * A DIFO is used to store the compiled DIF for a D expression, its return -+ * type, and its string and variable tables. The string table is a single -+ * buffer of character data into which sets instructions and variable -+ * references can reference strings using a byte offset. The variable table -+ * is an array of dtrace_difv_t structures that describe the name and type of -+ * each variable and the id used in the DIF code. This structure is described -+ * above in the DIF section of this header file. The DIFO is used at both -+ * user-level (in the library) and in the kernel, but the structure is never -+ * passed between the two: the DOF structures form the only interface. As a -+ * result, the definition can change depending on the presence of _KERNEL. 
-+ */ -+ -+typedef struct dtrace_difo { -+ dif_instr_t *dtdo_buf; /* instruction buffer */ -+ uint64_t *dtdo_inttab; /* integer table (optional) */ -+ char *dtdo_strtab; /* string table (optional) */ -+ struct dtrace_difv *dtdo_vartab; /* variable table (optional) */ -+ uint_t dtdo_len; /* length of instruction buffer */ -+ uint_t dtdo_intlen; /* length of integer table */ -+ uint_t dtdo_strlen; /* length of string table */ -+ uint_t dtdo_varlen; /* length of variable table */ -+ struct dtrace_diftype dtdo_rtype; /* return type */ -+ uint_t dtdo_refcnt; /* owner reference count */ -+ uint_t dtdo_destructive; /* invokes destructive subroutines */ -+#ifndef _KERNEL -+ struct dtrace_diftype orig_dtdo_rtype; /* original return type */ -+ struct dof_relodesc *dtdo_kreltab; /* kernel relocations */ -+ struct dof_relodesc *dtdo_ureltab; /* user relocations */ -+ struct dt_node **dtdo_xlmtab; /* translator references */ -+ uint_t dtdo_krelen; /* length of krelo table */ -+ uint_t dtdo_urelen; /* length of urelo table */ -+ uint_t dtdo_xlmlen; /* length of translator table */ -+#endif -+} dtrace_difo_t; -+ -+#endif /* _LINUX_DTRACE_DIFO_H */ -diff --git a/include/uapi/linux/dtrace/difo_defines.h b/include/uapi/linux/dtrace/difo_defines.h -new file mode 100644 -index 000000000000..fdd25f2b7691 ---- /dev/null -+++ b/include/uapi/linux/dtrace/difo_defines.h -@@ -0,0 +1,21 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_DIFO_DEFINES_H -+#define _LINUX_DTRACE_DIFO_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+struct dtrace_difo; -+ -+#endif /* _LINUX_DTRACE_DIFO_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/dof.h b/include/uapi/linux/dtrace/dof.h -new file mode 100644 -index 000000000000..54c6ca710443 ---- /dev/null -+++ b/include/uapi/linux/dtrace/dof.h -@@ -0,0 +1,196 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_DOF_H -+#define _LINUX_DTRACE_DOF_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/dif.h> -+#include <linux/dtrace/dof_defines.h> -+ -+/* -+ * DTrace programs can be persistently encoded in the DOF format so that they -+ * may be embedded in other programs (for example, in an ELF file) or in the -+ * dtrace driver configuration file for use in anonymous tracing. The DOF -+ * format is versioned and extensible so that it can be revised and so that -+ * internal data structures can be modified or extended compatibly. All DOF -+ * structures use fixed-size types, so the 32-bit and 64-bit representations -+ * are identical and consumers can use either data model transparently. -+ * -+ * The file layout is structured as follows: -+ * -+ * +---------------+-------------------+----- ... ----+---- ... ------+ -+ * | dof_hdr_t | dof_sec_t[ ... ] | loadable | non-loadable | -+ * | (file header) | (section headers) | section data | section data | -+ * +---------------+-------------------+----- ... ----+---- ... 
------+ -+ * |<------------ dof_hdr.dofh_loadsz --------------->| | -+ * |<------------ dof_hdr.dofh_filesz ------------------------------->| -+ * -+ * The file header stores meta-data including a magic number, data model for -+ * the instrumentation, data encoding, and properties of the DIF code within. -+ * The header describes its own size and the size of the section headers. By -+ * convention, an array of section headers follows the file header, and then -+ * the data for all loadable sections and unloadable sections. This permits -+ * consumer code to easily download the headers and all loadable data into the -+ * DTrace driver in one contiguous chunk, omitting other extraneous sections. -+ * -+ * The section headers describe the size, offset, alignment, and section type -+ * for each section. Sections are described using a set of #defines that tell -+ * the consumer what kind of data is expected. Sections can contain links to -+ * other sections by storing a dof_secidx_t, an index into the section header -+ * array, inside of the section data structures. The section header includes -+ * an entry size so that sections with data arrays can grow their structures. -+ * -+ * The DOF data itself can contain many snippets of DIF (i.e. >1 DIFOs), which -+ * are represented themselves as a collection of related DOF sections. This -+ * permits us to change the set of sections associated with a DIFO over time, -+ * and also permits us to encode DIFOs that contain different sets of sections. -+ * When a DOF section wants to refer to a DIFO, it stores the dof_secidx_t of a -+ * section of type DOF_SECT_DIFOHDR. This section's data is then an array of -+ * dof_secidx_t's which in turn denote the sections associated with this DIFO. 
-+ * -+ * This loose coupling of the file structure (header and sections) to the -+ * structure of the DTrace program itself (ECB descriptions, action -+ * descriptions, and DIFOs) permits activities such as relocation processing -+ * to occur in a single pass without having to understand D program structure. -+ * -+ * Finally, strings are always stored in ELF-style string tables along with a -+ * string table section index and string table offset. Therefore strings in -+ * DOF are always arbitrary-length and not bound to the current implementation. -+ */ -+ -+typedef struct dof_hdr { -+ uint8_t dofh_ident[DOF_ID_SIZE];/* ident bytes (see defines) */ -+ uint32_t dofh_flags; /* file attribute flags (if any) */ -+ uint32_t dofh_hdrsize; /* size of file header in bytes */ -+ uint32_t dofh_secsize; /* size of section header in bytes */ -+ uint32_t dofh_secnum; /* number of section headers */ -+ uint64_t dofh_secoff; /* file offset of section headers */ -+ uint64_t dofh_loadsz; /* file size of loadable portion */ -+ uint64_t dofh_filesz; /* file size of entire DOF file */ -+ uint64_t dofh_pad; /* reserved for future use */ -+} dof_hdr_t; -+ -+typedef struct dof_sec { -+ uint32_t dofs_type; /* section type (see defines) */ -+ uint32_t dofs_align; /* section data memory alignment */ -+ uint32_t dofs_flags; /* section flags (if any) */ -+ uint32_t dofs_entsize; /* size of section entry (if table) */ -+ uint64_t dofs_offset; /* offset of section data within file */ -+ uint64_t dofs_size; /* size of section data in bytes */ -+} dof_sec_t; -+ -+ -+typedef struct dof_ecbdesc { -+ dof_secidx_t dofe_probes; /* link to DOF_SECT_PROBEDESC */ -+ dof_secidx_t dofe_pred; /* link to DOF_SECT_DIFOHDR */ -+ dof_secidx_t dofe_actions; /* link to DOF_SECT_ACTDESC */ -+ uint32_t dofe_pad; /* reserved for future use */ -+ uint64_t dofe_uarg; /* user-supplied library argument */ -+} dof_ecbdesc_t; -+ -+typedef struct dof_probedesc { -+ dof_secidx_t dofp_strtab; /* link to DOF_SECT_STRTAB 
section */ -+ dof_stridx_t dofp_provider; /* provider string */ -+ dof_stridx_t dofp_mod; /* module string */ -+ dof_stridx_t dofp_func; /* function string */ -+ dof_stridx_t dofp_name; /* name string */ -+ uint32_t dofp_id; /* probe identifier (or zero) */ -+} dof_probedesc_t; -+ -+typedef struct dof_actdesc { -+ dof_secidx_t dofa_difo; /* link to DOF_SECT_DIFOHDR */ -+ dof_secidx_t dofa_strtab; /* link to DOF_SECT_STRTAB section */ -+ uint32_t dofa_kind; /* action kind (DTRACEACT_* constant) */ -+ uint32_t dofa_ntuple; /* number of subsequent tuple actions */ -+ uint64_t dofa_arg; /* kind-specific argument */ -+ uint64_t dofa_uarg; /* user-supplied argument */ -+} dof_actdesc_t; -+ -+typedef struct dof_difohdr { -+ struct dtrace_diftype dofd_rtype; /* return type for this fragment */ -+ dof_secidx_t dofd_links[1]; /* variable length array of indices */ -+} dof_difohdr_t; -+ -+typedef struct dof_relohdr { -+ dof_secidx_t dofr_strtab; /* link to DOF_SECT_STRTAB for names */ -+ dof_secidx_t dofr_relsec; /* link to DOF_SECT_RELTAB for relos */ -+ dof_secidx_t dofr_tgtsec; /* link to section we are relocating */ -+} dof_relohdr_t; -+ -+typedef struct dof_relodesc { -+ dof_stridx_t dofr_name; /* string name of relocation symbol */ -+ uint32_t dofr_type; /* relo type (DOF_RELO_* constant) */ -+ uint64_t dofr_offset; /* byte offset for relocation */ -+ uint64_t dofr_data; /* additional type-specific data */ -+} dof_relodesc_t; -+ -+typedef struct dof_optdesc { -+ uint32_t dofo_option; /* option identifier */ -+ dof_secidx_t dofo_strtab; /* string table, if string option */ -+ uint64_t dofo_value; /* option value or string index */ -+} dof_optdesc_t; -+ -+typedef struct dof_provider { -+ dof_secidx_t dofpv_strtab; /* link to DOF_SECT_STRTAB section */ -+ dof_secidx_t dofpv_probes; /* link to DOF_SECT_PROBES section */ -+ dof_secidx_t dofpv_prargs; /* link to DOF_SECT_PRARGS section */ -+ dof_secidx_t dofpv_proffs; /* link to DOF_SECT_PROFFS section */ -+ dof_stridx_t 
dofpv_name; /* provider name string */ -+ dof_attr_t dofpv_provattr; /* provider attributes */ -+ dof_attr_t dofpv_modattr; /* module attributes */ -+ dof_attr_t dofpv_funcattr; /* function attributes */ -+ dof_attr_t dofpv_nameattr; /* name attributes */ -+ dof_attr_t dofpv_argsattr; /* args attributes */ -+ dof_secidx_t dofpv_prenoffs; /* link to DOF_SECT_PRENOFFS section */ -+} dof_provider_t; -+ -+typedef struct dof_probe { -+ uint64_t dofpr_addr; /* probe base address or offset */ -+ dof_stridx_t dofpr_func; /* probe function string */ -+ dof_stridx_t dofpr_name; /* probe name string */ -+ dof_stridx_t dofpr_nargv; /* native argument type strings */ -+ dof_stridx_t dofpr_xargv; /* translated argument type strings */ -+ uint32_t dofpr_argidx; /* index of first argument mapping */ -+ uint32_t dofpr_offidx; /* index of first offset entry */ -+ uint8_t dofpr_nargc; /* native argument count */ -+ uint8_t dofpr_xargc; /* translated argument count */ -+ uint16_t dofpr_noffs; /* number of offset entries for probe */ -+ uint32_t dofpr_enoffidx; /* index of first is-enabled offset */ -+ uint16_t dofpr_nenoffs; /* number of is-enabled offsets */ -+ uint16_t dofpr_pad1; /* reserved for future use */ -+ uint32_t dofpr_pad2; /* reserved for future use */ -+} dof_probe_t; -+ -+typedef struct dof_xlator { -+ dof_secidx_t dofxl_members; /* link to DOF_SECT_XLMEMBERS section */ -+ dof_secidx_t dofxl_strtab; /* link to DOF_SECT_STRTAB section */ -+ dof_stridx_t dofxl_argv; /* input parameter type strings */ -+ uint32_t dofxl_argc; /* input parameter list length */ -+ dof_stridx_t dofxl_type; /* output type string name */ -+ dof_attr_t dofxl_attr; /* output stability attributes */ -+} dof_xlator_t; -+ -+typedef struct dof_xlmember { -+ dof_secidx_t dofxm_difo; /* member link to DOF_SECT_DIFOHDR */ -+ dof_stridx_t dofxm_name; /* member name */ -+ struct dtrace_diftype dofxm_type; /* member type */ -+} dof_xlmember_t; -+ -+typedef struct dof_xlref { -+ dof_secidx_t dofxr_xlator; /* 
link to DOF_SECT_XLATORS section */ -+ uint32_t dofxr_member; /* index of referenced dof_xlmember */ -+ uint32_t dofxr_argn; /* index of argument for DIF_OP_XLARG */ -+} dof_xlref_t; -+ -+#endif /* _LINUX_DTRACE_DOF_H */ -diff --git a/include/uapi/linux/dtrace/dof_defines.h b/include/uapi/linux/dtrace/dof_defines.h -new file mode 100644 -index 000000000000..5357d5e099cc ---- /dev/null -+++ b/include/uapi/linux/dtrace/dof_defines.h -@@ -0,0 +1,192 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_DOF_DEFINES_H -+#define _LINUX_DTRACE_DOF_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+/* -+ * DTrace programs can be persistently encoded in the DOF format so that they -+ * may be embedded in other programs (for example, in an ELF file) or in the -+ * dtrace driver configuration file for use in anonymous tracing. The DOF -+ * format is versioned and extensible so that it can be revised and so that -+ * internal data structures can be modified or extended compatibly. All DOF -+ * structures use fixed-size types, so the 32-bit and 64-bit representations -+ * are identical and consumers can use either data model transparently. -+ * -+ * The file layout is structured as follows: -+ * -+ * +---------------+-------------------+----- ... ----+---- ... ------+ -+ * | dof_hdr_t | dof_sec_t[ ... ] | loadable | non-loadable | -+ * | (file header) | (section headers) | section data | section data | -+ * +---------------+-------------------+----- ... ----+---- ... 
------+ -+ * |<------------ dof_hdr.dofh_loadsz --------------->| | -+ * |<------------ dof_hdr.dofh_filesz ------------------------------->| -+ * -+ * The file header stores meta-data including a magic number, data model for -+ * the instrumentation, data encoding, and properties of the DIF code within. -+ * The header describes its own size and the size of the section headers. By -+ * convention, an array of section headers follows the file header, and then -+ * the data for all loadable sections and unloadable sections. This permits -+ * consumer code to easily download the headers and all loadable data into the -+ * DTrace driver in one contiguous chunk, omitting other extraneous sections. -+ * -+ * The section headers describe the size, offset, alignment, and section type -+ * for each section. Sections are described using a set of #defines that tell -+ * the consumer what kind of data is expected. Sections can contain links to -+ * other sections by storing a dof_secidx_t, an index into the section header -+ * array, inside of the section data structures. The section header includes -+ * an entry size so that sections with data arrays can grow their structures. -+ * -+ * The DOF data itself can contain many snippets of DIF (i.e. >1 DIFOs), which -+ * are represented themselves as a collection of related DOF sections. This -+ * permits us to change the set of sections associated with a DIFO over time, -+ * and also permits us to encode DIFOs that contain different sets of sections. -+ * When a DOF section wants to refer to a DIFO, it stores the dof_secidx_t of a -+ * section of type DOF_SECT_DIFOHDR. This section's data is then an array of -+ * dof_secidx_t's which in turn denote the sections associated with this DIFO. 
-+ * -+ * This loose coupling of the file structure (header and sections) to the -+ * structure of the DTrace program itself (ECB descriptions, action -+ * descriptions, and DIFOs) permits activities such as relocation processing -+ * to occur in a single pass without having to understand D program structure. -+ * -+ * Finally, strings are always stored in ELF-style string tables along with a -+ * string table section index and string table offset. Therefore strings in -+ * DOF are always arbitrary-length and not bound to the current implementation. -+ */ -+ -+#define DOF_ID_SIZE 16 /* total size of dofh_ident[] in bytes */ -+ -+#define DOF_ID_MAG0 0 -+#define DOF_ID_MAG1 1 -+#define DOF_ID_MAG2 2 -+#define DOF_ID_MAG3 3 -+#define DOF_ID_MODEL 4 -+#define DOF_ID_ENCODING 5 -+#define DOF_ID_VERSION 6 -+#define DOF_ID_DIFVERS 7 -+#define DOF_ID_DIFIREG 8 /* DIF integer registers used by compiler */ -+#define DOF_ID_DIFTREG 9 /* DIF tuple registers used by compiler */ -+#define DOF_ID_PAD 10 /* start of padding bytes (all zeroes) */ -+ -+#define DOF_MAG_MAG0 0x7F /* DOF_ID_MAG[0-3] */ -+#define DOF_MAG_MAG1 'D' -+#define DOF_MAG_MAG2 'O' -+#define DOF_MAG_MAG3 'F' -+ -+#define DOF_MAG_STRING "\177DOF" -+#define DOF_MAG_STRLEN 4 -+ -+#define DOF_MODEL_NONE 0 /* DOF_ID_MODEL */ -+#define DOF_MODEL_ILP32 1 -+#define DOF_MODEL_LP64 2 -+ -+#ifdef _LP64 -+#define DOF_MODEL_NATIVE DOF_MODEL_LP64 -+#else -+#define DOF_MODEL_NATIVE DOF_MODEL_ILP32 -+#endif -+ -+#define DOF_ENCODE_NONE 0 /* DOF_ID_ENCODING */ -+#define DOF_ENCODE_LSB 1 -+#define DOF_ENCODE_MSB 2 -+ -+#ifndef _LITTLE_ENDIAN -+#define DOF_ENCODE_NATIVE DOF_ENCODE_MSB -+#else -+#define DOF_ENCODE_NATIVE DOF_ENCODE_LSB -+#endif -+ -+#define DOF_VERSION_1 1 -+#define DOF_VERSION_2 2 -+#define DOF_VERSION DOF_VERSION_2 -+ -+#define DOF_FL_VALID 0 /* mask of all valid dofh_flags bits */ -+ -+typedef uint32_t dof_secidx_t; /* section header table index type */ -+typedef uint32_t dof_stridx_t; /* string table index type 
*/ -+ -+#define DOF_SECIDX_NONE -1U /* null value for section indices */ -+#define DOF_STRIDX_NONE -1U /* null value for string indices */ -+ -+#define DOF_SECT_NONE 0 /* null section */ -+#define DOF_SECT_COMMENTS 1 /* compiler comments */ -+#define DOF_SECT_SOURCE 2 /* D program source code */ -+#define DOF_SECT_ECBDESC 3 /* dof_ecbdesc_t */ -+#define DOF_SECT_PROBEDESC 4 /* dof_probedesc_t */ -+#define DOF_SECT_ACTDESC 5 /* dof_actdesc_t array */ -+#define DOF_SECT_DIFOHDR 6 /* dof_difohdr_t (variable length) */ -+#define DOF_SECT_DIF 7 /* uint32_t array of byte code */ -+#define DOF_SECT_STRTAB 8 /* string table */ -+#define DOF_SECT_VARTAB 9 /* dtrace_difv_t array */ -+#define DOF_SECT_RELTAB 10 /* dof_relodesc_t array */ -+#define DOF_SECT_TYPTAB 11 /* dtrace_diftype_t array */ -+#define DOF_SECT_URELHDR 12 /* dof_relohdr_t (user relocations) */ -+#define DOF_SECT_KRELHDR 13 /* dof_relohdr_t (kernel relocations) */ -+#define DOF_SECT_OPTDESC 14 /* dof_optdesc_t array */ -+#define DOF_SECT_PROVIDER 15 /* dof_provider_t */ -+#define DOF_SECT_PROBES 16 /* dof_probe_t array */ -+#define DOF_SECT_PRARGS 17 /* uint8_t array (probe arg mappings) */ -+#define DOF_SECT_PROFFS 18 /* uint32_t array (probe arg offsets) */ -+#define DOF_SECT_INTTAB 19 /* uint64_t array */ -+#define DOF_SECT_UTSNAME 20 /* struct utsname */ -+#define DOF_SECT_XLTAB 21 /* dof_xlref_t array */ -+#define DOF_SECT_XLMEMBERS 22 /* dof_xlmember_t array */ -+#define DOF_SECT_XLIMPORT 23 /* dof_xlator_t */ -+#define DOF_SECT_XLEXPORT 24 /* dof_xlator_t */ -+#define DOF_SECT_PREXPORT 25 /* dof_secidx_t array (exported objs) */ -+#define DOF_SECT_PRENOFFS 26 /* uint32_t array (enabled offsets) */ -+ -+#define DOF_SECF_LOAD 1 /* section should be loaded */ -+ -+#define DOF_SEC_ISLOADABLE(x) \ -+ (((x) == DOF_SECT_ECBDESC) || ((x) == DOF_SECT_PROBEDESC) || \ -+ ((x) == DOF_SECT_ACTDESC) || ((x) == DOF_SECT_DIFOHDR) || \ -+ ((x) == DOF_SECT_DIF) || ((x) == DOF_SECT_STRTAB) || \ -+ ((x) == 
DOF_SECT_VARTAB) || ((x) == DOF_SECT_RELTAB) || \ -+ ((x) == DOF_SECT_TYPTAB) || ((x) == DOF_SECT_URELHDR) || \ -+ ((x) == DOF_SECT_KRELHDR) || ((x) == DOF_SECT_OPTDESC) || \ -+ ((x) == DOF_SECT_PROVIDER) || ((x) == DOF_SECT_PROBES) || \ -+ ((x) == DOF_SECT_PRARGS) || ((x) == DOF_SECT_PROFFS) || \ -+ ((x) == DOF_SECT_INTTAB) || ((x) == DOF_SECT_XLTAB) || \ -+ ((x) == DOF_SECT_XLMEMBERS) || ((x) == DOF_SECT_XLIMPORT) || \ -+ ((x) == DOF_SECT_XLIMPORT) || ((x) == DOF_SECT_XLEXPORT) || \ -+ ((x) == DOF_SECT_PREXPORT) || ((x) == DOF_SECT_PRENOFFS)) -+ -+#define DOF_RELO_NONE 0 /* empty relocation entry */ -+#define DOF_RELO_SETX 1 /* relocate setx value */ -+ -+typedef uint32_t dof_attr_t; /* encoded stability attributes */ -+ -+#define DOF_ATTR(n, d, c) (((n) << 24) | ((d) << 16) | ((c) << 8)) -+#define DOF_ATTR_NAME(a) (((a) >> 24) & 0xff) -+#define DOF_ATTR_DATA(a) (((a) >> 16) & 0xff) -+#define DOF_ATTR_CLASS(a) (((a) >> 8) & 0xff) -+ -+struct dof_hdr; -+struct dof_sec; -+struct dof_ecbdesc; -+struct dof_probedesc; -+struct dof_actdesc; -+struct dof_difohdr; -+struct dof_relohdr; -+struct dof_relodesc; -+struct dof_optdesc; -+struct dof_provider; -+struct dof_xlator; -+struct dof_xlmember; -+struct dof_xlref; -+ -+#endif /* _LINUX_DTRACE_DOF_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/dtrace.h b/include/uapi/linux/dtrace/dtrace.h -new file mode 100644 -index 000000000000..0ee9d35876ef ---- /dev/null -+++ b/include/uapi/linux/dtrace/dtrace.h -@@ -0,0 +1,33 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_H_ -+#define _LINUX_DTRACE_H_ -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/dif.h> -+#include <linux/dtrace/actions.h> -+#include <linux/dtrace/dof.h> -+#include <linux/dtrace/difo.h> -+#include <linux/dtrace/enabling.h> -+#include <linux/dtrace/metadesc.h> -+#include <linux/dtrace/options.h> -+#include <linux/dtrace/buffer.h> -+#include <linux/dtrace/status.h> -+#include <linux/dtrace/conf.h> -+#include <linux/dtrace/faults.h> -+#include <linux/dtrace/arg.h> -+#include <linux/dtrace/stability.h> -+#include <linux/dtrace/helpers.h> -+ -+#endif /* _LINUX_DTRACE_H_ */ -diff --git a/include/uapi/linux/dtrace/enabling.h b/include/uapi/linux/dtrace/enabling.h -new file mode 100644 -index 000000000000..8aac2ab9ea8d ---- /dev/null -+++ b/include/uapi/linux/dtrace/enabling.h -@@ -0,0 +1,76 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_ENABLING_H -+#define _LINUX_DTRACE_ENABLING_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/difo_defines.h> -+#include <linux/dtrace/enabling_defines.h> -+ -+/* -+ * When DTrace is tracking the description of a DTrace enabling entity (probe, -+ * predicate, action, ECB, record, etc.), it does so in a description -+ * structure. These structures all end in "desc", and are used at both -+ * user-level and in the kernel -- but (with the exception of -+ * dtrace_probedesc_t) they are never passed between them. Typically, -+ * user-level will use the description structures when assembling an enabling. 
-+ * It will then distill those description structures into a DOF object (see -+ * above), and send it into the kernel. The kernel will again use the -+ * description structures to create a description of the enabling as it reads -+ * the DOF. When the description is complete, the enabling will be actually -+ * created -- turning it into the structures that represent the enabling -+ * instead of merely describing it. Not surprisingly, the description -+ * structures bear a strong resemblance to the DOF structures that act as their -+ * conduit. -+ */ -+ -+struct dtrace_predicate; -+ -+typedef struct dtrace_probedesc { -+ dtrace_id_t dtpd_id; /* probe identifier */ -+ char dtpd_provider[DTRACE_PROVNAMELEN]; /* probe provider name */ -+ char dtpd_mod[DTRACE_MODNAMELEN]; /* probe module name */ -+ char dtpd_func[DTRACE_FUNCNAMELEN]; /* probe function name */ -+ char dtpd_name[DTRACE_NAMELEN]; /* probe name */ -+} dtrace_probedesc_t; -+ -+typedef struct dtrace_repldesc { -+ struct dtrace_probedesc dtrpd_match; /* probe descr. to match */ -+ struct dtrace_probedesc dtrpd_create; /* probe descr. 
to create */ -+} dtrace_repldesc_t; -+ -+typedef struct dtrace_preddesc { -+ struct dtrace_difo *dtpdd_difo; /* pointer to DIF object */ -+ struct dtrace_predicate *dtpdd_predicate; /* pointer to predicate */ -+} dtrace_preddesc_t; -+ -+typedef struct dtrace_actdesc { -+ struct dtrace_difo *dtad_difo; /* pointer to DIF object */ -+ struct dtrace_actdesc *dtad_next; /* next action */ -+ dtrace_actkind_t dtad_kind; /* kind of action */ -+ uint32_t dtad_ntuple; /* number in tuple */ -+ uint64_t dtad_arg; /* action argument */ -+ uint64_t dtad_uarg; /* user argument */ -+ int dtad_refcnt; /* reference count */ -+} dtrace_actdesc_t; -+ -+typedef struct dtrace_ecbdesc { -+ struct dtrace_actdesc *dted_action; /* action description(s) */ -+ struct dtrace_preddesc dted_pred; /* predicate description */ -+ struct dtrace_probedesc dted_probe; /* probe description */ -+ uint64_t dted_uarg; /* library argument */ -+ int dted_refcnt; /* reference count */ -+} dtrace_ecbdesc_t; -+ -+#endif /* _LINUX_DTRACE_ENABLING_H */ -diff --git a/include/uapi/linux/dtrace/enabling_defines.h b/include/uapi/linux/dtrace/enabling_defines.h -new file mode 100644 -index 000000000000..221c3efca015 ---- /dev/null -+++ b/include/uapi/linux/dtrace/enabling_defines.h -@@ -0,0 +1,25 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_ENABLING_DEFINES_H -+#define _LINUX_DTRACE_ENABLING_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+struct dtrace_probedesc; -+struct dtrace_repldesc; -+struct dtrace_preddesc; -+struct dtrace_actdesc; -+struct dtrace_ecbdesc; -+ -+#endif /* _LINUX_DTRACE_ENABLING_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/fasttrap.h b/include/uapi/linux/dtrace/fasttrap.h -new file mode 100644 -index 000000000000..4dbf1a2a35cd ---- /dev/null -+++ b/include/uapi/linux/dtrace/fasttrap.h -@@ -0,0 +1,56 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_FASTTRAP_H -+#define _LINUX_DTRACE_FASTTRAP_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/fasttrap_defines.h> -+ -+typedef enum fasttrap_probe_type { -+ DTFTP_NONE = 0, -+ DTFTP_ENTRY, -+ DTFTP_RETURN, -+ DTFTP_OFFSETS, -+ DTFTP_POST_OFFSETS, -+ DTFTP_IS_ENABLED -+} fasttrap_probe_type_t; -+ -+typedef struct fasttrap_probe_spec { -+ pid_t ftps_pid; /* task PID */ -+ enum fasttrap_probe_type ftps_type; /* probe type */ -+ char ftps_func[DTRACE_FUNCNAMELEN]; /* probe function */ -+ char ftps_mod[DTRACE_MODNAMELEN]; /* probe module */ -+ uint64_t ftps_pc; /* probe address */ -+ uint64_t ftps_size; /* function size (in bytes) */ -+ uint8_t ftps_glen; /* glob pattern length */ -+ char ftps_gstr[1]; /* glob pattern string */ -+} fasttrap_probe_spec_t; -+ -+typedef uint8_t fasttrap_instr_t; -+ -+typedef struct fasttrap_instr_query { -+ uint64_t ftiq_pc; -+ pid_t ftiq_pid; -+ fasttrap_instr_t ftiq_instr; -+} fasttrap_instr_query_t; -+ -+/* -+ * Include after the definitions, to get 
ioctl()s when fasttrap.h is included. -+ * fasttrap_ioctl.h also #includes this header, to get structures when it is -+ * included itself, as is done by headers_check. -+ */ -+ -+#include <linux/dtrace/fasttrap_ioctl.h> -+ -+#endif /* _LINUX_DTRACE_FASTTRAP_H */ -diff --git a/include/uapi/linux/dtrace/fasttrap_defines.h b/include/uapi/linux/dtrace/fasttrap_defines.h -new file mode 100644 -index 000000000000..4bb07564e1c4 ---- /dev/null -+++ b/include/uapi/linux/dtrace/fasttrap_defines.h -@@ -0,0 +1,25 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_FASTTRAP_DEFINES_H -+#define _LINUX_DTRACE_FASTTRAP_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+#ifndef __cplusplus -+enum fasttrap_probe_type; -+#endif -+struct fasttrap_probe_spec; -+struct fasttrap_instr_query; -+ -+#endif /* _LINUX_DTRACE_FASTTRAP_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/fasttrap_ioctl.h b/include/uapi/linux/dtrace/fasttrap_ioctl.h -new file mode 100644 -index 000000000000..b5a8b0731fb6 ---- /dev/null -+++ b/include/uapi/linux/dtrace/fasttrap_ioctl.h -@@ -0,0 +1,19 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_FASTRRAP_IOCTL_H_ -+#define _LINUX_DTRACE_FASTTRAP_IOCTL_H_ -+ -+#include <linux/ioctl.h> -+#include <linux/dtrace/fasttrap.h> -+ -+#define FASTTRAPIOC 0xf4 -+#define FASTTRAPIOC_MAKEPROBE _IOW(FASTTRAPIOC, 1, struct fasttrap_probe_spec) -+#define FASTTRAPIOC_GETINSTR _IOR(FASTTRAPIOC, 2, struct fasttrap_instr_query) -+ -+#endif /* _LINUX_DTRACE_FASTTRAP_IOCTL_H_ */ -diff --git a/include/uapi/linux/dtrace/faults.h b/include/uapi/linux/dtrace/faults.h -new file mode 100644 -index 000000000000..afa2ae548fa6 ---- /dev/null -+++ b/include/uapi/linux/dtrace/faults.h -@@ -0,0 +1,20 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_FAULTS_H -+#define _LINUX_DTRACE_FAULTS_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/faults_defines.h> -+ -+#endif /* _LINUX_DTRACE_FAULTS_H */ -diff --git a/include/uapi/linux/dtrace/faults_defines.h b/include/uapi/linux/dtrace/faults_defines.h -new file mode 100644 -index 000000000000..d225f2e847e4 ---- /dev/null -+++ b/include/uapi/linux/dtrace/faults_defines.h -@@ -0,0 +1,39 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_FAULTS_DEFINES_H -+#define _LINUX_DTRACE_FAULTS_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+/* -+ * The constants below DTRACEFLT_LIBRARY indicate probe processing faults; -+ * constants at or above DTRACEFLT_LIBRARY indicate faults in probe -+ * postprocessing at user-level. Probe processing faults induce an ERROR -+ * probe and are replicated in unistd.d to allow users' ERROR probes to decode -+ * the error condition using thse symbolic labels. -+ */ -+#define DTRACEFLT_UNKNOWN 0 /* Unknown fault */ -+#define DTRACEFLT_BADADDR 1 /* Bad address */ -+#define DTRACEFLT_BADALIGN 2 /* Bad alignment */ -+#define DTRACEFLT_ILLOP 3 /* Illegal operation */ -+#define DTRACEFLT_DIVZERO 4 /* Divide-by-zero */ -+#define DTRACEFLT_NOSCRATCH 5 /* Out of scratch space */ -+#define DTRACEFLT_KPRIV 6 /* Illegal kernel access */ -+#define DTRACEFLT_UPRIV 7 /* Illegal user access */ -+#define DTRACEFLT_TUPOFLOW 8 /* Tuple stack overflow */ -+#define DTRACEFLT_BADSTACK 9 /* Bad stack */ -+ -+#define DTRACEFLT_LIBRARY 1000 /* Library-level fault */ -+ -+#endif /* _LINUX_DTRACE_FAULTS_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/helpers.h b/include/uapi/linux/dtrace/helpers.h -new file mode 100644 -index 000000000000..553f23994881 ---- /dev/null -+++ b/include/uapi/linux/dtrace/helpers.h -@@ -0,0 +1,101 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_HELPERS_H -+#define _LINUX_DTRACE_HELPERS_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/helpers_defines.h> -+ -+/* -+ * DTrace Helpers -+ * -+ * In general, DTrace establishes probes in processes and takes actions on -+ * processes without knowing their specific user-level structures. Instead of -+ * existing in the framework, process-specific knowledge is contained by the -+ * enabling D program -- which can apply process-specific knowledge by making -+ * appropriate use of DTrace primitives like copyin() and copyinstr() to -+ * operate on user-level data. However, there may exist some specific probes -+ * of particular semantic relevance that the application developer may wish to -+ * explicitly export. For example, an application may wish to export a probe -+ * at the point that it begins and ends certain well-defined transactions. In -+ * addition to providing probes, programs may wish to offer assistance for -+ * certain actions. For example, in highly dynamic environments (e.g., Java), -+ * it may be difficult to obtain a stack trace in terms of meaningful symbol -+ * names (the translation from instruction addresses to corresponding symbol -+ * names may only be possible in situ); these environments may wish to define -+ * a series of actions to be applied in situ to obtain a meaningful stack -+ * trace. -+ * -+ * These two mechanisms -- user-level statically defined tracing and assisting -+ * DTrace actions -- are provided via DTrace _helpers_. Helpers are specified -+ * via DOF, but unlike enabling DOF, helper DOF may contain definitions of -+ * providers, probes and their arguments. If a helper wishes to provide -+ * action assistance, probe descriptions and corresponding DIF actions may be -+ * specified in the helper DOF. 
For such helper actions, however, the probe -+ * description describes the specific helper: all DTrace helpers have the -+ * provider name "dtrace" and the module name "helper", and the name of the -+ * helper is contained in the function name (for example, the ustack() helper -+ * is named "ustack"). Any helper-specific name may be contained in the name -+ * (for example, if a helper were to have a constructor, it might be named -+ * "dtrace:helper:<helper>:init"). Helper actions are only called when the -+ * action that they are helping is taken. Helper actions may only return DIF -+ * expressions, and may only call the following subroutines: -+ * -+ * alloca() <= Allocates memory out of the consumer's scratch space -+ * bcopy() <= Copies memory to scratch space -+ * copyin() <= Copies memory from user-level into consumer's scratch -+ * copyinto() <= Copies memory into a specific location in scratch -+ * copyinstr() <= Copies a string into a specific location in scratch -+ * -+ * Helper actions may only access the following built-in variables: -+ * -+ * curthread <= Current kthread_t pointer -+ * tid <= Current thread identifier -+ * pid <= Current process identifier -+ * ppid <= Parent process identifier -+ * uid <= Current user ID -+ * gid <= Current group ID -+ * execname <= Current executable name -+ * zonename <= Current zone name -+ * -+ * Helper actions may not manipulate or allocate dynamic variables, but they -+ * may have clause-local and statically-allocated global variables. The -+ * helper action variable state is specific to the helper action -- variables -+ * used by the helper action may not be accessed outside of the helper -+ * action, and the helper action may not access variables that like outside -+ * of it. Helper actions may not load from kernel memory at-large; they are -+ * restricting to loading current user state (via copyin() and variants) and -+ * scratch space. 
As with probe enablings, helper actions are executed in -+ * program order. The result of the helper action is the result of the last -+ * executing helper expression. -+ * -+ * Helpers -- composed of either providers/probes or probes/actions (or both) -+ * -- are added by opening the "helper" minor node, and issuing an ioctl(2) -+ * (DTRACEHIOC_ADDDOF) that specifies the dof_helper_t structure. This -+ * encapsulates the name and base address of the user-level library or -+ * executable publishing the helpers and probes as well as the DOF that -+ * contains the definitions of those helpers and probes. -+ * -+ * The DTRACEHIOC_ADD and DTRACEHIOC_REMOVE are left in place for legacy -+ * helpers and should no longer be used. No other ioctls are valid on the -+ * helper minor node. -+ */ -+ -+typedef struct dof_helper { -+ char dofhp_mod[DTRACE_MODNAMELEN]; /* executable or library name */ -+ uint64_t dofhp_addr; /* base address of object */ -+ uint64_t dofhp_dof; /* address of helper DOF */ -+} dof_helper_t; -+ -+#endif /* _LINUX_DTRACE_HELPERS_H */ -diff --git a/include/uapi/linux/dtrace/helpers_defines.h b/include/uapi/linux/dtrace/helpers_defines.h -new file mode 100644 -index 000000000000..8bf52f058001 ---- /dev/null -+++ b/include/uapi/linux/dtrace/helpers_defines.h -@@ -0,0 +1,21 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_HELPERS_DEFINES_H -+#define _LINUX_DTRACE_HELPERS_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+struct dof_helper; -+ -+#endif /* _LINUX_DTRACE_HELPERS_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/ioctl.h b/include/uapi/linux/dtrace/ioctl.h -new file mode 100644 -index 000000000000..ef2476af2629 ---- /dev/null -+++ b/include/uapi/linux/dtrace/ioctl.h -@@ -0,0 +1,47 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _LINUX_DTRACE_IOCTL_H_ -+#define _LINUX_DTRACE_IOCTL_H_ -+ -+#include <linux/ioctl.h> -+#include <linux/dtrace/arg.h> -+#include <linux/dtrace/buffer.h> -+#include <linux/dtrace/conf.h> -+#include <linux/dtrace/dof.h> -+#include <linux/dtrace/enabling.h> -+#include <linux/dtrace/helpers.h> -+#include <linux/dtrace/metadesc.h> -+#include <linux/dtrace/stability.h> -+#include <linux/dtrace/status.h> -+#include <linux/dtrace/cpu_defines.h> -+ -+#define DTRACEIOC 0xd4 -+#define DTRACEIOC_PROVIDER _IOR(DTRACEIOC, 1, struct dtrace_providerdesc) -+#define DTRACEIOC_PROBES _IOR(DTRACEIOC, 2, struct dtrace_probedesc) -+#define DTRACEIOC_BUFSNAP _IOR(DTRACEIOC, 4, struct dtrace_bufdesc) -+#define DTRACEIOC_PROBEMATCH _IOR(DTRACEIOC, 5, struct dtrace_probedesc) -+#define DTRACEIOC_ENABLE _IOW(DTRACEIOC, 6, void *) -+#define DTRACEIOC_AGGSNAP _IOR(DTRACEIOC, 7, struct dtrace_bufdesc) -+#define DTRACEIOC_EPROBE _IOW(DTRACEIOC, 8, struct dtrace_eprobedesc) -+#define DTRACEIOC_PROBEARG _IOR(DTRACEIOC, 9, struct dtrace_argdesc) -+#define DTRACEIOC_CONF _IOR(DTRACEIOC, 10, struct dtrace_conf) -+#define DTRACEIOC_STATUS _IOR(DTRACEIOC, 11, struct dtrace_status) -+#define DTRACEIOC_GO _IOW(DTRACEIOC, 12, processorid_t) -+#define DTRACEIOC_STOP _IOW(DTRACEIOC, 13, processorid_t) -+#define DTRACEIOC_AGGDESC 
_IOR(DTRACEIOC, 15, struct dtrace_aggdesc) -+#define DTRACEIOC_FORMAT _IOR(DTRACEIOC, 16, struct dtrace_fmtdesc) -+#define DTRACEIOC_DOFGET _IOR(DTRACEIOC, 17, struct dof_hdr) -+#define DTRACEIOC_REPLICATE _IOR(DTRACEIOC, 18, void *) -+ -+#define DTRACEHIOC 0xd8 -+#define DTRACEHIOC_ADD _IOW(DTRACEHIOC, 1, struct dof_hdr) -+#define DTRACEHIOC_REMOVE _IOW(DTRACEHIOC, 2, int) -+#define DTRACEHIOC_ADDDOF _IOW(DTRACEHIOC, 3, struct dof_helper) -+ -+#endif /* _LINUX_DTRACE_IOCTL_H */ -diff --git a/include/uapi/linux/dtrace/metadesc.h b/include/uapi/linux/dtrace/metadesc.h -new file mode 100644 -index 000000000000..a6b3d82b2c97 ---- /dev/null -+++ b/include/uapi/linux/dtrace/metadesc.h -@@ -0,0 +1,81 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_METADESC_H -+#define _LINUX_DTRACE_METADESC_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/actions_defines.h> -+#include <linux/dtrace/metadesc_defines.h> -+ -+/* -+ * DTrace separates the trace data stream from the metadata stream. The only -+ * metadata tokens placed in the data stream are enabled probe identifiers -+ * (EPIDs) or (in the case of aggregations) aggregation identifiers. In order -+ * to determine the structure of the data, DTrace consumers pass the token to -+ * the kernel, and receive in return a corresponding description of the enabled -+ * probe (via the dtrace_eprobedesc structure) or the aggregation (via the -+ * dtrace_aggdesc structure). 
Both of these structures are expressed in terms -+ * of record descriptions (via the dtrace_recdesc structure) that describe the -+ * exact structure of the data. Some record descriptions may also contain a -+ * format identifier; this additional bit of metadata can be retrieved from the -+ * kernel, for which a format description is returned via the dtrace_fmtdesc -+ * structure. Note that all four of these structures must be bitness-neutral -+ * to allow for a 32-bit DTrace consumer on a 64-bit kernel. -+ */ -+typedef struct dtrace_recdesc { -+ dtrace_actkind_t dtrd_action; /* kind of action */ -+ uint32_t dtrd_size; /* size of record */ -+ uint32_t dtrd_offset; /* offset in ECB's data */ -+ uint16_t dtrd_alignment; /* required alignment */ -+ uint16_t dtrd_format; /* format, if any */ -+ uint64_t dtrd_arg; /* action argument */ -+ uint64_t dtrd_uarg; /* user argument */ -+} dtrace_recdesc_t; -+ -+typedef struct dtrace_eprobedesc { -+ dtrace_epid_t dtepd_epid; /* enabled probe ID */ -+ dtrace_id_t dtepd_probeid; /* probe ID */ -+ uint64_t dtepd_uarg; /* library argument */ -+ uint32_t dtepd_size; /* total size */ -+ int dtepd_nrecs; /* number of records */ -+ struct dtrace_recdesc dtepd_rec[1]; /* records themselves */ -+} dtrace_eprobedesc_t; -+ -+typedef struct dtrace_aggdesc { -+ DTRACE_PTR(char, dtagd_name); /* not filled in by kernel */ -+ dtrace_aggvarid_t dtagd_varid; /* not filled in by kernel */ -+ int dtagd_flags; /* not filled in by kernel */ -+ dtrace_aggid_t dtagd_id; /* aggregation ID */ -+ dtrace_epid_t dtagd_epid; /* enabled probe ID */ -+ uint32_t dtagd_size; /* size in bytes */ -+ int dtagd_nrecs; /* number of records */ -+ uint32_t dtagd_pad; /* explicit padding */ -+ struct dtrace_recdesc dtagd_rec[1]; /* record descriptions */ -+} dtrace_aggdesc_t; -+ -+typedef struct dtrace_fmtdesc { -+ DTRACE_PTR(char, dtfd_string); /* format string */ -+ int dtfd_length; /* length of format string */ -+ uint16_t dtfd_format; /* format identifier */ -+} 
dtrace_fmtdesc_t; -+ -+#define DTRACE_SIZEOF_EPROBEDESC(desc) \ -+ (sizeof(struct dtrace_eprobedesc) + ((desc)->dtepd_nrecs ? \ -+ (((desc)->dtepd_nrecs - 1) * sizeof(struct dtrace_recdesc)) : 0)) -+ -+#define DTRACE_SIZEOF_AGGDESC(desc) \ -+ (sizeof(struct dtrace_aggdesc) + ((desc)->dtagd_nrecs ? \ -+ (((desc)->dtagd_nrecs - 1) * sizeof(struct dtrace_recdesc)) : 0)) -+ -+#endif /* _LINUX_DTRACE_METADESC_H */ -diff --git a/include/uapi/linux/dtrace/metadesc_defines.h b/include/uapi/linux/dtrace/metadesc_defines.h -new file mode 100644 -index 000000000000..b27cc28822c8 ---- /dev/null -+++ b/include/uapi/linux/dtrace/metadesc_defines.h -@@ -0,0 +1,24 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_METADESC_DEFINES_H -+#define _LINUX_DTRACE_METADESC_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+struct dtrace_recdesc; -+struct dtrace_eprobedesc; -+struct dtrace_aggdesc; -+struct dtrace_fmtdesc; -+ -+#endif /* _LINUX_DTRACE_METADESC_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/options.h b/include/uapi/linux/dtrace/options.h -new file mode 100644 -index 000000000000..0a652ca2a148 ---- /dev/null -+++ b/include/uapi/linux/dtrace/options.h -@@ -0,0 +1,20 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_OPTIONS_H -+#define _LINUX_DTRACE_OPTIONS_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/options_defines.h> -+ -+#endif /* _LINUX_DTRACE_OPTIONS_H */ -diff --git a/include/uapi/linux/dtrace/options_defines.h b/include/uapi/linux/dtrace/options_defines.h -new file mode 100644 -index 000000000000..26009c84437e ---- /dev/null -+++ b/include/uapi/linux/dtrace/options_defines.h -@@ -0,0 +1,72 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_OPTIONS_DEFINES_H -+#define _LINUX_DTRACE_OPTIONS_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+/* -+ * Run-time DTrace options are set and retrieved via DOF_SECT_OPTDESC sections -+ * in a DOF image. The dof_optdesc structure contains an option identifier and -+ * an option value. The valid option identifiers are found below; the mapping -+ * between option identifiers and option identifying strings is maintained at -+ * user-level. Note that the value of DTRACEOPT_UNSET is such that all of the -+ * following are potentially valid option values: all positive integers, zero -+ * and negative one. Some options (notably "bufpolicy" and "bufresize") take -+ * predefined tokens as their values; these are defined with -+ * DTRACEOPT_{option}_{token}. 
-+ */ -+ -+#define DTRACEOPT_BUFSIZE 0 /* buffer size */ -+#define DTRACEOPT_BUFPOLICY 1 /* buffer policy */ -+#define DTRACEOPT_DYNVARSIZE 2 /* dynamic variable size */ -+#define DTRACEOPT_AGGSIZE 3 /* aggregation size */ -+#define DTRACEOPT_SPECSIZE 4 /* speculation size */ -+#define DTRACEOPT_NSPEC 5 /* number of speculations */ -+#define DTRACEOPT_STRSIZE 6 /* string size */ -+#define DTRACEOPT_CLEANRATE 7 /* dynvar cleaning rate */ -+#define DTRACEOPT_CPU 8 /* CPU to trace */ -+#define DTRACEOPT_BUFRESIZE 9 /* buffer resizing policy */ -+#define DTRACEOPT_GRABANON 10 /* grab anonymous state, if any */ -+#define DTRACEOPT_FLOWINDENT 11 /* indent function entry/return */ -+#define DTRACEOPT_QUIET 12 /* only output explicitly traced data */ -+#define DTRACEOPT_STACKFRAMES 13 /* number of stack frames */ -+#define DTRACEOPT_USTACKFRAMES 14 /* number of user stack frames */ -+#define DTRACEOPT_AGGRATE 15 /* aggregation snapshot rate */ -+#define DTRACEOPT_SWITCHRATE 16 /* buffer switching rate */ -+#define DTRACEOPT_STATUSRATE 17 /* status rate */ -+#define DTRACEOPT_DESTRUCTIVE 18 /* destructive actions allowed */ -+#define DTRACEOPT_STACKINDENT 19 /* output indent for stack traces */ -+#define DTRACEOPT_RAWBYTES 20 /* always print bytes in raw form */ -+#define DTRACEOPT_JSTACKFRAMES 21 /* number of jstack() frames */ -+#define DTRACEOPT_JSTACKSTRSIZE 22 /* size of jstack() string table */ -+#define DTRACEOPT_AGGSORTKEY 23 /* sort aggregations by key */ -+#define DTRACEOPT_AGGSORTREV 24 /* reverse-sort aggregations */ -+#define DTRACEOPT_AGGSORTPOS 25 /* agg. position to sort on */ -+#define DTRACEOPT_AGGSORTKEYPOS 26 /* agg. 
key position to sort on */ -+#define DTRACEOPT_QUIETRESIZE 27 /* quieten buffer-resize messages */ -+#define DTRACEOPT_NORESOLVE 28 /* prevent resolution of symbols */ -+#define DTRACEOPT_PCAPSIZE 29 /* number of bytes to be captured */ -+#define DTRACEOPT_MAX 30 /* number of options */ -+ -+#define DTRACEOPT_UNSET (dtrace_optval_t)-2 /* unset option */ -+ -+#define DTRACEOPT_BUFPOLICY_RING 0 /* ring buffer */ -+#define DTRACEOPT_BUFPOLICY_FILL 1 /* fill buffer, then stop */ -+#define DTRACEOPT_BUFPOLICY_SWITCH 2 /* switch buffers */ -+ -+#define DTRACEOPT_BUFRESIZE_AUTO 0 /* automatic resizing */ -+#define DTRACEOPT_BUFRESIZE_MANUAL 1 /* manual resizing */ -+ -+#endif /* _LINUX_DTRACE_OPTIONS_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/stability.h b/include/uapi/linux/dtrace/stability.h -new file mode 100644 -index 000000000000..380effdab291 ---- /dev/null -+++ b/include/uapi/linux/dtrace/stability.h -@@ -0,0 +1,52 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_STABILITY_H -+#define _LINUX_DTRACE_STABILITY_H -+ -+#include <linux/dtrace/universal.h> -+#include <linux/dtrace/stability_defines.h> -+ -+/* -+ * Each DTrace provider advertises the name and data stability of each of its -+ * probe description components, as well as its architectural dependencies. The -+ * D compiler can query the provider attributes (dtrace_pattr_t) in order to -+ * compute the properties of an input program and report them. 
-+ */ -+ -+typedef struct dtrace_ppriv { -+ uint32_t dtpp_flags; /* privilege flags */ -+ uid_t dtpp_uid; /* user ID */ -+} dtrace_ppriv_t; -+ -+typedef struct dtrace_attribute { -+ dtrace_stability_t dtat_name; /* entity name stability */ -+ dtrace_stability_t dtat_data; /* entity data stability */ -+ dtrace_class_t dtat_class; /* entity data dependency */ -+} dtrace_attribute_t; -+ -+typedef struct dtrace_pattr { -+ struct dtrace_attribute dtpa_provider; /* provider attributes */ -+ struct dtrace_attribute dtpa_mod; /* module attributes */ -+ struct dtrace_attribute dtpa_func; /* function attributes */ -+ struct dtrace_attribute dtpa_name; /* name attributes */ -+ struct dtrace_attribute dtpa_args; /* args[] attributes */ -+} dtrace_pattr_t; -+ -+typedef struct dtrace_providerdesc { -+ char dtvd_name[DTRACE_PROVNAMELEN]; /* provider name */ -+ struct dtrace_pattr dtvd_attr; /* stability attributes */ -+ struct dtrace_ppriv dtvd_priv; /* privileges required */ -+} dtrace_providerdesc_t; -+ -+#endif /* _LINUX_DTRACE_STABILITY_H */ -diff --git a/include/uapi/linux/dtrace/stability_defines.h b/include/uapi/linux/dtrace/stability_defines.h -new file mode 100644 -index 000000000000..ca58c5c03c2b ---- /dev/null -+++ b/include/uapi/linux/dtrace/stability_defines.h -@@ -0,0 +1,53 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_STABILITY_DEFINES_H -+#define _LINUX_DTRACE_STABILITY_DEFINES_H -+ -+#include <linux/dtrace/universal.h> -+ -+typedef uint8_t dtrace_stability_t; /* stability code */ -+typedef uint8_t dtrace_class_t; /* architectural dependency class */ -+ -+#define DTRACE_STABILITY_INTERNAL 0 /* private to DTrace itself */ -+#define DTRACE_STABILITY_PRIVATE 1 /* private to Sun (see docs) */ -+#define DTRACE_STABILITY_OBSOLETE 2 /* scheduled for removal */ -+#define DTRACE_STABILITY_EXTERNAL 3 /* not controlled by Sun */ -+#define DTRACE_STABILITY_UNSTABLE 4 /* new or rapidly changing */ -+#define DTRACE_STABILITY_EVOLVING 5 /* less rapidly changing */ -+#define DTRACE_STABILITY_STABLE 6 /* mature interface from Sun */ -+#define DTRACE_STABILITY_STANDARD 7 /* industry standard */ -+#define DTRACE_STABILITY_MAX 7 /* maximum valid stability */ -+ -+#define DTRACE_CLASS_UNKNOWN 0 /* unknown architectural dependency */ -+#define DTRACE_CLASS_CPU 1 /* CPU-module-specific */ -+#define DTRACE_CLASS_PLATFORM 2 /* platform-specific (uname -i) */ -+#define DTRACE_CLASS_GROUP 3 /* hardware-group-specific (uname -m) */ -+#define DTRACE_CLASS_ISA 4 /* ISA-specific (uname -p) */ -+#define DTRACE_CLASS_COMMON 5 /* common to all systems */ -+#define DTRACE_CLASS_MAX 5 /* maximum valid class */ -+ -+#define DTRACE_PRIV_NONE 0x0000 -+#define DTRACE_PRIV_KERNEL 0x0001 -+#define DTRACE_PRIV_USER 0x0002 -+#define DTRACE_PRIV_PROC 0x0004 -+#define DTRACE_PRIV_OWNER 0x0008 -+#define DTRACE_PRIV_ALL (DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER | \ -+ DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER) -+ -+struct dtrace_ppriv; -+struct dtrace_attribute; -+struct dtrace_pattr; -+struct dtrace_providerdesc; -+ -+#endif /* _LINUX_DTRACE_STABILITY_DEFINES_H */ -diff --git a/include/uapi/linux/dtrace/status.h b/include/uapi/linux/dtrace/status.h -new file mode 100644 -index 000000000000..dc324199f170 ---- /dev/null -+++ b/include/uapi/linux/dtrace/status.h -@@ -0,0 +1,50 @@ -+/* 
SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _LINUX_DTRACE_STATUS_H -+#define _LINUX_DTRACE_STATUS_H -+ -+#include <linux/dtrace/universal.h> -+ -+/* -+ * The status of DTrace is relayed via the dtrace_status structure. This -+ * structure contains members to count drops other than the capacity drops -+ * available via the buffer interface (see above). This consists of dynamic -+ * drops (including capacity dynamic drops, rinsing drops and dirty drops), and -+ * speculative drops (including capacity speculative drops, drops due to busy -+ * speculative buffers and drops due to unavailable speculative buffers). -+ * Additionally, the status structure contains a field to indicate the number -+ * of "fill"-policy buffers have been filled and a boolean field to indicate -+ * that exit() has been called. If the dtst_exiting field is non-zero, no -+ * further data will be generated until tracing is stopped (at which time any -+ * enablings of the END action will be processed); if user-level sees that -+ * this field is non-zero, tracing should be stopped as soon as possible. 
-+ */ -+ -+typedef struct dtrace_status { -+ uint64_t dtst_dyndrops; /* dynamic drops */ -+ uint64_t dtst_dyndrops_rinsing; /* dyn drops due to rinsing */ -+ uint64_t dtst_dyndrops_dirty; /* dyn drops due to dirty */ -+ uint64_t dtst_specdrops; /* speculative drops */ -+ uint64_t dtst_specdrops_busy; /* spec drops due to busy */ -+ uint64_t dtst_specdrops_unavail; /* spec drops due to unavail */ -+ uint64_t dtst_errors; /* total errors */ -+ uint64_t dtst_filled; /* number of filled bufs */ -+ uint64_t dtst_stkstroverflows; /* stack string tab overflows */ -+ uint64_t dtst_dblerrors; /* errors in ERROR probes */ -+ char dtst_killed; /* non-zero if killed */ -+ char dtst_exiting; /* non-zero if exit() called */ -+ char dtst_pad[6]; /* pad out to 64-bit align */ -+} dtrace_status_t; -+ -+#endif /* _LINUX_DTRACE_STATUS_H */ -diff --git a/include/uapi/linux/dtrace/universal.h b/include/uapi/linux/dtrace/universal.h -new file mode 100644 -index 000000000000..5c2f3f838fef ---- /dev/null -+++ b/include/uapi/linux/dtrace/universal.h -@@ -0,0 +1,47 @@ -+/* SPDX-License-Identifier: UPL-1.0 */ -+/* -+ * Licensed under the Universal Permissive License v 1.0 as shown at -+ * http://oss.oracle.com/licenses/upl. -+ * -+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_UNIVERSAL_H_ -+#define _LINUX_DTRACE_UNIVERSAL_H_ -+ -+#define DTRACE_CPUALL -1 /* all CPUs */ -+#define DTRACE_IDNONE 0 /* invalid probe identifier */ -+#define DTRACE_EPIDNONE 0 /* invalid enabled probe identifier */ -+#define DTRACE_AGGIDNONE 0 /* invalid aggregation identifier */ -+#define DTRACE_AGGVARIDNONE 0 /* invalid aggregation variable ID */ -+#define DTRACE_CACHEIDNONE 0 /* invalid predicate cache */ -+#define DTRACE_PROVNONE 0 /* invalid provider identifier */ -+#define DTRACE_METAPROVNONE 0 /* invalid meta-provider identifier */ -+#define DTRACE_ARGNONE -1 /* invalid argument index */ -+ -+#define DTRACE_PROVNAMELEN 64 -+#define DTRACE_MODNAMELEN 64 -+#define DTRACE_FUNCNAMELEN 128 -+#define DTRACE_NAMELEN 64 -+#define DTRACE_FULLNAMELEN (DTRACE_PROVNAMELEN + DTRACE_MODNAMELEN + \ -+ DTRACE_FUNCNAMELEN + DTRACE_NAMELEN + 4) -+#define DTRACE_ARGTYPELEN 128 -+ -+typedef uint16_t dtrace_actkind_t; /* action kind */ -+ -+typedef uint32_t dtrace_aggid_t; /* aggregation identifier */ -+typedef uint32_t dtrace_cacheid_t; /* predicate cache identifier */ -+typedef uint32_t dtrace_epid_t; /* enabled probe identifier */ -+typedef uint32_t dtrace_optid_t; /* option identifier */ -+typedef uint32_t dtrace_specid_t; /* speculation identifier */ -+ -+typedef uint64_t dtrace_aggvarid_t; /* aggregation variable id */ -+typedef uint64_t dtrace_genid_t; /* generation identifier */ -+typedef uint64_t dtrace_optval_t; /* option value */ -+ -+#endif /* _LINUX_DTRACE_UNIVERSAL_H_ */ -diff --git a/init/Kconfig b/init/Kconfig -index 1b446780b372..38ef74b01db9 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1947,6 +1947,8 @@ config PROFILING - Say Y here to enable the extended profiling support mechanisms used - by profilers such as OProfile. - -+source "kernel/dtrace/Kconfig" -+ - # - # Place an empty function call at each tracepoint site. Can be - # dynamically changed for a probe function. 
-diff --git a/init/main.c b/init/main.c -index 6bcad75d60ad..4f4fb2735c4b 100644 ---- a/init/main.c -+++ b/init/main.c -@@ -94,6 +94,8 @@ - #include <linux/rodata_test.h> - #include <linux/jump_label.h> - #include <linux/mem_encrypt.h> -+#include <linux/dtrace_cpu.h> -+#include <linux/dtrace_os.h> - - #include <asm/io.h> - #include <asm/bugs.h> -@@ -1030,6 +1032,10 @@ asmlinkage __visible void __init start_kernel(void) - arch_post_acpi_subsys_init(); - sfi_init_late(); - -+#ifdef CONFIG_DTRACE -+ dtrace_os_init(); -+#endif -+ - /* Do the rest non-__init'ed, we're now alive */ - arch_call_rest_init(); - -@@ -1470,6 +1476,10 @@ static noinline void __init kernel_init_freeable(void) - - init_mm_internals(); - -+#ifdef CONFIG_DTRACE -+ dtrace_cpu_init(); -+#endif -+ - do_pre_smp_initcalls(); - lockup_detector_init(); - -diff --git a/kernel/Makefile b/kernel/Makefile -index 4cb4130ced32..5940ffdf7a25 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -115,6 +115,7 @@ obj-$(CONFIG_TORTURE_TEST) += torture.o - - obj-$(CONFIG_HAS_IOMEM) += iomem.o - obj-$(CONFIG_RSEQ) += rseq.o -+obj-$(CONFIG_DTRACE) += dtrace/ - - obj-$(CONFIG_SYSCTL_KUNIT_TEST) += sysctl-test.o - -diff --git a/kernel/dtrace/Kconfig b/kernel/dtrace/Kconfig -new file mode 100644 -index 000000000000..854e4411343f ---- /dev/null -+++ b/kernel/dtrace/Kconfig -@@ -0,0 +1,54 @@ -+# -+# Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+# -+ -+menuconfig DTRACE -+ bool "DTrace (Dynamic Tracing) Support" -+ default y -+ depends on ARCH_SUPPORTS_DTRACE -+ select KALLSYMS -+ select KALLMODSYMS -+ select WAITFD -+ select CTF -+ help -+ The DTrace dynamic tracing framework. -+ -+if DTRACE -+ -+config DT_CORE -+ tristate "DTrace core" -+ default m -+ help -+ The core of DTrace: needed for all providers. -+ -+if DT_CORE -+ -+config DT_DT_TEST -+ tristate "DTrace Test Probe" -+ default m -+ help -+ A test provider used by the testsuite. 
-+ -+config DT_DEBUG -+ bool "DTrace debugging" -+ default m -+ help -+ This controls the inclusion of various piece of code that perform -+ internal checks within the DTrace core. It also enables all the -+ assertions within the DTrace code. -+ -+if DT_DEBUG -+ -+config DT_DEBUG_MUTEX -+ bool "DTrace mutex debugging" -+ default n -+ help -+ This controls the use of DTrace specific wrappers to output debug -+ messages whenever a mutex is locked or unlocked within the DTrace -+ code (core and providers). -+ -+endif # DT_DEBUG -+ -+endif # DT_CORE -+ -+endif #DTRACE -diff --git a/kernel/dtrace/Makefile b/kernel/dtrace/Makefile -new file mode 100644 -index 000000000000..872785327c3d ---- /dev/null -+++ b/kernel/dtrace/Makefile -@@ -0,0 +1,12 @@ -+# -+# Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+# -+ -+DT_CORE_ARCH_OBJS = $(addprefix ../../arch/$(SRCARCH)/kernel/, \ -+ dtrace_util.o) -+ -+ifdef CONFIG_DT_CORE -+obj-y += cyclic.o dtrace_os.o dtrace_cpu.o \ -+ dtrace_task.o dtrace_psinfo.o \ -+ $(DT_CORE_ARCH_OBJS) -+endif -diff --git a/kernel/dtrace/cyclic.c b/kernel/dtrace/cyclic.c -new file mode 100644 -index 000000000000..6497ceee3782 ---- /dev/null -+++ b/kernel/dtrace/cyclic.c -@@ -0,0 +1,526 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: cyclic.c -+ * DESCRIPTION: Minimal cyclic implementation -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/cpu.h> -+#include <linux/cyclic.h> -+#include <linux/hrtimer.h> -+#include <linux/module.h> -+#include <linux/proc_fs.h> -+#include <linux/seq_file.h> -+#include <linux/slab.h> -+#include <linux/spinlock.h> -+#include <linux/workqueue.h> -+ -+static int omni_enabled; -+ -+#define _CYCLIC_CPU_UNDEF (-1) -+#define _CYCLIC_CPU_OMNI (-2) -+#define CYCLIC_IS_OMNI(cyc) ((cyc)->cpu == _CYCLIC_CPU_OMNI) -+ -+struct cyclic_work { -+ struct work_struct work; -+ struct cyclic *cyc; -+}; -+ -+struct cyclic { -+ struct list_head list; -+ int cpu; -+ union { -+ struct { -+ struct cyc_time when; -+ struct cyc_handler hdlr; -+ uint32_t pend; -+ struct hrtimer timr; -+ struct cyclic_work work; -+ } cyc; -+ struct { -+ struct cyc_omni_handler hdlr; -+ struct list_head cycl; -+ } omni; -+ }; -+}; -+ -+static LIST_HEAD(cyclics); -+ -+static void cyclic_fire(struct work_struct *work) -+{ -+ struct cyclic_work *cwork = (struct cyclic_work *)work; -+ struct cyclic *cyc = cwork->cyc; -+ uint32_t cpnd, npnd; -+ -+ do { -+ /* -+ * We know that the 'pend' counter for the cyclic is non-zero. -+ * So, we can start with calling the handler at least once. -+ */ -+ (*cyc->cyc.hdlr.cyh_func)(cyc->cyc.hdlr.cyh_arg); -+ -+again: -+ /* -+ * The 'pend' counter may be modified by cyclic_expire() while -+ * we go through this loop. We use an atomic compare-and-set -+ * instruction to determine whether it got changed. If so, we -+ * retrieve the updated 'pend' value and try this again. -+ * -+ * Note that when the cyclic is being removed, the hrtimer will -+ * be cancelled first, which ensures that 'pend' will no longer -+ * be incremented. When that happens, this loop will simply -+ * run through the remaining pending calls, and terminate. -+ */ -+ cpnd = cyc->cyc.pend; -+ npnd = cpnd - 1; -+ if (cmpxchg(&cyc->cyc.pend, cpnd, npnd) != cpnd) -+ goto again; -+ } while (npnd > 0); -+} -+ -+/* -+ * Timer expiration handler for cyclic hrtimers. 
Cyclic worker functions must -+ * be able to perform a variety of tasks (including calling functions that -+ * could sleep), and therefore they cannot be called from interrupt context. -+ * -+ * We schedule a workqueue to do the actual work. -+ * -+ * But... under heavy load it is possible that the hrtimer will expire again -+ * before the workqueue had a chance to run. That would lead to missed events -+ * which isn't quite acceptable. Therefore, we use a counter to record how -+ * many times the timer has expired vs how many times the handler has been -+ * called. The counter is incremented by this function upon hrtimer expiration -+ * and decremented by the cyclic_fire. Note that the workqueue is responsible -+ * for calling the handler multiple times if the counter indicates that multiple -+ * invocation are pending. -+ * -+ * This function is called as hrtimer handler, and therefore runs in interrupt -+ * context, which by definition will ensure that manipulation of the 'pend' -+ * counter in the cyclic can be done without locking, and changes will appear -+ * atomic to the cyclic_fire(). -+ * -+ * Moral of the story: the handler may not get called at the absolute times as -+ * requested, but it will be called the correct number of times. -+ */ -+static enum hrtimer_restart cyclic_expire(struct hrtimer *timr) -+{ -+ struct cyclic *cyc = container_of(timr, struct cyclic, cyc.timr); -+ -+ /* -+ * High priority cyclics call directly into their handler. This means -+ * that the handler must satisfy all requirements for executing code in -+ * interrupt context. -+ */ -+ if (cyc->cyc.hdlr.cyh_level == CY_HIGH_LEVEL) { -+ (*cyc->cyc.hdlr.cyh_func)(cyc->cyc.hdlr.cyh_arg); -+ goto done; -+ } -+ -+ /* -+ * Increment the 'pend' counter, in case the work is already set to -+ * run. If the counter was 0 upon entry, we need to schedule the -+ * work. If the increment wraps the counter back to 0, we admit -+ * defeat, and reset it to its max value. 
-+ */ -+ if (cyc->cyc.pend++ == 0) -+ schedule_work_on(cyc->cpu, -+ (struct work_struct *)&cyc->cyc.work); -+ else if (cyc->cyc.pend == 0) -+ cyc->cyc.pend = UINT_MAX; -+ -+done: -+ /* -+ * Prepare the timer for the next expiration. -+ */ -+ if (cyc->cyc.when.cyt_interval == CY_INTERVAL_INF) -+ return HRTIMER_NORESTART; -+ -+ hrtimer_forward_now(timr, cyc->cyc.when.cyt_interval); -+ -+ return HRTIMER_RESTART; -+} -+ -+struct cyclic *cyclic_new(int omni) -+{ -+ struct cyclic *cyc; -+ -+ cyc = kmalloc(sizeof(struct cyclic), GFP_KERNEL); -+ if (cyc == NULL) -+ return NULL; -+ -+ INIT_LIST_HEAD(&cyc->list); -+ -+ if (!omni) { -+ cyc->cpu = _CYCLIC_CPU_UNDEF; -+ cyc->cyc.pend = 0; -+ hrtimer_init(&cyc->cyc.timr, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL_PINNED); -+ cyc->cyc.timr.function = cyclic_expire; -+ cyc->cyc.work.cyc = cyc; -+ INIT_WORK((struct work_struct *)&cyc->cyc.work, cyclic_fire); -+ } else { -+ cyc->cpu = _CYCLIC_CPU_OMNI; -+ INIT_LIST_HEAD(&cyc->omni.cycl); -+ } -+ -+ return cyc; -+} -+ -+static inline void cyclic_restart(struct cyclic *cyc) -+{ -+ if (cyc->cyc.when.cyt_interval == CY_INTERVAL_INF) -+ return; -+ -+ if (cyc->cyc.when.cyt_when == 0) -+ hrtimer_start(&cyc->cyc.timr, cyc->cyc.when.cyt_interval, -+ HRTIMER_MODE_REL_PINNED); -+ else -+ hrtimer_start(&cyc->cyc.timr, cyc->cyc.when.cyt_when, -+ HRTIMER_MODE_ABS_PINNED); -+} -+ -+/* -+ * Add a new cyclic to the system. -+ */ -+cyclic_id_t cyclic_add(struct cyc_handler *hdlr, struct cyc_time *when) -+{ -+ struct cyclic *cyc; -+ -+ if (hdlr == NULL || when == NULL) -+ return CYCLIC_NONE; -+ -+ cyc = cyclic_new(0); -+ if (cyc == NULL) -+ return CYCLIC_NONE; -+ -+ list_add(&cyc->list, &cyclics); -+ cyc->cpu = smp_processor_id(); -+ cyc->cyc.when = *when; -+ cyc->cyc.hdlr = *hdlr; -+ -+ cyclic_restart(cyc); -+ -+ return (cyclic_id_t)cyc; -+} -+EXPORT_SYMBOL(cyclic_add); -+ -+static void cyclic_omni_xcall(struct cyclic *cyc) -+{ -+ cyclic_restart(cyc); -+} -+ -+/* -+ * Add a new cyclic to the system. 
-+ */ -+static void cyclic_add_pinned(int cpu, struct cyclic *omni, -+ struct cyc_handler *hdlr, struct cyc_time *when) -+{ -+ struct cyclic *cyc; -+ -+ cyc = cyclic_new(0); -+ if (cyc == NULL) -+ return; -+ -+ list_add(&cyc->list, &omni->omni.cycl); -+ cyc->cpu = cpu; -+ cyc->cyc.when = *when; -+ cyc->cyc.hdlr = *hdlr; -+ -+ smp_call_function_single(cpu, (smp_call_func_t)cyclic_omni_xcall, -+ cyc, 1); -+} -+ -+/* -+ * Start a cyclic on a specific CPU as sub-cyclic to an omni-present cyclic. -+ */ -+static void cyclic_omni_start(struct cyclic *omni, int cpu) -+{ -+ struct cyc_time when; -+ struct cyc_handler hdlr; -+ -+ omni->omni.hdlr.cyo_online(omni->omni.hdlr.cyo_arg, cpu, &hdlr, &when); -+ cyclic_add_pinned(cpu, omni, &hdlr, &when); -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static int cyclic_cpu_offline(unsigned int cpu) -+{ -+ struct cyclic *cyc; -+ -+ list_for_each_entry(cyc, &cyclics, list) { -+ struct cyclic *c, *n; -+ -+ if (!CYCLIC_IS_OMNI(cyc)) -+ continue; -+ -+ list_for_each_entry_safe(c, n, &cyc->omni.cycl, list) { -+ if (c->cpu == cpu) -+ cyclic_remove((cyclic_id_t)c); -+ } -+ } -+ return 0; -+} -+ -+static int cyclic_cpu_online(unsigned int cpu) -+{ -+ struct cyclic *cyc; -+ -+ list_for_each_entry(cyc, &cyclics, list) { -+ struct cyclic *c, *n; -+ -+ if (!CYCLIC_IS_OMNI(cyc)) -+ continue; -+ -+ list_for_each_entry_safe(c, n, &cyc->omni.cycl, list) { -+ if (c->cpu == cpu) -+ break; -+ } -+ -+ if (c->cpu == cpu) -+ continue; -+ -+ cyclic_omni_start(cyc, cpu); -+ } -+ return 0; -+} -+#endif -+ -+/* -+ * Add a new omnipresent cyclic to the system. 
-+ */ -+cyclic_id_t cyclic_add_omni(struct cyc_omni_handler *omni) -+{ -+ int cpu; -+ struct cyclic *cyc; -+ -+ cyc = cyclic_new(1); -+ if (cyc == NULL) -+ return CYCLIC_NONE; -+ -+ list_add(&cyc->list, &cyclics); -+ cyc->omni.hdlr = *omni; -+ -+ for_each_online_cpu(cpu) -+ cyclic_omni_start(cyc, cpu); -+ -+ return (cyclic_id_t)cyc; -+} -+EXPORT_SYMBOL(cyclic_add_omni); -+ -+/* -+ * Remove a specific cyclic from the system. -+ */ -+void cyclic_remove(cyclic_id_t id) -+{ -+ struct cyclic *cyc = (struct cyclic *)id; -+ -+ if (CYCLIC_IS_OMNI(cyc)) { -+ struct cyclic *child, *n; -+ -+ /* -+ * If this is an omni-present cyclic, we first need to remove -+ * all the associated per-CPU cyclics. Note that the recursive -+ * call into cyclic_remove() for a child cyclic will remove it -+ * from the list of per-CPU cyclics associated with the -+ * omni-present cyclic, so we do not need to handle that here. -+ */ -+ list_for_each_entry_safe(child, n, &cyc->omni.cycl, list) -+ cyclic_remove((cyclic_id_t)child); -+ } else { -+ /* -+ * We know that hrtimer_cancel() will wait for the timer -+ * callback to finish if it is being executed at the time of -+ * making this call. It is therefore guaranteed that 'pend' -+ * will no longer get incremented. -+ * -+ * The call to cancel_work_sync() will wait for the workqueue -+ * handler to finish also, and since the handler always brings -+ * 'pend' down to zero prior to returning, it is guaranteed that -+ * (1) all pending handler calls will be made before -+ * cyclic_remove() returns -+ * (2) the amount of work to do before returning is finite. 
-+ */ -+ hrtimer_cancel(&cyc->cyc.timr); -+ cancel_work_sync((struct work_struct *)&cyc->cyc.work); -+ } -+ -+ list_del(&cyc->list); -+ kfree(cyc); -+} -+EXPORT_SYMBOL(cyclic_remove); -+ -+struct cyclic_reprog { -+ cyclic_id_t cycid; -+ ktime_t delta; -+}; -+ -+static void cyclic_reprogram_xcall(struct cyclic_reprog *creprog) -+{ -+ cyclic_reprogram(creprog->cycid, creprog->delta); -+} -+ -+/* -+ * Reprogram cyclic to fire with given delta from now. -+ * -+ * The underlying design makes it safe to call cyclic_reprogram from whithin a -+ * cyclic handler without race with cyclic_remove. If called from outside of the -+ * cyclic handler it is up to the owner to ensure to not call cyclic_reprogram -+ * after call to cyclic_remove. -+ * -+ * This function cannot be called from interrupt/bottom half contexts. -+ */ -+void cyclic_reprogram(cyclic_id_t id, ktime_t delta) -+{ -+ struct cyclic *cyc = (struct cyclic *)id; -+ -+ /* -+ * For omni present cyclic we reprogram child for current CPU. -+ */ -+ if (CYCLIC_IS_OMNI(cyc)) { -+ struct cyclic *c, *n; -+ -+ list_for_each_entry_safe(c, n, &cyc->omni.cycl, list) { -+ if (c->cpu != smp_processor_id()) -+ continue; -+ -+ hrtimer_start(&c->cyc.timr, delta, -+ HRTIMER_MODE_ABS_PINNED); -+ -+ break; -+ } -+ -+ return; -+ } -+ -+ /* -+ * Regular cyclic reprogram must ensure that the timer remains bound -+ * to the CPU it was registered on. In case we are called from -+ * different CPU we use xcall to trigger reprogram from correct cpu. 
-+ */ -+ if (cyc->cpu != smp_processor_id()) { -+ struct cyclic_reprog creprog = { -+ .cycid = id, -+ .delta = delta, -+ }; -+ -+ smp_call_function_single(cyc->cpu, (smp_call_func_t) -+ cyclic_reprogram_xcall, &creprog, 1); -+ } else { -+ hrtimer_start(&cyc->cyc.timr, delta, HRTIMER_MODE_REL_PINNED); -+ } -+} -+EXPORT_SYMBOL(cyclic_reprogram); -+ -+static void *s_start(struct seq_file *seq, loff_t *pos) -+{ -+ loff_t n = *pos; -+ struct cyclic *cyc; -+ -+ list_for_each_entry(cyc, &cyclics, list) { -+ if (n == 0) -+ return cyc; -+ -+ n--; -+ } -+ -+ return NULL; -+} -+ -+static void *s_next(struct seq_file *seq, void *p, loff_t *pos) -+{ -+ struct cyclic *cyc = p; -+ -+ ++*pos; -+ -+ cyc = list_entry(cyc->list.next, struct cyclic, list); -+ if (&cyc->list == &cyclics) -+ return NULL; -+ -+ return cyc; -+} -+ -+static void s_stop(struct seq_file *seq, void *p) -+{ -+} -+ -+static int s_show(struct seq_file *seq, void *p) -+{ -+ struct cyclic *cyc = p; -+ -+ if (CYCLIC_IS_OMNI(cyc)) { -+ struct cyclic *c; -+ -+ seq_puts(seq, "Omni-present cyclic:\n"); -+ list_for_each_entry(c, &cyc->omni.cycl, list) -+ seq_printf(seq, -+ " CPU-%d: %c %lld ns hdlr %pB arg %llx\n", -+ c->cpu, -+ c->cyc.hdlr.cyh_level == CY_HIGH_LEVEL -+ ? 'H' : 'l', -+ c->cyc.when.cyt_interval, -+ c->cyc.hdlr.cyh_func, -+ (uint64_t)c->cyc.hdlr.cyh_arg); -+ } else -+ seq_printf(seq, "CPU-%d: %c %lld ns hdlr %pB arg %llx\n", -+ cyc->cpu, -+ cyc->cyc.hdlr.cyh_level == CY_HIGH_LEVEL -+ ? 
'H' : 'l', -+ cyc->cyc.when.cyt_interval, -+ cyc->cyc.hdlr.cyh_func, -+ (uint64_t)cyc->cyc.hdlr.cyh_arg); -+ -+ return 0; -+} -+ -+static const struct seq_operations cyclicinfo_ops = { -+ .start = s_start, -+ .next = s_next, -+ .stop = s_stop, -+ .show = s_show, -+}; -+ -+static int cyclicinfo_open(struct inode *inode, struct file *file) -+{ -+ return seq_open(file, &cyclicinfo_ops); -+} -+ -+static const struct proc_ops proc_cyclicinfo_ops = { -+ .proc_open = cyclicinfo_open, -+ .proc_read = seq_read, -+ .proc_lseek = seq_lseek, -+ .proc_release = seq_release, -+}; -+ -+static int __init cyclic_init(void) -+{ -+ int ret; -+ -+ proc_create("cyclicinfo", 0400, NULL, &proc_cyclicinfo_ops); -+ -+#ifdef CONFIG_HOTPLUG_CPU -+ if (!omni_enabled) { -+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_CYCLIC_STARTING, -+ "Cyclic omni-timer starting", -+ cyclic_cpu_online, -+ cyclic_cpu_offline); -+ if (ret) -+ pr_warn_once("Cannot enable cyclic omni timer\n"); -+ else -+ omni_enabled = 1; -+ } -+#endif -+ -+ return 0; -+} -+module_init(cyclic_init); -diff --git a/kernel/dtrace/dtrace_cpu.c b/kernel/dtrace/dtrace_cpu.c -new file mode 100644 -index 000000000000..1bc6e3bb4ce0 ---- /dev/null -+++ b/kernel/dtrace/dtrace_cpu.c -@@ -0,0 +1,61 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_cpu.c -+ * DESCRIPTION: DTrce - per-CPU state -+ * -+ * Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/dtrace_cpu.h> -+#include <linux/module.h> -+#include <asm/dtrace_cpuinfo.h> -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_core, dtrace_cpu_core); -+EXPORT_PER_CPU_SYMBOL(dtrace_cpu_core); -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo, dtrace_cpu_info); -+EXPORT_PER_CPU_SYMBOL(dtrace_cpu_info); -+ -+void dtrace_cpu_init(void) -+{ -+ int cpu; -+ -+ /* -+ * Force this type into the CTF for the sake of userspace's -+ * ABI requirements. -+ */ -+ cpuinfo_t *dummy __attribute__((__unused__)) = NULL; -+ -+ for_each_present_cpu(cpu) { -+ cpuinfo_arch_t *ci = &cpu_data(cpu); -+ struct cpuinfo *cpui = per_cpu_info(cpu); -+ struct cpu_core *cpuc = per_cpu_core(cpu); -+ -+ cpui->cpu_id = cpu; -+ cpui->cpu_pset = 0; -+ cpui->cpu_chip = dtrace_cpuinfo_chip(ci); -+ cpui->cpu_lgrp = 0; -+ cpui->cpu_info = ci; -+ -+ cpuc->cpuc_dtrace_flags = 0; -+ cpuc->cpuc_dcpc_intr_state = 0; -+ cpuc->cpuc_dtrace_illval = 0; -+ mutex_init(&cpuc->cpuc_pid_lock); -+ -+ cpuc->cpu_dtrace_regs = NULL; -+ cpuc->cpu_dtrace_caller = 0; -+ rwlock_init(&cpuc->cpu_ft_lock); -+ -+ cpuc->cpuc_current_probe = DTRACE_IDNONE; -+ } -+} -diff --git a/kernel/dtrace/dtrace_os.c b/kernel/dtrace/dtrace_os.c -new file mode 100644 -index 000000000000..177b93e3177e ---- /dev/null -+++ b/kernel/dtrace/dtrace_os.c -@@ -0,0 +1,332 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_os.c -+ * DESCRIPTION: DTrace - OS support functions -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/binfmts.h> -+#include <linux/dtrace_cpu.h> -+#include <linux/dtrace_os.h> -+#include <linux/fs.h> -+#include <linux/hardirq.h> -+#include <linux/interrupt.h> -+#include <linux/kdebug.h> -+#include <linux/module.h> -+#include <linux/moduleloader.h> -+#include <linux/sched.h> -+#include <linux/slab.h> -+#include <linux/stacktrace.h> -+#include <linux/timekeeping.h> -+#include <linux/vmalloc.h> -+#include <linux/kallsyms.h> -+#include <linux/uaccess.h> -+#include <linux/workqueue.h> -+#include <asm/ptrace.h> -+#include <linux/init_task.h> -+#include <linux/sched/mm.h> -+#include <linux/shmem_fs.h> -+#include <linux/dtrace_task_impl.h> -+ -+/* -+ * OS SPECIFIC DTRACE SETUP -+ */ -+ -+/* -+ * DTrace pseudo module that represents vmlinux (the kernel itself). -+ * Since we populate its sdt data members only once, it can be marked -+ * as RO after init. -+ */ -+struct module *dtrace_kmod __ro_after_init = NULL; -+EXPORT_SYMBOL(dtrace_kmod); -+ -+int dtrace_ustackdepth_max = 2048; -+ -+struct kmem_cache *dtrace_pdata_cachep = NULL; -+ -+void __init dtrace_os_init(void) -+{ -+ /* -+ * Setup for module handling. -+ */ -+ dtrace_pdata_cachep = kmem_cache_create("dtrace_pdata_cache", -+ sizeof(struct dtrace_module), 0, -+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); -+ if (dtrace_pdata_cachep == NULL) -+ pr_debug("Can't allocate kmem cache for pdata\n"); -+ -+ /* -+ * We need to set up a psinfo structure for PID 0 (swapper). -+ */ -+ dtrace_task_os_init(); -+ dtrace_psinfo_os_init(); -+ dtrace_task_init(&init_task); -+ dtrace_psinfo_alloc(&init_task); -+} -+ -+/* -+ * MODULE SUPPORT FUNCTIONS -+ */ -+extern struct list_head *dtrace_modules; -+ -+/* -+ * Iterate over all loaded kernel modules. This is required until the linux -+ * kernel receives its own module iterator. 
-+ */ -+void dtrace_for_each_module(for_each_module_fn func, void *arg) -+{ -+ struct module *mp; -+ -+ if (func == NULL) -+ return; -+ -+ /* The dtrace fake module is not in the list. */ -+ func(arg, dtrace_kmod); -+ -+ list_for_each_entry(mp, dtrace_modules, list) { -+ -+#ifdef MODULES_VADDR -+ if ((uintptr_t)mp < MODULES_VADDR || -+ (uintptr_t)mp >= MODULES_END) -+ continue; -+#else -+ if ((uintptr_t)mp < VMALLOC_START || -+ (uintptr_t)mp >= VMALLOC_END) -+ continue; -+#endif -+ -+ func(arg, mp); -+ } -+} -+EXPORT_SYMBOL_GPL(dtrace_for_each_module); -+ -+ -+void dtrace_mod_pdata_alloc(struct module *mp) -+{ -+ struct dtrace_module *pdata; -+ -+ pdata = kmem_cache_alloc(dtrace_pdata_cachep, GFP_KERNEL | __GFP_ZERO); -+ if (pdata == NULL) { -+ mp->pdata = NULL; -+ return; -+ } -+ -+ dtrace_mod_pdata_init(pdata); -+ mp->pdata = pdata; -+} -+ -+void dtrace_mod_pdata_free(struct module *mp) -+{ -+ struct dtrace_module *pdata = mp->pdata; -+ -+ if (mp->pdata == NULL) -+ return; -+ -+ mp->pdata = NULL; -+ dtrace_mod_pdata_cleanup(pdata); -+ kmem_cache_free(dtrace_pdata_cachep, pdata); -+} -+ -+/* -+ * This function is called with module_mutex held. -+ */ -+int dtrace_destroy_prov(struct module *mp) -+{ -+ struct dtrace_module *pdata = mp->pdata; -+ -+ if (pdata != NULL && pdata->prov_exit != NULL) -+ return pdata->prov_exit(); -+ -+ return 1; -+} -+ -+/*---------------------------------------------------------------------------*\ -+(* TIME SUPPORT FUNCTIONS *) -+\*---------------------------------------------------------------------------*/ -+enum dtrace_vtime_state dtrace_vtime_active = 0; -+ -+/* -+ * Until Linux kernel gains lock-free realtime clock access we are maintaining -+ * our own version for lock-free access from within a probe context. -+ */ -+static struct dtrace_time_fast { -+ seqcount_t dtwf_seq; -+ ktime_t dtwf_offsreal[2]; -+} dtrace_time ____cacheline_aligned; -+ -+/* -+ * Callback from timekeeper code that allows dtrace to update its own time data. 
-+ */ -+void dtrace_update_time(struct timekeeper *tk) -+{ -+ raw_write_seqcount_latch(&dtrace_time.dtwf_seq); -+ dtrace_time.dtwf_offsreal[0] = tk->offs_real; -+ raw_write_seqcount_latch(&dtrace_time.dtwf_seq); -+ dtrace_time.dtwf_offsreal[1] = tk->offs_real; -+} -+ -+/* Lock free walltime */ -+ktime_t dtrace_get_walltime(void) -+{ -+ u64 nsec = ktime_get_mono_fast_ns(); -+ unsigned int seq; -+ ktime_t offset; -+ -+ do { -+ seq = raw_read_seqcount_latch(&dtrace_time.dtwf_seq); -+ offset = dtrace_time.dtwf_offsreal[seq & 0x1]; -+ } while (read_seqcount_retry(&dtrace_time.dtwf_seq, seq)); -+ -+ return ktime_add_ns(offset, nsec); -+} -+EXPORT_SYMBOL(dtrace_get_walltime); -+ -+ktime_t dtrace_gethrtime(void) -+{ -+ return ns_to_ktime(ktime_get_raw_fast_ns()); -+} -+EXPORT_SYMBOL(dtrace_gethrtime); -+ -+/* Needed for lockstat probes where we cannot include ktime.h */ -+u64 dtrace_gethrtime_ns(void) -+{ -+ return ktime_get_raw_fast_ns(); -+} -+EXPORT_SYMBOL(dtrace_gethrtime_ns); -+ -+void dtrace_vtime_enable(void) -+{ -+ enum dtrace_vtime_state old, new; -+ -+ do { -+ old = dtrace_vtime_active; -+ if (old == DTRACE_VTIME_ACTIVE) { -+ pr_warn_once("DTrace virtual time already enabled"); -+ return; -+ } -+ -+ new = DTRACE_VTIME_ACTIVE; -+ } while (cmpxchg(&dtrace_vtime_active, old, new) != old); -+} -+EXPORT_SYMBOL(dtrace_vtime_enable); -+ -+void dtrace_vtime_disable(void) -+{ -+ int old, new; -+ -+ do { -+ old = dtrace_vtime_active; -+ if (old == DTRACE_VTIME_INACTIVE) { -+ pr_warn_once("DTrace virtual time already disabled"); -+ return; -+ } -+ -+ new = DTRACE_VTIME_INACTIVE; -+ } while (cmpxchg(&dtrace_vtime_active, old, new) != old); -+} -+EXPORT_SYMBOL(dtrace_vtime_disable); -+ -+void dtrace_vtime_switch(struct task_struct *prev, struct task_struct *next) -+{ -+ struct dtrace_task *dprev = prev->dt_task; -+ struct dtrace_task *dnext = next->dt_task; -+ ktime_t now = dtrace_gethrtime(); -+ -+ if (dprev != NULL && ktime_nz(dprev->dt_start)) { -+ dprev->dt_vtime = 
ktime_add(dprev->dt_vtime, -+ ktime_sub(now, -+ dprev->dt_start)); -+ dprev->dt_start = ktime_set(0, 0); -+ } -+ -+ if (dnext != NULL) -+ dnext->dt_start = now; -+} -+ -+void dtrace_stacktrace(struct stacktrace_state *st) -+{ -+ int i; -+ -+ if ((st->flags & STACKTRACE_TYPE) == STACKTRACE_USER) { -+ dtrace_user_stacktrace(st); -+ return; -+ } -+ -+ if (st->pcs == NULL) { -+ st->depth = 0; -+ return; -+ } -+ -+ st->depth = stack_trace_save((long unsigned int *) st->pcs, -+ st->limit ? st->limit : 512, st->depth); -+ -+ /* -+ * For entirely unknown reasons, the save_stack_trace() implementation -+ * on x86_64 adds a ULONG_MAX entry after the last stack trace entry. -+ * This might be a sentinel value, but given that struct stack_trace -+ * already contains a nr_entries counter, this seems rather pointless. -+ * Alas, we need to add a special case for that... And to make matters -+ * worse, it actually does this only when there is room for it (i.e. -+ * when nr_entries < max_entries). -+ * Since ULONG_MAX is never a valid PC, we can just check for that. -+ */ -+#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64) -+ if (st->depth && st->pcs[st->depth - 1] == ULONG_MAX) -+ st->depth--; -+#endif -+ -+ if (st->fps != NULL) { -+ for (i = 0; i < st->limit; i++) -+ st->fps[i] = 0; -+ } -+} -+EXPORT_SYMBOL(dtrace_stacktrace); -+ -+/* -+ * INVALID OPCODE AND PAGE FAULT HANDLING -+ */ -+static struct notifier_block dtrace_die = { -+ .notifier_call = dtrace_die_notifier, -+ .priority = 0x7fffffff -+}; -+ -+static int dtrace_enabled; -+ -+/* -+ * DTrace enable/disable must be called with dtrace_lock being held. It is not -+ * possible to check for safety here with an ASSERT as the lock itself is in the -+ * DTrace Framework kernel module. 
-+ */ -+int dtrace_enable(void) -+{ -+ if (dtrace_enabled) -+ return 0; -+ -+ if (register_die_notifier(&dtrace_die) != 0) -+ return 1; -+ -+ dtrace_enabled = 1; -+ return 0; -+} -+EXPORT_SYMBOL(dtrace_enable); -+ -+void dtrace_disable(void) -+{ -+ if (!dtrace_enabled) -+ return; -+ -+ unregister_die_notifier(&dtrace_die); -+ dtrace_enabled = 0; -+} -+EXPORT_SYMBOL(dtrace_disable); -diff --git a/kernel/dtrace/dtrace_psinfo.c b/kernel/dtrace/dtrace_psinfo.c -new file mode 100644 -index 000000000000..bb5f6fc2ce63 ---- /dev/null -+++ b/kernel/dtrace/dtrace_psinfo.c -@@ -0,0 +1,212 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_psinfo.c -+ * DESCRIPTION: DTrace - DTrace psinfo implementation -+ * -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/binfmts.h> -+#include <linux/dtrace_psinfo.h> -+#include <linux/dtrace_task_impl.h> -+#include <linux/mm.h> -+#include <linux/sched.h> -+#include <linux/sched/mm.h> -+#include <linux/slab.h> -+#include <linux/uaccess.h> -+ -+struct kmem_cache *dtrace_psinfo_cachep; -+ -+/* -+ * Free the psinfo_t structure. -+ */ -+void dtrace_psinfo_free(struct dtrace_psinfo *psinfo) -+{ -+ kfree(psinfo->dtps_argv); -+ kfree(psinfo->dtps_envp); -+ kmem_cache_free(dtrace_psinfo_cachep, psinfo); -+} -+ -+/* -+ * Allocate a new dtrace_psinfo_t structure. 
-+ */ -+void dtrace_psinfo_alloc(struct task_struct *tsk) -+{ -+ struct dtrace_psinfo *psinfo; -+ struct mm_struct *mm = NULL; -+ -+ if (unlikely(tsk->dt_task == NULL)) -+ return; -+ -+ if (likely(tsk->dt_task->dt_psinfo != NULL)) { -+ struct dtrace_psinfo *tmp = tsk->dt_task->dt_psinfo; -+ tsk->dt_task->dt_psinfo = NULL; -+ -+ dtrace_psinfo_put(tmp); -+ } -+ -+ psinfo = kmem_cache_alloc(dtrace_psinfo_cachep, GFP_KERNEL); -+ if (psinfo == NULL) -+ goto fail; -+ -+ mm = get_task_mm(tsk); -+ if (mm) { -+ size_t len = mm->arg_end - mm->arg_start; -+ int i = 0; -+ char *p; -+ -+ /* -+ * Construct the psargs string. -+ */ -+ if (len > 0) { -+ if (len >= PR_PSARGS_SZ) -+ len = PR_PSARGS_SZ - 1; -+ -+ i = access_process_vm(tsk, mm->arg_start, -+ psinfo->dtps_psargs, len, 0); -+ -+ if (i > 0) { -+ if (i < len) -+ len = i; -+ -+ for (i = 0, --len; i < len; i++) { -+ if (psinfo->dtps_psargs[i] == '\0') -+ psinfo->dtps_psargs[i] = ' '; -+ } -+ } -+ } -+ -+ if (i < 0) -+ i = 0; -+ -+ while (i < PR_PSARGS_SZ) -+ psinfo->dtps_psargs[i++] = 0; -+ -+ /* -+ * Determine the number of arguments. -+ */ -+ psinfo->dtps_argc = 0; -+ for (p = (char *)mm->arg_start; p < (char *)mm->arg_end; -+ psinfo->dtps_argc++) { -+ size_t l = strnlen_user(p, MAX_ARG_STRLEN); -+ -+ if (!l) -+ break; -+ -+ p += l + 1; -+ } -+ -+ /* -+ * Limit the number of stored argument pointers. -+ */ -+ len = psinfo->dtps_argc; -+ if (len >= PR_ARGV_SZ) -+ len = PR_ARGV_SZ - 1; -+ -+ psinfo->dtps_argv = kmalloc((len + 1) * sizeof(char *), -+ GFP_KERNEL); -+ if (psinfo->dtps_argv == NULL) -+ goto fail; -+ -+ /* -+ * Now populate the array of argument strings. -+ */ -+ for (i = 0, p = (char *)mm->arg_start; i < len; i++) { -+ psinfo->dtps_argv[i] = p; -+ p += strnlen_user(p, MAX_ARG_STRLEN) + 1; -+ } -+ psinfo->dtps_argv[len] = NULL; -+ -+ /* -+ * Determine the number of environment variables. 
-+ */ -+ psinfo->dtps_envc = 0; -+ for (p = (char *)mm->env_start; p < (char *)mm->env_end; -+ psinfo->dtps_envc++) { -+ size_t l = strnlen_user(p, MAX_ARG_STRLEN); -+ -+ if (!l) -+ break; -+ -+ p += l + 1; -+ } -+ -+ /* -+ * Limit the number of stored environment pointers. -+ */ -+ len = psinfo->dtps_envc; -+ if (len >= PR_ENVP_SZ) -+ len = PR_ENVP_SZ - 1; -+ -+ psinfo->dtps_envp = kmalloc((len + 1) * sizeof(char *), -+ GFP_KERNEL); -+ if (psinfo->dtps_envp == NULL) -+ goto fail; -+ -+ /* -+ * Now populate the array of environment variable strings. -+ */ -+ for (i = 0, p = (char *)mm->env_start; i < len; i++) { -+ psinfo->dtps_envp[i] = p; -+ p += strnlen_user(p, MAX_ARG_STRLEN) + 1; -+ } -+ psinfo->dtps_envp[len] = NULL; -+ -+ mmput(mm); -+ } else { -+ size_t len = min(TASK_COMM_LEN, PR_PSARGS_SZ); -+ int i; -+ -+ /* -+ * We end up here for tasks that do not have managed memory at -+ * all, which generally means that this is a kernel thread. -+ * If it is not, this is still safe because we know that tasks -+ * always have the comm member populated with something (even -+ * if it would be an empty string). -+ */ -+ memcpy(psinfo->dtps_psargs, tsk->comm, len); -+ for (i = len; i < PR_PSARGS_SZ; i++) -+ psinfo->dtps_psargs[i] = 0; -+ -+ psinfo->dtps_argc = 0; -+ psinfo->dtps_argv = kmalloc(sizeof(char *), GFP_KERNEL); -+ psinfo->dtps_argv[0] = NULL; -+ psinfo->dtps_envc = 0; -+ psinfo->dtps_envp = kmalloc(sizeof(char *), GFP_KERNEL); -+ psinfo->dtps_envp[0] = NULL; -+ } -+ -+ atomic_set(&psinfo->dtps_usage, 1); -+ tsk->dt_task->dt_psinfo = psinfo; /* new one */ -+ -+ return; -+ -+fail: -+ if (mm) -+ mmput(mm); -+ -+ if (psinfo) -+ dtrace_psinfo_free(psinfo); -+} -+ -+/* -+ * Initialize DTrace's psinfo subsystem. 
-+ */ -+void __init dtrace_psinfo_os_init(void) -+{ -+ dtrace_psinfo_cachep = kmem_cache_create("dtrace_psinfo_cache", -+ sizeof(struct dtrace_psinfo), 0, -+ SLAB_HWCACHE_ALIGN | SLAB_PANIC, -+ NULL); -+ -+} -diff --git a/kernel/dtrace/dtrace_task.c b/kernel/dtrace/dtrace_task.c -new file mode 100644 -index 000000000000..02bcc6b7e0a2 ---- /dev/null -+++ b/kernel/dtrace/dtrace_task.c -@@ -0,0 +1,237 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_task.c -+ * DESCRIPTION: DTrace - per-task data -+ * -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/dtrace_task_impl.h> -+#include <linux/sched/mm.h> -+#include <linux/slab.h> -+ -+struct kmem_cache *dtrace_task_cachep; -+ -+void (*dtrace_helpers_fork)(struct task_struct *, struct task_struct *); -+EXPORT_SYMBOL(dtrace_helpers_fork); -+ -+/* -+ * Reset per-task sate to default values. Modifies only part of -+ * the state that does not persist across process forks. -+ */ -+static void dtrace_task_reinit(struct dtrace_task *dtsk) -+{ -+ dtsk->dt_predcache = 0; -+ dtsk->dt_stop = 0; -+ dtsk->dt_sig = 0; -+ -+ dtsk->dt_helpers = NULL; -+ dtsk->dt_probes = 0; -+ dtsk->dt_tp_count = 0; -+} -+ -+/* -+ * Allocate new per-task structure and initialize it with default -+ * values. -+ */ -+static struct dtrace_task *dtrace_task_alloc(void) -+{ -+ struct dtrace_task *dtsk; -+ -+ /* Try to allocate new task. 
*/ -+ dtsk = kmem_cache_alloc(dtrace_task_cachep, GFP_KERNEL); -+ if (dtsk == NULL) -+ return NULL; -+ -+ /* Initialize new task. */ -+ dtrace_task_reinit(dtsk); -+ -+ dtsk->dt_vtime = ktime_set(0, 0); -+ dtsk->dt_start = ktime_set(0, 0); -+ dtsk->dt_psinfo = NULL; -+ dtsk->dt_ustack = NULL; -+ -+ return dtsk; -+} -+ -+/* -+ * Cleans all attached resources to the per-task structure so it is ready to be -+ * reused or freed. -+ */ -+static void dtrace_task_cleanup(struct task_struct *tsk) -+{ -+ struct dtrace_psinfo *psinfo; -+ -+ /* Nothing to remove. */ -+ if (tsk->dt_task == NULL) -+ return; -+ -+ /* Release psinfo if any. */ -+ psinfo = tsk->dt_task->dt_psinfo; -+ if (psinfo != NULL) { -+ tsk->dt_task->dt_psinfo = NULL; -+ dtrace_psinfo_put(psinfo); -+ } -+} -+ -+/* -+ * Kernel hooks for per-task events. -+ */ -+ -+/* -+ * Called when a new task has been created. -+ * -+ * It tries to allocate new per-task data strcture and initialize -+ * it with default values. -+ */ -+void dtrace_task_init(struct task_struct *tsk) -+{ -+ struct mm_struct *mm = NULL; -+ -+ /* Initialize new task structure */ -+ tsk->dt_task = dtrace_task_alloc(); -+ if (tsk->dt_task == NULL) -+ return; -+ -+ /* Try to setup initial userspace stack. */ -+ mm = get_task_mm(tsk); -+ if (mm) { -+ tsk->dt_task->dt_ustack = (void *)mm->start_stack; -+ mmput(mm); -+ } -+} -+ -+/* -+ * Called when a task has been duplicated. -+ * -+ * When a task is duplicated this is called early to provide new instance -+ * of per-task data. This hook is called very early after a dup has been -+ * performed. The new task shares almost everything with its parent and -+ * locking performed must be aligned with locking of the kernel. -+ * -+ * DTrace resets new task to its default values. -+ */ -+void dtrace_task_dup(struct task_struct *src, struct task_struct *dst) -+{ -+ struct dtrace_psinfo *psinfo; -+ struct dtrace_task *dtsk; -+ -+ /* Nothing to clone. 
*/ -+ if (src->dt_task == NULL) -+ return; -+ -+ /* Allocate and reinitialize new task. */ -+ dtsk = dtrace_task_alloc(); -+ if (dtsk == NULL) { -+ dst->dt_task = NULL; -+ return; -+ } -+ dtrace_task_reinit(dtsk); -+ -+ /* Share psinfo if it is available. */ -+ psinfo = src->dt_task->dt_psinfo; -+ if (psinfo != NULL) { -+ dtrace_psinfo_get(psinfo); -+ dtsk->dt_psinfo = psinfo; -+ } -+ -+ /* Copy remaining attributes of the source task. */ -+ dtsk->dt_ustack = src->dt_task->dt_ustack; -+ dst->dt_task = dtsk; -+} -+ -+/* -+ * Called when a process has been copied. -+ * -+ * If the original task has helpers attached fork them too. -+ */ -+void dtrace_task_copy(struct task_struct *tsk, struct task_struct *child) -+{ -+ if (tsk->dt_task == NULL) -+ return; -+ -+ if (child->dt_task == NULL) -+ return; -+ -+ /* Handle helpers for this task. */ -+ if (likely(dtrace_helpers_fork == NULL)) -+ return; -+ -+ if (tsk->dt_task->dt_helpers != NULL) -+ (*dtrace_helpers_fork)(tsk, child); -+} -+ -+/* -+ * Called when a task has performed exec. -+ * -+ * If DTrace's per-task structure is already allocated it is reused for -+ * the new task. If it is not present an allocation attempt is made. -+ */ -+void dtrace_task_exec(struct task_struct *tsk) -+{ -+ struct mm_struct *mm = NULL; -+ -+ /* Try to reuse existing dtrace task. */ -+ if (tsk->dt_task != NULL) { -+ dtrace_task_cleanup(tsk); -+ dtrace_task_reinit(tsk->dt_task); -+ -+ /* Try to set up initial userspace stack. */ -+ mm = get_task_mm(tsk); -+ if (mm) { -+ tsk->dt_task->dt_ustack = (void *)mm->start_stack; -+ mmput(mm); -+ } -+ } else { -+ dtrace_task_init(tsk); -+ -+ /* No luck, we won't be able to trace this task. */ -+ if (tsk->dt_task == NULL) -+ return; -+ } -+ -+ /* Finalize init of the per-task structure. */ -+ dtrace_psinfo_alloc(tsk); -+} -+ -+/* -+ * Called when a task is about to be released. -+ * -+ * The DTrace's per-task data are disconnected and freed. 
-+ */ -+void dtrace_task_free(struct task_struct *tsk) -+{ -+ struct dtrace_task *dtsk = tsk->dt_task; -+ -+ /* Nothing to do. */ -+ if (dtsk == NULL) -+ return; -+ -+ /* Release the per-task data. */ -+ dtrace_task_cleanup(tsk); -+ tsk->dt_task = NULL; -+ kmem_cache_free(dtrace_task_cachep, dtsk); -+} -+ -+/* -+ * Initialize DTrace's task subsystem. -+ */ -+void __init dtrace_task_os_init(void) -+{ -+ /* Will panic if not initialized so no need to check for errors. */ -+ dtrace_task_cachep = kmem_cache_create("dtrace_task_cache", -+ sizeof(struct dtrace_task), 0, -+ SLAB_HWCACHE_ALIGN | SLAB_PANIC, -+ NULL); -+} -+ -diff --git a/kernel/exit.c b/kernel/exit.c -index 07ff3139edf2..145e1fbe63c9 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -63,6 +63,7 @@ - #include <linux/random.h> - #include <linux/rcuwait.h> - #include <linux/compat.h> -+#include <linux/dtrace_os.h> - - #include <linux/uaccess.h> - #include <asm/unistd.h> -@@ -785,6 +786,9 @@ void __noreturn do_exit(long code) - tsk->exit_code = code; - taskstats_exit(tsk, group_dead); - -+ /* Remove DTrace state for this task */ -+ dtrace_task_free(tsk); -+ - exit_mm(); - - if (group_dead) -diff --git a/kernel/fork.c b/kernel/fork.c -index c9ba2b7bfef9..d6e085e5b6d7 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -94,6 +94,7 @@ - #include <linux/thread_info.h> - #include <linux/stackleak.h> - #include <linux/kasan.h> -+#include <linux/dtrace_task_impl.h> - - #include <asm/pgtable.h> - #include <asm/pgalloc.h> -@@ -454,6 +455,8 @@ void put_task_stack(struct task_struct *tsk) - - void free_task(struct task_struct *tsk) - { -+ dtrace_task_free(tsk); -+ - #ifndef CONFIG_THREAD_INFO_IN_TASK - /* - * The task is finally done with both the stack and thread_info, -@@ -947,6 +950,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) - #ifdef CONFIG_MEMCG - tsk->active_memcg = NULL; - #endif -+ -+ dtrace_task_dup(orig, tsk); - return tsk; - - free_stack: -@@ -2287,6 +2292,25 @@ 
static __latent_entropy struct task_struct *copy_process( - syscall_tracepoint_update(p); - write_unlock_irq(&tasklist_lock); - -+#ifdef CONFIG_DTRACE -+ /* -+ * We make this call fairly late into the copy_process() handling, -+ * because we need to ensure that we can look up this task based on -+ * its pid using find_task_by_vpid(). We also must ensure that the -+ * tasklist_lock has been released. -+ */ -+ dtrace_task_copy(current, p); -+ -+ /* -+ * If we're called with stack_start != 0, this is almost certainly a -+ * thread being created in current. Make sure it gets its own psinfo -+ * data, because we need to record a new bottom of stack value. -+ */ -+ if (p->mm && args->stack) -+ if (p->dt_task != NULL) -+ p->dt_task->dt_ustack = (void *)args->stack; -+#endif -+ - proc_fork_connector(p); - cgroup_post_fork(p); - cgroup_threadgroup_change_end(current); -diff --git a/kernel/module.c b/kernel/module.c -index 70affc757bbc..b59b513f712c 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -42,6 +42,7 @@ - #include <asm/mmu_context.h> - #include <linux/license.h> - #include <asm/sections.h> -+#include <linux/dtrace_os.h> - #include <linux/tracepoint.h> - #include <linux/ftrace.h> - #include <linux/livepatch.h> -@@ -86,6 +87,9 @@ - DEFINE_MUTEX(module_mutex); - EXPORT_SYMBOL_GPL(module_mutex); - static LIST_HEAD(modules); -+#ifdef CONFIG_DTRACE -+struct list_head *dtrace_modules = &modules; -+#endif /* CONFIG_DTRACE */ - - /* Work queue for freeing init sections in success case */ - static struct work_struct init_free_wq; -@@ -1016,6 +1020,12 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, - } - } - -+ /* Try destroying DTrace provider. */ -+ if (!dtrace_destroy_prov(mod)) { -+ ret = -EBUSY; -+ goto out; -+ } -+ - /* Stop the machine so refcounts can't move and disable module. 
*/ - ret = try_stop_module(mod, flags, &forced); - if (ret != 0) -@@ -2146,6 +2156,7 @@ void __weak module_arch_freeing_init(struct module *mod) - /* Free a module, remove from lists, etc. */ - static void free_module(struct module *mod) - { -+ dtrace_mod_pdata_free(mod); - trace_module_free(mod); - - mod_sysfs_teardown(mod); -@@ -3849,6 +3860,9 @@ static int load_module(struct load_info *info, const char __user *uargs, - /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ - ftrace_module_init(mod); - -+ /* Allocate DTrace per-module data. */ -+ dtrace_mod_pdata_alloc(mod); -+ - /* Finally it's fully formed, ready to start executing. */ - err = complete_formation(mod, info); - if (err) -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 3dd675697301..4c2014cc77cd 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -9,6 +9,7 @@ - #include "sched.h" - - #include <linux/nospec.h> -+#include <linux/dtrace_os.h> - - #include <linux/kcov.h> - -@@ -3196,6 +3197,11 @@ static struct rq *finish_task_switch(struct task_struct *prev) - - rq->prev_mm = NULL; - -+#ifdef CONFIG_DTRACE -+ if (dtrace_vtime_active) -+ dtrace_vtime_switch(prev, current); -+#endif -+ - /* - * A task struct has one reference for the use as "current". 
- * If a task dies, then it sets TASK_DEAD in tsk->state and calls -@@ -6686,6 +6692,10 @@ void __init sched_init(void) - #endif /* CONFIG_SMP */ - hrtick_rq_init(rq); - atomic_set(&rq->nr_iowait, 0); -+ -+#ifdef CONFIG_DTRACE -+ rq->dtrace_cpu_info = per_cpu_info(i); -+#endif - } - - set_load_weight(&init_task, false); -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index b056149c228b..ae014c47d711 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -327,6 +327,7 @@ extern bool dl_cpu_busy(unsigned int cpu); - #ifdef CONFIG_CGROUP_SCHED - - #include <linux/cgroup.h> -+#include <linux/dtrace_cpu.h> - #include <linux/psi.h> - - struct cfs_rq; -@@ -1005,6 +1006,9 @@ struct rq { - /* Must be inspected within a rcu lock section */ - struct cpuidle_state *idle_state; - #endif -+#ifdef CONFIG_DTRACE -+ struct cpuinfo *dtrace_cpu_info; -+#endif - }; - - #ifdef CONFIG_FAIR_GROUP_SCHED -diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c -index ca69290bee2a..44181877a96d 100644 ---- a/kernel/time/timekeeping.c -+++ b/kernel/time/timekeeping.c -@@ -22,6 +22,7 @@ - #include <linux/pvclock_gtod.h> - #include <linux/compiler.h> - #include <linux/audit.h> -+#include <linux/dtrace_os.h> - - #include "tick-internal.h" - #include "ntp_internal.h" -@@ -669,6 +670,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) - tk_update_ktime_data(tk); - - update_vsyscall(tk); -+ dtrace_update_time(tk); - update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); - - tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real; -diff --git a/scripts/coccinelle/dtrace/enum-elision.cocci b/scripts/coccinelle/dtrace/enum-elision.cocci -new file mode 100644 -index 000000000000..77a5b33bd166 ---- /dev/null -+++ b/scripts/coccinelle/dtrace/enum-elision.cocci -@@ -0,0 +1,29 @@ -+/// Reduce uses of typedefs of named enums to the name of the enum -+ -+virtual patch -+virtual context -+virtual org -+virtual report -+ -+@td@ -+type T; 
-+identifier E; -+@@ -+- typedef enum E -++ enum E -+ { ... -+- } T; -++ }; -+@@ -+type td.T; -+identifier td.E; -+@@ -+- T -++ enum E -+ -+@@ -+type td.T; -+identifier td.E; -+@@ -+- const T -++ const enum E -diff --git a/scripts/coccinelle/dtrace/typedef-elision.cocci b/scripts/coccinelle/dtrace/typedef-elision.cocci -new file mode 100644 -index 000000000000..bc4caf375b60 ---- /dev/null -+++ b/scripts/coccinelle/dtrace/typedef-elision.cocci -@@ -0,0 +1,83 @@ -+/// Reduce uses of typedefs of named structures to the name of the structure -+ -+virtual patch -+virtual context -+virtual org -+virtual report -+ -+@td@ -+type T; -+identifier S; -+@@ -+( -+- typedef struct S -++ struct S -+ { ... -+- } T; -++ }; -+| -+ struct S; -+- typedef struct S T; -+) -+@@ -+type td.T; -+identifier td.S; -+@@ -+- T -++ struct S -+ -+@@ -+type td.T; -+identifier td.S; -+@@ -+- const T -++ const struct S -+ -+/// Now structures declared with typedefs of opaque structs, one by one -+@@ -+typedef dtrace_ecb_t; -+@@ -+- dtrace_ecb_t -++ struct dtrace_ecb -+ -+@@ -+typedef dtrace_actdesc_t; -+@@ -+- dtrace_actdesc_t -++ struct dtrace_actdesc -+ -+@@ -+typedef dtrace_state_t; -+@@ -+- dtrace_state_t -++ struct dtrace_state -+ -+@@ -+typedef dtrace_vstate_t; -+@@ -+- dtrace_vstate_t -++ struct dtrace_vstate -+ -+@@ -+typedef dtrace_mstate_t; -+@@ -+- dtrace_mstate_t -++ struct dtrace_mstate -+ -+@@ -+typedef dtrace_task_t; -+@@ -+- dtrace_task_t -++ struct dtrace_task -+ -+@@ -+typedef dtrace_psinfo_t; -+@@ -+- dtrace_psinfo_t -++ struct dtrace_psinfo -+ -+@@ -+typedef dt_fbt_bl_entry_t; -+@@ -+- dt_fbt_bl_entry_t -++ struct dt_fbt_bl_entry -diff --git a/scripts/package/mkspec b/scripts/package/mkspec -index 63511c885a37..902cb09ae46c 100755 ---- a/scripts/package/mkspec -+++ b/scripts/package/mkspec -@@ -150,6 +150,7 @@ $M %exclude /lib/modules/$KERNELRELEASE/source - %files headers - %defattr (-, root, root) - /usr/include -+ %exclude /usr/include/linux/dtrace - $S$M - $S$M %files devel - 
$S$M %defattr (-, root, root) --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0005-dtrace-modular-components-and-x86-support.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0005-dtrace-modular-components-and-x86-support.patch deleted file mode 100644 index 5890f96e9586..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0005-dtrace-modular-components-and-x86-support.patch +++ /dev/null @@ -1,18393 +0,0 @@ -From 157815093bf4ffa662a6bfc9d949b59861ff145e Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 17:52:54 +0000 -Subject: [PATCH 05/20] dtrace: modular components and x86 support - -This implements the core DTrace module (including the entire DIF -interpreter and support for all built-in D variables and functions) and -one test provider, dt_test.ko. It uses the machinery added in the last -few commits. An x86 implementation of the architecture-dependent parts -is also added so that one platform at least can compile it. - -At this stage, almost no probes will exist: they are added by the -following commits, that add providers and SDT probes. 
- -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - Makefile | 1 + - arch/x86/dtrace/Makefile.arch | 11 + - arch/x86/dtrace/dtrace_asm_x86_64.S | 228 ++ - arch/x86/dtrace/dtrace_isa_x86_64.c | 228 ++ - dtrace/Makefile | 19 + - dtrace/ctf_api.h | 33 + - dtrace/dt_test.h | 30 + - dtrace/dt_test_dev.c | 176 + - dtrace/dt_test_mod.c | 52 + - dtrace/dtrace.h | 35 + - dtrace/dtrace_actdesc.c | 91 + - dtrace/dtrace_anon.c | 144 + - dtrace/dtrace_buffer.c | 490 +++ - dtrace/dtrace_debug.h | 118 + - dtrace/dtrace_dev.c | 1599 +++++++++ - dtrace/dtrace_dev.h | 35 + - dtrace/dtrace_dif.c | 4905 +++++++++++++++++++++++++++ - dtrace/dtrace_dof.c | 2504 ++++++++++++++ - dtrace/dtrace_ecb.c | 936 +++++ - dtrace/dtrace_enable.c | 449 +++ - dtrace/dtrace_fmt.c | 104 + - dtrace/dtrace_hash.c | 266 ++ - dtrace/dtrace_isa.c | 361 ++ - dtrace/dtrace_match.c | 364 ++ - dtrace/dtrace_mod.c | 45 + - dtrace/dtrace_predicate.c | 80 + - dtrace/dtrace_priv.c | 120 + - dtrace/dtrace_probe.c | 1542 +++++++++ - dtrace/dtrace_probe_ctx.c | 659 ++++ - dtrace/dtrace_ptofapi.c | 649 ++++ - dtrace/dtrace_spec.c | 434 +++ - dtrace/dtrace_state.c | 1108 ++++++ - dtrace/dtrace_util.c | 283 ++ - 33 files changed, 18099 insertions(+) - create mode 100644 arch/x86/dtrace/Makefile.arch - create mode 100644 arch/x86/dtrace/dtrace_asm_x86_64.S - create mode 100644 arch/x86/dtrace/dtrace_isa_x86_64.c - create mode 100644 dtrace/Makefile - create mode 100644 dtrace/ctf_api.h - create mode 100644 dtrace/dt_test.h - create mode 100644 dtrace/dt_test_dev.c - create mode 100644 dtrace/dt_test_mod.c - create mode 100644 dtrace/dtrace.h - create mode 100644 dtrace/dtrace_actdesc.c - create mode 100644 dtrace/dtrace_anon.c - create mode 
100644 dtrace/dtrace_buffer.c - create mode 100644 dtrace/dtrace_debug.h - create mode 100644 dtrace/dtrace_dev.c - create mode 100644 dtrace/dtrace_dev.h - create mode 100644 dtrace/dtrace_dif.c - create mode 100644 dtrace/dtrace_dof.c - create mode 100644 dtrace/dtrace_ecb.c - create mode 100644 dtrace/dtrace_enable.c - create mode 100644 dtrace/dtrace_fmt.c - create mode 100644 dtrace/dtrace_hash.c - create mode 100644 dtrace/dtrace_isa.c - create mode 100644 dtrace/dtrace_match.c - create mode 100644 dtrace/dtrace_mod.c - create mode 100644 dtrace/dtrace_predicate.c - create mode 100644 dtrace/dtrace_priv.c - create mode 100644 dtrace/dtrace_probe.c - create mode 100644 dtrace/dtrace_probe_ctx.c - create mode 100644 dtrace/dtrace_ptofapi.c - create mode 100644 dtrace/dtrace_spec.c - create mode 100644 dtrace/dtrace_state.c - create mode 100644 dtrace/dtrace_util.c - -diff --git a/Makefile b/Makefile -index d6b9a7d2c973..dc05d9af9d1b 100644 ---- a/Makefile -+++ b/Makefile -@@ -623,6 +623,7 @@ net-y := net/ - libs-y := lib/ - core-y := usr/ - virt-y := virt/ -+dtrace-y := dtrace/ - endif # KBUILD_EXTMOD - - # The all: target is the default when no target is given on the -diff --git a/arch/x86/dtrace/Makefile.arch b/arch/x86/dtrace/Makefile.arch -new file mode 100644 -index 000000000000..ffb9ef4d1722 ---- /dev/null -+++ b/arch/x86/dtrace/Makefile.arch -@@ -0,0 +1,11 @@ -+# -+# Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 
-+# -+ -+DTARCHDIR = ../arch/x86/dtrace -+ -+ccflags-y += -I$(srctree)/arch/x86/dtrace/include -Idtrace -+ -+dtrace-obj += dtrace_asm_x86_64.o dtrace_isa_x86_64.o -+ -+dtrace-y += $(addprefix $(DTARCHDIR)/, $(dtrace-obj)) -diff --git a/arch/x86/dtrace/dtrace_asm_x86_64.S b/arch/x86/dtrace/dtrace_asm_x86_64.S -new file mode 100644 -index 000000000000..46d3fe1b9703 ---- /dev/null -+++ b/arch/x86/dtrace/dtrace_asm_x86_64.S -@@ -0,0 +1,228 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - x86 specific assembly -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/linkage.h> -+#include <asm/smap.h> -+ -+#define CPU_DTRACE_BADADDR 0x0004 /* DTrace fault: bad address */ -+ -+#if defined(__x86_64__) -+ SYM_CODE_START(dtrace_caller) -+ movq $-1, %rax -+ ret -+ SYM_CODE_END(dtrace_caller) -+ -+#elif defined(__i386__) -+ -+ SYM_CODE_START(dtrace_caller) -+ movl $-1, %eax -+ ret -+ SYM_CODE_END(dtrace_caller) -+ -+#endif /* __i386__ */ -+ -+#if defined(__x86_64__) -+ -+ SYM_FUNC_START(dtrace_copy) -+ pushq %rbp -+ movq %rsp, %rbp -+ -+ ASM_STAC -+ xchgq %rdi, %rsi # make %rsi source, %rdi dest -+ movq %rdx, %rcx # load count -+ repz # repeat for count ... 
-+ smovb # move from %ds:rsi to %ed:rdi -+ ASM_CLAC -+ leave -+ ret -+ SYM_FUNC_END(dtrace_copy) -+ -+#elif defined(__i386__) -+ -+ SYM_FUNC_START(dtrace_copy) -+ pushl %ebp -+ movl %esp, %ebp -+ pushl %esi -+ pushl %edi -+ -+ movl 8(%ebp), %esi # Load source address -+ movl 12(%ebp), %edi # Load destination address -+ movl 16(%ebp), %ecx # Load count -+ repz # Repeat for count... -+ smovb # move from %ds:si to %es:di -+ -+ popl %edi -+ popl %esi -+ movl %ebp, %esp -+ popl %ebp -+ ret -+ SYM_FUNC_END(dtrace_copy) -+ -+#endif /* __i386__ */ -+ -+#if defined(__x86_64__) -+ -+ SYM_FUNC_START(dtrace_copystr) -+ pushq %rbp -+ movq %rsp, %rbp -+ -+ ASM_STAC -+0: -+ movb (%rdi), %al # load from source -+ movb %al, (%rsi) # store to destination -+ addq $1, %rdi # increment source pointer -+ addq $1, %rsi # increment destination pointer -+ subq $1, %rdx # decrement remaining count -+ cmpb $0, %al -+ je 2f -+ testq $0xfff, %rdx # test if count is 4k-aligned -+ jnz 1f # if not, continue with copying -+ testq $CPU_DTRACE_BADADDR, (%rcx) # load and test dtrace flags -+ jnz 2f -+1: -+ cmpq $0, %rdx -+ jne 0b -+2: -+ ASM_CLAC -+ leave -+ ret -+ -+ SYM_FUNC_END(dtrace_copystr) -+ -+#elif defined(__i386__) -+ -+ SYM_FUNC_START(dtrace_copystr) -+ -+ pushl %ebp # Setup stack frame -+ movl %esp, %ebp -+ pushl %ebx # Save registers -+ -+ movl 8(%ebp), %ebx # Load source address -+ movl 12(%ebp), %edx # Load destination address -+ movl 16(%ebp), %ecx # Load count -+ -+0: -+ movb (%ebx), %al # Load from source -+ movb %al, (%edx) # Store to destination -+ incl %ebx # Increment source pointer -+ incl %edx # Increment destination pointer -+ decl %ecx # Decrement remaining count -+ cmpb $0, %al -+ je 2f -+ testl $0xfff, %ecx # Check if count is 4k-aligned -+ jnz 1f -+ movl 20(%ebp), %eax # load flags pointer -+ testl $CPU_DTRACE_BADADDR, (%eax) # load and test dtrace flags -+ jnz 2f -+1: -+ cmpl $0, %ecx -+ jne 0b -+ -+2: -+ popl %ebx -+ movl %ebp, %esp -+ popl %ebp -+ ret -+ -+ 
SYM_FUNC_END(dtrace_copystr) -+ -+#endif /* __i386__ */ -+ -+#if defined(__x86_64__) -+ -+ SYM_CODE_START(dtrace_fuword8_nocheck) -+ xorq %rax, %rax -+ ASM_STAC -+ movb (%rdi), %al -+ ASM_CLAC -+ ret -+ SYM_CODE_END(dtrace_fuword8_nocheck) -+ -+#elif defined(__i386__) -+ -+ SYM_CODE_START(dtrace_fuword8_nocheck) -+ movl 4(%esp), %ecx -+ xorl %eax, %eax -+ movzbl (%ecx), %eax -+ ret -+ SYM_CODE_END(dtrace_fuword8_nocheck) -+ -+#endif /* __i386__ */ -+ -+#if defined(__x86_64__) -+ -+ SYM_CODE_START(dtrace_fuword16_nocheck) -+ xorq %rax, %rax -+ ASM_STAC -+ movw (%rdi), %ax -+ ASM_CLAC -+ ret -+ SYM_CODE_END(dtrace_fuword16_nocheck) -+ -+#elif defined(__i386__) -+ -+ SYM_CODE_START(dtrace_fuword16_nocheck) -+ movl 4(%esp), %ecx -+ xorl %eax, %eax -+ movzwl (%ecx), %eax -+ ret -+ SYM_CODE_END(dtrace_fuword16_nocheck) -+ -+#endif /* __i386__ */ -+ -+#if defined(__x86_64__) -+ -+ SYM_CODE_START(dtrace_fuword32_nocheck) -+ xorq %rax, %rax -+ ASM_STAC -+ movl (%rdi), %eax -+ ASM_CLAC -+ ret -+ SYM_CODE_END(dtrace_fuword32_nocheck) -+ -+#elif defined(__i386__) -+ -+ SYM_CODE_START(dtrace_fuword32_nocheck) -+ movl 4(%esp), %ecx -+ xorl %eax, %eax -+ movl (%ecx), %eax -+ ret -+ SYM_CODE_END(dtrace_fuword32_nocheck) -+ -+#endif /* __i386__ */ -+ -+#if defined(__x86_64__) -+ -+ SYM_CODE_START(dtrace_fuword64_nocheck) -+ ASM_STAC -+ movq (%rdi), %rax -+ ASM_CLAC -+ ret -+ SYM_CODE_END(dtrace_fuword64_nocheck) -+ -+#elif defined(__i386__) -+ -+ SYM_CODE_START(dtrace_fuword64_nocheck) -+ movl 4(%esp), %ecx -+ xorl %eax, %eax -+ xorl %edx, %edx -+ movl (%ecx), %eax -+ movl 4(%ecx), %edx -+ ret -+ SYM_CODE_END(dtrace_fuword64_nocheck) -+ -+#endif /* __i386__ */ -diff --git a/arch/x86/dtrace/dtrace_isa_x86_64.c b/arch/x86/dtrace/dtrace_isa_x86_64.c -new file mode 100644 -index 000000000000..5accadc9f121 ---- /dev/null -+++ b/arch/x86/dtrace/dtrace_isa_x86_64.c -@@ -0,0 +1,228 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_isa_x86_64.c -+ * DESCRIPTION: DTrace - 
x86_64 architecture specific support functions -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/dtrace_cpu.h> -+#include <asm/unwind.h> -+ -+#include "dtrace.h" -+ -+/* Register indices */ -+#define REG_TRAPNO 25 -+#define REG_GS 24 -+#define REG_FS 23 -+#define REG_ES 22 -+#define REG_DS 21 -+#define REG_SS 20 -+#define REG_RSP 19 -+#define REG_RFL 18 -+#define REG_CS 17 -+#define REG_RIP 16 -+#define REG_ERR 15 -+#define REG_RDI 14 -+#define REG_RSI 13 -+#define REG_RDX 12 -+#define REG_RCX 11 -+#define REG_RAX 10 -+#define REG_R8 9 -+#define REG_R9 8 -+#define REG_R10 7 -+#define REG_R11 6 -+#define REG_RBX 5 -+#define REG_RBP 4 -+#define REG_R12 3 -+#define REG_R13 2 -+#define REG_R14 1 -+#define REG_R15 0 -+ -+extern void dtrace_copy(uintptr_t, uintptr_t, size_t); -+extern void dtrace_copystr(uintptr_t, uintptr_t, size_t, -+ volatile uint16_t *); -+ -+uintptr_t _userlimit = 0x00007fffffffffffLL; -+uintptr_t kernelbase = 0xffff880000000000LL; -+ -+static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) -+{ -+#ifdef FIXME -+ ASSERT(kaddr >= kernelbase && kaddr + size >= kaddr); -+#else -+ if (kaddr < kernelbase || kaddr + size < kaddr) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); -+ this_cpu_core->cpuc_dtrace_illval = kaddr; -+ return 0; -+ } -+#endif -+ -+ if (uaddr + size >= kernelbase || uaddr + size < uaddr) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); -+ 
this_cpu_core->cpuc_dtrace_illval = uaddr; -+ return 0; -+ } -+ -+ return 1; -+} -+ -+void dtrace_copyin_arch(uintptr_t uaddr, uintptr_t kaddr, size_t size, -+ volatile uint16_t *flags) -+{ -+ if (dtrace_copycheck(uaddr, kaddr, size)) -+ dtrace_copy(uaddr, kaddr, size); -+} -+ -+void dtrace_copyout(uintptr_t uaddr, uintptr_t kaddr, size_t size, -+ volatile uint16_t *flags) -+{ -+ if (dtrace_copycheck(uaddr, kaddr, size)) -+ dtrace_copy(kaddr, uaddr, size); -+} -+ -+void dtrace_copyinstr_arch(uintptr_t uaddr, uintptr_t kaddr, size_t size, -+ volatile uint16_t *flags) -+{ -+ if (dtrace_copycheck(uaddr, kaddr, size)) -+ dtrace_copystr(uaddr, kaddr, size, flags); -+} -+ -+void dtrace_copyoutstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, -+ volatile uint16_t *flags) -+{ -+ if (dtrace_copycheck(uaddr, kaddr, size)) -+ dtrace_copystr(kaddr, uaddr, size, flags); -+} -+ -+#define DTRACE_FUWORD(bits) \ -+ uint##bits##_t dtrace_fuword##bits(void *uaddr) \ -+ { \ -+ extern uint##bits##_t dtrace_fuword##bits##_nocheck(void *);\ -+ \ -+ if ((uintptr_t)uaddr > _userlimit) { \ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); \ -+ this_cpu_core->cpuc_dtrace_illval = (uintptr_t)uaddr; \ -+ return 0; \ -+ } \ -+ \ -+ return dtrace_fuword##bits##_nocheck(uaddr); \ -+ } -+ -+DTRACE_FUWORD(8) -+DTRACE_FUWORD(16) -+DTRACE_FUWORD(32) -+DTRACE_FUWORD(64) -+ -+uint64_t dtrace_getarg(int argno, int aframes) -+{ -+ unsigned long bp; -+ uint64_t *st; -+ uint64_t val; -+ int i; -+ struct unwind_state state; -+ -+ if (this_cpu_core->cpu_dtrace_regs) -+ bp = this_cpu_core->cpu_dtrace_regs->bp; -+ else { -+ unwind_start(&state, current, NULL, NULL); -+ for (i = 0; !unwind_done(&state) && i < aframes; -+ unwind_next_frame(&state)) { -+ i++; -+ } -+ -+ bp = (unsigned long)state.bp; -+ } -+ -+ ASSERT(argno >= 5); -+ -+ /* -+ * The first 5 arguments (arg0 through arg4) are passed in registers -+ * to dtrace_probe(). The remaining arguments (arg5 through arg9) are -+ * passed on the stack. 
-+ * -+ * Stack layout: -+ * bp[0] = pushed bp from caller -+ * bp[1] = return address -+ * bp[2] = 6th argument (arg5 -> argno = 5) -+ * bp[3] = 7th argument (arg6 -> argno = 6) -+ * ... -+ */ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ st = (uint64_t *)bp; -+ val = st[2 + (argno - 5)]; -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ -+ return val; -+} -+ -+ulong_t dtrace_getreg(struct task_struct *task, uint_t reg) -+{ -+ struct pt_regs *rp = task_pt_regs(task); -+ -+ int regmap[] = { -+ REG_RBX, /* 0 -> EBX */ -+ REG_RCX, /* 1 -> ECX */ -+ REG_RDX, /* 2 -> EDX */ -+ REG_RSI, /* 3 -> ESI */ -+ REG_RDI, /* 4 -> EDI */ -+ REG_RBP, /* 5 -> EBP */ -+ REG_RAX, /* 6 -> EAX */ -+ REG_DS, /* 7 -> DS */ -+ REG_ES, /* 8 -> ES */ -+ REG_FS, /* 9 -> FS */ -+ REG_GS, /* 10 -> GS */ -+ REG_TRAPNO, /* 11 -> TRAPNO */ -+ REG_RIP, /* 12 -> EIP */ -+ REG_CS, /* 13 -> CS */ -+ REG_RFL, /* 14 -> EFL */ -+ REG_RSP, /* 15 -> UESP */ -+ REG_SS, /* 16 -> SS */ -+ }; -+ -+ if (reg > REG_TRAPNO) { -+ /* -+ * Convert register alias index into register mapping index. -+ */ -+ reg -= REG_GS + 1; -+ -+ if (reg >= sizeof(regmap) / sizeof(int)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); -+ return 0; -+ } -+ -+ reg = regmap[reg]; -+ } -+ -+ /* -+ * Most common case: direct index into pt_regs structure. -+ */ -+ if (reg <= REG_SS) -+ return (&rp->r15)[reg]; -+ -+ switch (reg) { -+ case REG_DS: -+ return task->thread.ds; -+ case REG_ES: -+ return task->thread.es; -+ case REG_FS: -+ return task->thread.fsbase; -+ case REG_GS: -+ return task->thread.gsbase; -+ case REG_TRAPNO: -+ return task->thread.trap_nr; -+ default: -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); -+ return 0; -+ } -+} -diff --git a/dtrace/Makefile b/dtrace/Makefile -new file mode 100644 -index 000000000000..36a4b97b922c ---- /dev/null -+++ b/dtrace/Makefile -@@ -0,0 +1,19 @@ -+# -+# Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved. 
-+# -+ -+obj-$(CONFIG_DT_CORE) += dtrace.o -+obj-$(CONFIG_DT_DT_TEST) += dt_test.o -+ -+dtrace-y := dtrace_mod.o dtrace_dev.o \ -+ dtrace_actdesc.o dtrace_anon.o \ -+ dtrace_buffer.o dtrace_dif.o dtrace_dof.o \ -+ dtrace_ecb.o dtrace_enable.o \ -+ dtrace_fmt.o dtrace_hash.o dtrace_isa.o \ -+ dtrace_match.o dtrace_priv.o \ -+ dtrace_probe.o dtrace_probe_ctx.o \ -+ dtrace_ptofapi.o dtrace_predicate.o \ -+ dtrace_spec.o dtrace_state.o dtrace_util.o -+dt_test-y := dt_test_mod.o dt_test_dev.o -+ -+-include arch/$(SRCARCH)/dtrace/Makefile.arch -diff --git a/dtrace/ctf_api.h b/dtrace/ctf_api.h -new file mode 100644 -index 000000000000..e09bafc676f4 ---- /dev/null -+++ b/dtrace/ctf_api.h -@@ -0,0 +1,33 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Compact C Type format -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#ifndef __CTF_API_H_ -+#define __CTF_API_H_ -+ -+/* -+ * The CTF data model is inferred to be the caller's data model or the data -+ * model of the given object, unless ctf_setmodel() is explicitly called. 
-+ */ -+#define CTF_MODEL_ILP32 1 /* object data model is ILP32 */ -+#define CTF_MODEL_LP64 2 /* object data model is LP64 */ -+#ifdef CONFIG_64BIT -+# define CTF_MODEL_NATIVE CTF_MODEL_LP64 -+#else -+# define CTF_MODEL_NATIVE CTF_MODEL_ILP32 -+#endif -+ -+#endif /* __CTF_API_H_ */ -diff --git a/dtrace/dt_test.h b/dtrace/dt_test.h -new file mode 100644 -index 000000000000..6efe4656001b ---- /dev/null -+++ b/dtrace/dt_test.h -@@ -0,0 +1,30 @@ -+/* -+ * Dynamic Tracing for Linux - test provider -+ * -+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#ifndef _DT_TEST_H_ -+#define _DT_TEST_H_ -+ -+extern void dt_test_provide(void *, const struct dtrace_probedesc *); -+extern int dt_test_enable(void *arg, dtrace_id_t, void *); -+extern void dt_test_disable(void *arg, dtrace_id_t, void *); -+extern void dt_test_destroy(void *, dtrace_id_t, void *); -+ -+extern dtrace_provider_id_t dt_test_id; -+ -+extern int dt_test_dev_init(void); -+extern void dt_test_dev_exit(void); -+ -+#endif /* _DT_TEST_H_ */ -diff --git a/dtrace/dt_test_dev.c b/dtrace/dt_test_dev.c -new file mode 100644 -index 000000000000..8e1f5bab8a12 ---- /dev/null -+++ b/dtrace/dt_test_dev.c -@@ -0,0 +1,176 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dt_test_dev.c -+ * DESCRIPTION: DTrace - test provider device driver -+ * -+ * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/fs.h> -+#include <linux/miscdevice.h> -+#include <linux/types.h> -+#include <trace/syscall.h> -+#include <asm/unistd.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "dt_test.h" -+ -+static dtrace_id_t pid = DTRACE_IDNONE; -+static int enabled; -+ -+/* -+ * Some arrays of structures of different sizes populated with -+ * unchanging randomly-chosen numbers, for padding tests. -+ */ -+ -+static struct dt_test_int_char -+{ -+ int foo; -+ char bar; -+} intish[2] __attribute__((used)) = { { 47204473, 48 }, -+ { 18472, 62 } }; -+ -+static struct dt_test_long_int -+{ -+ long foo; -+ int bar; -+} longish[2] __attribute__((used)) = { { 43737975, 240724 }, -+ { 24924709, 526 } }; -+ -+static struct dt_test_longlong_long -+{ -+ long long foo; -+ long bar; -+} longlongish[2] __attribute__((used)) = { { 4294479287, 4395957 }, -+ { 5239637, 249750 } }; -+ -+static struct dt_test_like_a_scatterlist -+{ -+ unsigned long a; -+ unsigned int b; -+ unsigned int c; -+ u64 d; -+ unsigned int e; -+} scatter_failure[2] __attribute__((used)) = { { .a = 1, .b = 2, -+ .c = 3, .d = 4, .e = 5 }, -+ { .a = 6, .b = 7, -+ .c = 8, .d = 9, .e = 10 } }; -+ -+void dt_test_provide(void *arg, const struct dtrace_probedesc *desc) -+{ -+ dtrace_id_t probe; -+ -+ probe = dtrace_probe_lookup(dt_test_id, "dt_test", NULL, "test"); -+ if (probe != DTRACE_IDNONE) -+ return; -+ -+ pid = dtrace_probe_create(dt_test_id, "dt_test", NULL, "test", 1, NULL); -+} 
-+ -+int dt_test_enable(void *arg, dtrace_id_t id, void *parg) -+{ -+ enabled = 1; -+ -+ return 0; -+} -+ -+void dt_test_disable(void *arg, dtrace_id_t id, void *parg) -+{ -+ enabled = 0; -+} -+ -+void dt_test_destroy(void *arg, dtrace_id_t id, void *parg) -+{ -+} -+ -+void probe_p(dtrace_id_t pid, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, -+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6, -+ uintptr_t arg7, uintptr_t arg8, uintptr_t arg9) -+{ -+} -+ -+/* -+ * Direct calling into dtrace_probe() when passing more than 5 parameters to -+ * the probe requires a stub function. Otherwise we may not be able to get -+ * to the value of all arguments correctly. -+ */ -+void dt_test_probe(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, -+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, -+ uintptr_t arg6, uintptr_t arg7, uintptr_t arg8, -+ uintptr_t arg9) -+{ -+ /* -+ * Yes, this is not nice. -+ * Not at all... -+ * But we're doing it anyway... -+ */ -+ typeof(probe_p) *probe_fn = (void *)&dtrace_probe; -+ -+ probe_fn(pid, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, -+ arg9); -+} -+ -+static long dt_test_ioctl(struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ if (enabled) { -+ dt_test_probe(cmd, arg, 2ULL, 3ULL, 4ULL, 5ULL, 6ULL, 7ULL, -+ 8ULL, 9ULL); -+ -+ return 0; -+ } -+ -+ return -EAGAIN; -+} -+ -+static int dt_test_open(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static int dt_test_close(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static const struct file_operations dt_test_fops = { -+ .owner = THIS_MODULE, -+ .unlocked_ioctl = dt_test_ioctl, -+ .open = dt_test_open, -+ .release = dt_test_close, -+}; -+ -+static struct miscdevice dt_test_dev = { -+ .minor = DT_DEV_DT_TEST_MINOR, -+ .name = "dt_test", -+ .nodename = "dtrace/provider/dt_test", -+ .fops = &dt_test_fops, -+}; -+ -+int dt_test_dev_init(void) -+{ -+ int ret = 0; -+ -+ ret = misc_register(&dt_test_dev); -+ if (ret) -+ 
pr_err("%s: Can't register misc device %d\n", -+ dt_test_dev.name, dt_test_dev.minor); -+ -+ return ret; -+} -+ -+void dt_test_dev_exit(void) -+{ -+ misc_deregister(&dt_test_dev); -+} -diff --git a/dtrace/dt_test_mod.c b/dtrace/dt_test_mod.c -new file mode 100644 -index 000000000000..d8af71665a37 ---- /dev/null -+++ b/dtrace/dt_test_mod.c -@@ -0,0 +1,52 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dt_test_mod.c -+ * DESCRIPTION: DTrace - test provider kernel module -+ * -+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/module.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "dt_test.h" -+ -+MODULE_AUTHOR("Kris Van Hees (kris.van.hees@oracle.com)"); -+MODULE_DESCRIPTION("DTrace Test Probe"); -+MODULE_VERSION("v0.1"); -+MODULE_LICENSE("GPL"); -+ -+static const struct dtrace_pattr dt_test_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pops dt_test_pops = { -+ .dtps_provide = dt_test_provide, -+ .dtps_provide_module = NULL, -+ .dtps_destroy_module = NULL, -+ .dtps_enable = dt_test_enable, -+ .dtps_disable = dt_test_disable, -+ .dtps_suspend = NULL, -+ .dtps_resume = NULL, -+ .dtps_getargdesc = NULL, -+ .dtps_getargval = NULL, -+ .dtps_usermode = NULL, -+ .dtps_destroy = dt_test_destroy -+}; -+ -+DT_PROVIDER_MODULE(dt_test, DTRACE_PRIV_USER) -diff --git a/dtrace/dtrace.h b/dtrace/dtrace.h -new file mode 100644 -index 000000000000..f632b910ac30 ---- /dev/null -+++ b/dtrace/dtrace.h -@@ -0,0 +1,35 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. -+ */ -+ -+#ifndef _DTRACE_H_ -+#define _DTRACE_H_ -+ -+#include <linux/dtrace_cpu.h> -+#include <linux/dtrace_os.h> -+#include <linux/sched.h> -+#include <linux/types.h> -+ -+#include "dtrace_debug.h" -+ -+#include <dtrace/types.h> -+ -+#include <linux/dtrace/dtrace.h> -+ -+#include <dtrace/provider.h> -+#include <dtrace/dtrace_impl.h> -+ -+#endif /* _DTRACE_H_ */ -diff --git a/dtrace/dtrace_actdesc.c b/dtrace/dtrace_actdesc.c -new file mode 100644 -index 000000000000..fada47a6e5eb ---- /dev/null -+++ b/dtrace/dtrace_actdesc.c -@@ -0,0 +1,91 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_actdesc.c -+ * DESCRIPTION: DTrace - action implementation -+ * -+ * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/slab.h> -+#include <linux/vmalloc.h> -+ -+#include "dtrace.h" -+ -+struct dtrace_actdesc *dtrace_actdesc_create(dtrace_actkind_t kind, -+ uint32_t ntuple, -+ uint64_t uarg, uint64_t arg) -+{ -+ struct dtrace_actdesc *act; -+ -+#ifdef FIXME -+ ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || -+ (arg != 0 && (uintptr_t)arg >= KERNELBASE) || -+ (arg == 0 && kind == DTRACEACT_PRINTA)); -+#else -+ ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || -+ (arg != 0) || -+ (arg == 0 && kind == DTRACEACT_PRINTA)); -+#endif -+ -+ act = kzalloc(sizeof(struct dtrace_actdesc), GFP_KERNEL); -+ if (act == NULL) -+ return NULL; -+ -+ act->dtad_kind = kind; -+ act->dtad_ntuple = ntuple; -+ act->dtad_uarg = uarg; -+ act->dtad_arg = arg; -+ act->dtad_refcnt = 1; -+ -+ return act; -+} -+ -+void dtrace_actdesc_hold(struct dtrace_actdesc *act) -+{ -+ ASSERT(act->dtad_refcnt >= 1); -+ -+ act->dtad_refcnt++; -+} -+ -+void dtrace_actdesc_release(struct dtrace_actdesc *act, -+ struct dtrace_vstate *vstate) -+{ -+ dtrace_actkind_t kind = act->dtad_kind; -+ struct dtrace_difo *dp; -+ -+ ASSERT(act->dtad_refcnt >= 1); -+ -+ if (--act->dtad_refcnt != 0) -+ return; -+ -+ dp = act->dtad_difo; -+ if (dp != NULL) -+ dtrace_difo_release(dp, vstate); -+ -+ if (DTRACEACT_ISPRINTFLIKE(kind)) { -+ char *str = (char *)(uintptr_t)act->dtad_arg; -+ -+#ifdef FIXME -+ ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) || -+ (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); -+#else -+ ASSERT((str != NULL) || -+ (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); -+#endif -+ -+ if (str != NULL) -+ vfree(str); -+ } -+ -+ kfree(act); -+} -diff --git a/dtrace/dtrace_anon.c b/dtrace/dtrace_anon.c -new file mode 100644 -index 000000000000..80d0d9c1a9fd ---- /dev/null -+++ b/dtrace/dtrace_anon.c -@@ -0,0 +1,144 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_anon.c -+ * DESCRIPTION: DTrace - Anonymous state implementation -+ * -+ * Copyright (c) 2010, 2012, Oracle and/or its 
affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include "dtrace.h" -+ -+struct dtrace_anon dtrace_anon; -+ -+struct dtrace_state *dtrace_anon_grab(void) -+{ -+ struct dtrace_state *state; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ state = dtrace_anon.dta_state; -+ if (state == NULL) { -+ ASSERT(dtrace_anon.dta_enabling == NULL); -+ -+ return NULL; -+ } -+ -+ ASSERT(dtrace_anon.dta_enabling != NULL); -+ ASSERT(dtrace_retained != NULL); -+ -+ dtrace_enabling_destroy(dtrace_anon.dta_enabling); -+ dtrace_anon.dta_enabling = NULL; -+ dtrace_anon.dta_state = NULL; -+ -+ return state; -+} -+ -+void dtrace_anon_property(void) -+{ -+ int i, rv; -+ struct dtrace_state *state; -+ struct dof_hdr *dof; -+ char c[32]; /* enough for "dof-data-" + digits */ -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(MUTEX_HELD(&cpu_lock)); -+ -+ for (i = 0; ; i++) { -+ snprintf(c, sizeof(c), "dof-data-%d", i); -+ -+ dtrace_err_verbose = 1; -+ -+ dof = dtrace_dof_property(c); -+ if (dof == NULL) { -+ dtrace_err_verbose = 0; -+ break; -+ } -+ -+#ifdef FIXME -+ /* -+ * We want to create anonymous state, so we need to transition -+ * the kernel debugger to indicate that DTrace is active. If -+ * this fails (e.g. because the debugger has modified text in -+ * some way), we won't continue with the processing. 
-+ */ -+ if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { -+ pr_info("kernel debugger active; " -+ "anonymous enabling ignored."); -+ dtrace_dof_destroy(dof); -+ break; -+ } -+#endif -+ -+ /* -+ * If we haven't allocated an anonymous state, we'll do so now. -+ */ -+ state = dtrace_anon.dta_state; -+ if (state == NULL) { -+ state = dtrace_state_create(NULL); -+ dtrace_anon.dta_state = state; -+ -+ if (state == NULL) { -+ /* -+ * This basically shouldn't happen: there is no -+ * failure mode from dtrace_state_create(). -+ * Still, the interface allows for a failure -+ * mode, and we want to fail as gracefully as -+ * possible: we'll emit an error message and -+ * cease processing anonymous state in this -+ * case. -+ */ -+ pr_warn("failed to create anonymous state"); -+ dtrace_dof_destroy(dof); -+ break; -+ } -+ } -+ -+ rv = dtrace_dof_slurp(dof, &state->dts_vstate, current_cred(), -+ &dtrace_anon.dta_enabling, 0, TRUE); -+ -+ if (rv == 0) -+ rv = dtrace_dof_options(dof, state); -+ -+ dtrace_err_verbose = 0; -+ dtrace_dof_destroy(dof); -+ -+ if (rv != 0) { -+ /* -+ * This is malformed DOF; chuck any anonymous state -+ * that we created. -+ */ -+ ASSERT(dtrace_anon.dta_enabling == NULL); -+ dtrace_state_destroy(state); -+ dtrace_anon.dta_state = NULL; -+ break; -+ } -+ -+ ASSERT(dtrace_anon.dta_enabling != NULL); -+ } -+ -+ if (dtrace_anon.dta_enabling != NULL) { -+ int rval; -+ -+ /* -+ * dtrace_enabling_retain() can only fail because we are -+ * trying to retain more enablings than are allowed -- but -+ * we only have one anonymous enabling, and we are guaranteed -+ * to be allowed at least one retained enabling; we assert -+ * that dtrace_enabling_retain() returns success. 
-+ */ -+ rval = dtrace_enabling_retain(dtrace_anon.dta_enabling); -+ ASSERT(rval == 0); -+ -+ dtrace_enabling_dump(dtrace_anon.dta_enabling); -+ } -+} -diff --git a/dtrace/dtrace_buffer.c b/dtrace/dtrace_buffer.c -new file mode 100644 -index 000000000000..9e7faebc51b7 ---- /dev/null -+++ b/dtrace/dtrace_buffer.c -@@ -0,0 +1,490 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_buffer.c -+ * DESCRIPTION: DTrace - buffer implementation -+ * -+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/vmalloc.h> -+ -+#include "dtrace.h" -+ -+dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); -+ -+/* -+ * Note: called from cross call context. This function switches the two -+ * buffers on a given CPU. The atomicity of this operation is assured by -+ * disabling interrupts while the actual switch takes place; the disabling of -+ * interrupts serializes the execution with any execution of dtrace_probe() on -+ * the same CPU. 
-+ */ -+void dtrace_buffer_switch(struct dtrace_buffer *buf) -+{ -+ caddr_t tomax = buf->dtb_tomax; -+ caddr_t xamot = buf->dtb_xamot; -+ dtrace_icookie_t cookie; -+ -+ ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); -+ ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); -+ -+ local_irq_save(cookie); -+ -+ dt_dbg_buf("Switch (CPU %d): tomax %p (%lld) <-> xamot %p (%lld)\n", -+ smp_processor_id(), tomax, buf->dtb_offset, -+ xamot, buf->dtb_xamot_offset); -+ -+ buf->dtb_tomax = xamot; -+ buf->dtb_xamot = tomax; -+ buf->dtb_xamot_drops = buf->dtb_drops; -+ buf->dtb_xamot_offset = buf->dtb_offset; -+ buf->dtb_xamot_errors = buf->dtb_errors; -+ buf->dtb_xamot_flags = buf->dtb_flags; -+ buf->dtb_offset = 0; -+ buf->dtb_drops = 0; -+ buf->dtb_errors = 0; -+ buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); -+ -+ local_irq_restore(cookie); -+} -+ -+/* -+ * Note: called from cross call context. This function activates a buffer -+ * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation -+ * is guaranteed by the disabling of interrupts. -+ */ -+void dtrace_buffer_activate(struct dtrace_state *state) -+{ -+ struct dtrace_buffer *buf; -+ dtrace_icookie_t cookie; -+ -+ local_irq_save(cookie); -+ -+ buf = &state->dts_buffer[smp_processor_id()]; -+ -+ if (buf->dtb_tomax != NULL) -+ /* -+ * We might like to assert that the buffer is marked inactive, -+ * but this isn't necessarily true: the buffer for the CPU -+ * that processes the BEGIN probe has its buffer activated -+ * manually. In this case, we take the (harmless) action -+ * re-clearing the bit INACTIVE bit. 
-+ */ -+ buf->dtb_flags &= ~DTRACEBUF_INACTIVE; -+ -+ local_irq_restore(cookie); -+} -+ -+int dtrace_buffer_alloc(struct dtrace_buffer *bufs, size_t size, int flags, -+ processorid_t cpuid) -+{ -+ processorid_t cpu; -+ struct dtrace_buffer *buf; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(MUTEX_HELD(&cpu_lock)); -+ -+#ifdef FIXME -+ if (size > dtrace_nonroot_maxsize && -+ !PRIV_POLICY_CHOICE(current_cred(), PRIV_ALL, FALSE)) -+ return -EFBIG; -+#endif -+ -+ for_each_online_cpu(cpu) { -+ if (cpuid != DTRACE_CPUALL && cpuid != cpu) -+ continue; -+ -+ buf = &bufs[cpu]; -+ -+ /* -+ * If there is already a buffer allocated for this CPU, it -+ * is only possible that this is a DR event. In this case, -+ * the buffer size must match our specified size. -+ */ -+ if (buf->dtb_tomax != NULL) { -+ ASSERT(buf->dtb_size == size); -+ continue; -+ } -+ -+ ASSERT(buf->dtb_xamot == NULL); -+ -+ buf->dtb_tomax = dtrace_vzalloc_try(size); -+ if (buf->dtb_tomax == NULL) -+ goto err; -+ -+ buf->dtb_size = size; -+ buf->dtb_flags = flags; -+ buf->dtb_offset = 0; -+ buf->dtb_drops = 0; -+ -+ if (flags & DTRACEBUF_NOSWITCH) -+ continue; -+ -+ buf->dtb_xamot = dtrace_vzalloc_try(size); -+ if (buf->dtb_xamot == NULL) -+ goto err; -+ } -+ -+ return 0; -+ -+err: -+ for_each_online_cpu(cpu) { -+ if (cpuid != DTRACE_CPUALL && cpuid != cpu) -+ continue; -+ -+ buf = &bufs[cpu]; -+ -+ if (buf->dtb_xamot != NULL) { -+ ASSERT(buf->dtb_tomax != NULL); -+ ASSERT(buf->dtb_size == size); -+ vfree(buf->dtb_xamot); -+ } -+ -+ if (buf->dtb_tomax != NULL) { -+ ASSERT(buf->dtb_size == size); -+ vfree(buf->dtb_tomax); -+ } -+ -+ buf->dtb_tomax = NULL; -+ buf->dtb_xamot = NULL; -+ buf->dtb_size = 0; -+ } -+ -+ return -ENOMEM; -+} -+void dtrace_buffer_drop(struct dtrace_buffer *buf) -+{ -+ buf->dtb_drops++; -+} -+ -+intptr_t dtrace_buffer_reserve(struct dtrace_buffer *buf, size_t needed, -+ size_t align, struct dtrace_state *state, -+ struct dtrace_mstate *mstate) -+{ -+ intptr_t offs = buf->dtb_offset, 
soffs; -+ intptr_t woffs; -+ caddr_t tomax; -+ size_t total; -+ -+ if (buf->dtb_flags & DTRACEBUF_INACTIVE) -+ return -1; -+ -+ tomax = buf->dtb_tomax; -+ if (tomax == NULL) { -+ dtrace_buffer_drop(buf); -+ return -1; -+ } -+ -+ if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) { -+ while (offs & (align - 1)) { -+ /* -+ * Assert that our alignment is off by a number which -+ * is itself sizeof (uint32_t) aligned. -+ */ -+ ASSERT(!((align - (offs & (align - 1))) & -+ (sizeof(uint32_t) - 1))); -+ DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); -+ dt_dbg_buf(" Store: %p[%ld .. %ld] <- EPIDNONE " -+ "(from %s::%d)\n", -+ buf, offs, offs + sizeof(uint32_t) - 1, -+ __func__, __LINE__); -+ offs += sizeof(uint32_t); -+ } -+ -+ soffs = offs + needed; -+ if (soffs > buf->dtb_size) { -+ dtrace_buffer_drop(buf); -+ return -1; -+ } -+ -+ if (mstate == NULL) { -+ dt_dbg_buf(" Reserve: %p[%ld .. %ld]\n", -+ buf, offs, offs + needed - 1); -+ return offs; -+ } -+ -+ mstate->dtms_scratch_base = (uintptr_t)tomax + soffs; -+ mstate->dtms_scratch_size = buf->dtb_size - soffs; -+ mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; -+ -+ dt_dbg_buf(" Reserve: %p[%ld .. %ld]\n", -+ buf, offs, offs + needed - 1); -+ return offs; -+ } -+ -+ if (buf->dtb_flags & DTRACEBUF_FILL) { -+ if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN && -+ (buf->dtb_flags & DTRACEBUF_FULL)) -+ return -1; -+ -+ goto out; -+ } -+ -+ total = needed + (offs & (align - 1)); -+ -+ /* -+ * For a ring buffer, life is quite a bit more complicated. Before -+ * we can store any padding, we need to adjust our wrapping offset. -+ * (If we've never before wrapped or we're not about to, no adjustment -+ * is required.) -+ */ -+ if ((buf->dtb_flags & DTRACEBUF_WRAPPED) || -+ offs + total > buf->dtb_size) { -+ woffs = buf->dtb_xamot_offset; -+ -+ if (offs + total > buf->dtb_size) { -+ /* -+ * We can't fit in the end of the buffer. First, a -+ * sanity check that we can fit in the buffer at all. 
-+ */ -+ if (total > buf->dtb_size) { -+ dtrace_buffer_drop(buf); -+ return -1; -+ } -+ -+ /* -+ * We're going to be storing at the top of the buffer, -+ * so now we need to deal with the wrapped offset. We -+ * only reset our wrapped offset to 0 if it is -+ * currently greater than the current offset. If it -+ * is less than the current offset, it is because a -+ * previous allocation induced a wrap -- but the -+ * allocation didn't subsequently take the space due -+ * to an error or false predicate evaluation. In this -+ * case, we'll just leave the wrapped offset alone: if -+ * the wrapped offset hasn't been advanced far enough -+ * for this allocation, it will be adjusted in the -+ * lower loop. -+ */ -+ if (buf->dtb_flags & DTRACEBUF_WRAPPED) { -+ if (woffs >= offs) -+ woffs = 0; -+ } else -+ woffs = 0; -+ -+ /* -+ * Now we know that we're going to be storing to the -+ * top of the buffer and that there is room for us -+ * there. We need to clear the buffer from the current -+ * offset to the end (there may be old gunk there). -+ */ -+ while (offs < buf->dtb_size) -+ tomax[offs++] = 0; -+ -+ /* -+ * We need to set our offset to zero. And because we -+ * are wrapping, we need to set the bit indicating as -+ * much. We can also adjust our needed space back -+ * down to the space required by the ECB -- we know -+ * that the top of the buffer is aligned. -+ */ -+ offs = 0; -+ total = needed; -+ buf->dtb_flags |= DTRACEBUF_WRAPPED; -+ } else { -+ /* -+ * There is room for us in the buffer, so we simply -+ * need to check the wrapped offset. -+ */ -+ if (woffs < offs) { -+ /* -+ * The wrapped offset is less than the offset. -+ * This can happen if we allocated buffer space -+ * that induced a wrap, but then we didn't -+ * subsequently take the space due to an error -+ * or false predicate evaluation. This is -+ * okay; we know that _this_ allocation isn't -+ * going to induce a wrap. 
We still can't -+ * reset the wrapped offset to be zero, -+ * however: the space may have been trashed in -+ * the previous failed probe attempt. But at -+ * least the wrapped offset doesn't need to -+ * be adjusted at all... -+ */ -+ goto out; -+ } -+ } -+ -+ while (offs + total > woffs) { -+ dtrace_epid_t epid = *(uint32_t *)(tomax + woffs); -+ size_t size; -+ -+ if (epid == DTRACE_EPIDNONE) -+ size = sizeof(uint32_t); -+ else { -+ ASSERT(epid <= state->dts_necbs); -+ ASSERT(state->dts_ecbs[epid - 1] != NULL); -+ -+ size = state->dts_ecbs[epid - 1]->dte_size; -+ } -+ -+ ASSERT(woffs + size <= buf->dtb_size); -+ ASSERT(size != 0); -+ -+ if (woffs + size == buf->dtb_size) { -+ /* -+ * We've reached the end of the buffer; we want -+ * to set the wrapped offset to 0 and break -+ * out. However, if the offs is 0, then we're -+ * in a strange edge-condition: the amount of -+ * space that we want to reserve plus the size -+ * of the record that we're overwriting is -+ * space but subsequently don't consume it (due -+ * to a failed predicate or error) the wrapped -+ * offset will be 0 -- yet the EPID at offset 0 -+ * will not be committed. This situation is -+ * relatively easy to deal with: if we're in -+ * this case, the buffer is indistinguishable -+ * from one that hasn't wrapped; we need only -+ * finish the job by clearing the wrapped bit, -+ * explicitly setting the offset to be 0, and -+ * zero'ing out the old data in the buffer. -+ */ -+ if (offs == 0) { -+ buf->dtb_flags &= ~DTRACEBUF_WRAPPED; -+ buf->dtb_offset = 0; -+ woffs = total; -+ -+ while (woffs < buf->dtb_size) -+ tomax[woffs++] = 0; -+ } -+ -+ woffs = 0; -+ break; -+ } -+ -+ woffs += size; -+ } -+ -+ /* -+ * We have a wrapped offset. It may be that the wrapped offset -+ * has become zero -- that's okay. -+ */ -+ buf->dtb_xamot_offset = woffs; -+ } -+ -+out: -+ /* -+ * Now we can plow the buffer with any necessary padding. 
-+ */ -+ while (offs & (align - 1)) { -+ /* -+ * Assert that our alignment is off by a number which -+ * is itself sizeof(uint32_t) aligned. -+ */ -+ ASSERT(!((align - (offs & (align - 1))) & -+ (sizeof(uint32_t) - 1))); -+ DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); -+ dt_dbg_buf(" Store: %p[%ld .. %ld] <- EPIDNONE " -+ "(from %s::%d)\n", -+ buf, offs, offs + sizeof(uint32_t) - 1, -+ __func__, __LINE__); -+ offs += sizeof(uint32_t); -+ } -+ -+ if (buf->dtb_flags & DTRACEBUF_FILL) { -+ if (offs + needed > buf->dtb_size - state->dts_reserve) { -+ buf->dtb_flags |= DTRACEBUF_FULL; -+ return -1; -+ } -+ } -+ -+ if (mstate == NULL) { -+ dt_dbg_buf(" Reserve: %p[%ld .. %ld]\n", -+ buf, offs, offs + needed - 1); -+ return offs; -+ } -+ -+ /* -+ * For ring buffers and fill buffers, the scratch space is always -+ * the inactive buffer. -+ */ -+ mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot; -+ mstate->dtms_scratch_size = buf->dtb_size; -+ mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; -+ -+ dt_dbg_buf(" Reserve: %p[%ld .. %ld]\n", -+ buf, offs, offs + needed - 1); -+ return offs; -+} -+ -+void dtrace_buffer_polish(struct dtrace_buffer *buf) -+{ -+ ASSERT(buf->dtb_flags & DTRACEBUF_RING); -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ if (!(buf->dtb_flags & DTRACEBUF_WRAPPED)) -+ return; -+ -+ /* -+ * We need to polish the ring buffer. There are three cases: -+ * -+ * - The first (and presumably most common) is that there is no gap -+ * between the buffer offset and the wrapped offset. In this case, -+ * there is nothing in the buffer that isn't valid data; we can -+ * mark the buffer as polished and return. -+ * -+ * - The second (less common than the first but still more common -+ * than the third) is that there is a gap between the buffer offset -+ * and the wrapped offset, and the wrapped offset is larger than the -+ * buffer offset. 
This can happen because of an alignment issue, or -+ * can happen because of a call to dtrace_buffer_reserve() that -+ * didn't subsequently consume the buffer space. In this case, -+ * we need to zero the data from the buffer offset to the wrapped -+ * offset. -+ * -+ * - The third (and least common) is that there is a gap between the -+ * buffer offset and the wrapped offset, but the wrapped offset is -+ * _less_ than the buffer offset. This can only happen because a -+ * call to dtrace_buffer_reserve() induced a wrap, but the space -+ * was not subsequently consumed. In this case, we need to zero the -+ * space from the offset to the end of the buffer _and_ from the -+ * top of the buffer to the wrapped offset. -+ */ -+ if (buf->dtb_offset < buf->dtb_xamot_offset) -+ memset(buf->dtb_tomax + buf->dtb_offset, 0, -+ buf->dtb_xamot_offset - buf->dtb_offset); -+ -+ if (buf->dtb_offset > buf->dtb_xamot_offset) { -+ memset(buf->dtb_tomax + buf->dtb_offset, 0, -+ buf->dtb_size - buf->dtb_offset); -+ memset(buf->dtb_tomax, 0, buf->dtb_xamot_offset); -+ } -+} -+ -+void dtrace_buffer_free(struct dtrace_buffer *bufs) -+{ -+ int cpu; -+ -+ for_each_online_cpu(cpu) { -+ struct dtrace_buffer *buf = &bufs[cpu]; -+ -+ if (buf->dtb_tomax == NULL) { -+ ASSERT(buf->dtb_xamot == NULL); -+ ASSERT(buf->dtb_size == 0); -+ -+ continue; -+ } -+ -+ if (buf->dtb_xamot != NULL) { -+ ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); -+ -+ vfree(buf->dtb_xamot); -+ buf->dtb_xamot = NULL; -+ } -+ -+ vfree(buf->dtb_tomax); -+ buf->dtb_size = 0; -+ buf->dtb_tomax = NULL; -+ } -+} -diff --git a/dtrace/dtrace_debug.h b/dtrace/dtrace_debug.h -new file mode 100644 -index 000000000000..a55fd1a0436f ---- /dev/null -+++ b/dtrace/dtrace_debug.h -@@ -0,0 +1,118 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#ifndef _DTRACE_DEBUG_H_ -+#define _DTRACE_DEBUG_H_ -+ -+#ifdef CONFIG_DT_DEBUG -+ -+/* -+ * Enable all output and use dynamic debug when supported. -+ */ -+# ifdef CONFIG_DYNAMIC_DEBUG -+ -+# define DT_DBG_AGG -+# define DT_DBG_BUF -+# define DT_DBG_DIF -+# define DT_DBG_DOF -+# define DT_DBG_ENABLE -+# define DT_DBG_IOCTL -+# define DT_DBG_PROBE -+# define DT_DBG_PROVIDER -+ -+# define dt_dbg_print(fmt, ...) pr_debug(fmt, ## __VA_ARGS__) -+ -+# else /* CONFIG_DYNAMIC_DEBUG */ -+ -+# undef DT_DBG_AGG -+# undef DT_DBG_BUF -+# undef DT_DBG_DIF -+# undef DT_DBG_DOF -+# undef DT_DBG_ENABLE -+# undef DT_DBG_IOCTL -+# undef DT_DBG_PROBE -+# undef DT_DBG_PROVIDER -+ -+# define dt_dbg_print(fmt, ...) pr_info(fmt, ## __VA_ARGS__) -+ -+# endif /* CONFIG_DYNAMIC_DEBUG */ -+ -+#else /* CONFIG_DT_DEBUG */ -+ -+# undef DT_DBG_AGG -+# undef DT_DBG_BUF -+# undef DT_DBG_DIF -+# undef DT_DBG_DOF -+# undef DT_DBG_ENABLE -+# undef DT_DBG_IOCTL -+# undef DT_DBG_PROBE -+# undef DT_DBG_PROVIDER -+ -+#endif /* CONFIG_DT_DEBUG */ -+ -+/* -+ * Here are the actual actions for the various debug cases. -+ */ -+#ifdef DT_DBG_AGG -+# define dt_dbg_agg(fmt, ...) dt_dbg_print(fmt, ## __VA_ARGS__) -+#else -+# define dt_dbg_agg(fmt, ...) -+#endif -+ -+#ifdef DT_DBG_BUF -+# define dt_dbg_buf(fmt, ...) dt_dbg_print(fmt, ## __VA_ARGS__) -+#else -+# define dt_dbg_buf(fmt, ...) -+#endif -+ -+#ifdef DT_DBG_DIF -+# define dt_dbg_dif(fmt, ...) 
dt_dbg_print(fmt, ## __VA_ARGS__) -+#else -+# define dt_dbg_dif(fmt, ...) -+#endif -+ -+#ifdef DT_DBG_DOF -+# define dt_dbg_dof(fmt, ...) dt_dbg_print(fmt, ## __VA_ARGS__) -+#else -+# define dt_dbg_dof(fmt, ...) -+#endif -+ -+#ifdef DT_DBG_ENABLE -+# define dt_dbg_enable(fmt, ...) dt_dbg_print(fmt, ## __VA_ARGS__) -+#else -+# define dt_dbg_enable(fmt, ...) -+#endif -+ -+#ifdef DT_DBG_IOCTL -+# define dt_dbg_ioctl(fmt, ...) dt_dbg_print(fmt, ## __VA_ARGS__) -+#else -+# define dt_dbg_ioctl(fmt, ...) -+#endif -+ -+#ifdef DT_DBG_PROBE -+# define dt_dbg_probe(fmt, ...) dt_dbg_print(fmt, ## __VA_ARGS__) -+#else -+# define dt_dbg_probe(fmt, ...) -+#endif -+ -+#ifdef DT_DBG_PROVIDER -+# define dt_dbg_prov(fmt, ...) dt_dbg_print(fmt, ## __VA_ARGS__) -+#else -+# define dt_dbg_prov(fmt, ...) -+#endif -+ -+#endif /* _DTRACE_DEBUG_H_ */ -diff --git a/dtrace/dtrace_dev.c b/dtrace/dtrace_dev.c -new file mode 100644 -index 000000000000..0e52c936b9e0 ---- /dev/null -+++ b/dtrace/dtrace_dev.c -@@ -0,0 +1,1599 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_dev.c -+ * DESCRIPTION: DTrace - Framework device driver -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/delay.h> -+#include <dtrace/types.h> -+#include <linux/dtrace/ioctl.h> -+#include <linux/fs.h> -+#include <linux/jiffies.h> -+#include <linux/kernel.h> -+#include <linux/miscdevice.h> -+#include <linux/slab.h> -+#include <linux/vmalloc.h> -+#include <linux/uaccess.h> -+ -+#include "ctf_api.h" -+#include "dtrace.h" -+#include "dtrace_dev.h" -+ -+extern char *dtrace_helptrace_buffer; -+extern int dtrace_helptrace_bufsize; -+extern int dtrace_helptrace_enabled; -+ -+int dtrace_opens; -+int dtrace_err_verbose; -+ -+struct dtrace_pops dtrace_provider_ops = { -+ (void (*)(void *, const struct dtrace_probedesc *))dtrace_nullop, -+ (void (*)(void *, struct module *))dtrace_nullop, -+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop, -+ (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, -+ (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, -+ (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, -+ NULL, -+ NULL, -+ NULL, -+ (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, -+ (void (*)(void *, struct module *))dtrace_nullop, -+}; -+ -+static size_t dtrace_retain_max = 1024; -+ -+struct dtrace_toxrange *dtrace_toxrange; -+int dtrace_toxranges; -+static int dtrace_toxranges_max; -+ -+struct kmem_cache *dtrace_state_cachep; -+ -+struct user_namespace *init_user_namespace; -+ -+static struct dtrace_pattr dtrace_provider_attr = { -+{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, -+{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, -+}; -+ -+DEFINE_MUTEX(dtrace_lock); -+ -+void dtrace_nullop(void) -+{ -+} -+ -+int dtrace_enable_nullop(void) -+{ -+ return 0; -+} -+ -+ -+#ifdef CONFIG_DT_DEBUG -+static void dtrace_ioctl_sizes(void) -+{ -+#define 
DBG_PRINT(x) dt_dbg_ioctl("Size of %s: %lx\n", #x, sizeof(x)) -+ DBG_PRINT(struct dtrace_providerdesc); -+ DBG_PRINT(struct dtrace_probedesc); -+ DBG_PRINT(struct dtrace_bufdesc); -+ DBG_PRINT(struct dtrace_eprobedesc); -+ DBG_PRINT(struct dtrace_argdesc); -+ DBG_PRINT(struct dtrace_conf); -+ DBG_PRINT(struct dtrace_status); -+ DBG_PRINT(processorid_t); -+ DBG_PRINT(struct dtrace_aggdesc); -+ DBG_PRINT(struct dtrace_fmtdesc); -+ DBG_PRINT(struct dof_hdr); -+#undef DBG_PRINT -+} -+ -+#endif -+ -+static int dtrace_open(struct inode *inode, struct file *file) -+{ -+ struct dtrace_state *state; -+ uint32_t priv; -+ kuid_t uid; -+ -+ dtrace_cred2priv(file->f_cred, &priv, &uid); -+ if (priv == DTRACE_PRIV_NONE) -+ return -EACCES; -+ -+#ifdef CONFIG_DT_DEBUG -+ dtrace_ioctl_sizes(); -+#endif -+ mutex_lock(&module_mutex); -+ mutex_lock(&dtrace_provider_lock); -+ dtrace_probe_provide(NULL, NULL); -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ -+ mutex_lock(&cpu_lock); -+ mutex_lock(&dtrace_lock); -+ -+ /* -+ * Do not let a consumer continue if it is not possible to enable -+ * DTrace. 
-+ */ -+ if (dtrace_enable() != 0) { -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&cpu_lock); -+ return -EBUSY; -+ } -+ -+ dtrace_opens++; -+ dtrace_membar_producer(); -+ -+ state = dtrace_state_create(file); -+ mutex_unlock(&cpu_lock); -+ -+ if (state == NULL) { -+ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) -+ dtrace_disable(); -+ mutex_unlock(&dtrace_lock); -+ -+ return -EAGAIN; -+ } -+ -+ file->private_data = state; -+ mutex_unlock(&dtrace_lock); -+ -+ return 0; -+} -+ -+static long dtrace_ioctl(struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct dtrace_state *state; -+ int rval; -+ void __user *argp = (void __user *)arg; -+ -+ state = (struct dtrace_state *) file->private_data; -+ if (state->dts_anon) { -+ ASSERT(dtrace_anon.dta_state == NULL); -+ state = state->dts_anon; -+ } -+ -+ switch (cmd) { -+ case DTRACEIOC_PROVIDER: { -+ struct dtrace_providerdesc pvd; -+ struct dtrace_provider *pvp; -+ -+ dt_dbg_ioctl("IOCTL PROVIDER (cmd %#x), argp %p\n", cmd, argp); -+ -+ if (copy_from_user(&pvd, argp, sizeof(pvd)) != 0) -+ return -EFAULT; -+ -+ pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; -+ mutex_lock(&dtrace_provider_lock); -+ -+ for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { -+ if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0) -+ break; -+ } -+ -+ mutex_unlock(&dtrace_provider_lock); -+ -+ dt_dbg_ioctl(" Provider '%s' %sfound\n", -+ pvd.dtvd_name, pvp ? 
"" : "not "); -+ if (pvp == NULL) -+ return -ESRCH; -+ -+ memcpy(&pvd.dtvd_priv, &pvp->dtpv_priv, -+ sizeof(struct dtrace_ppriv)); -+ memcpy(&pvd.dtvd_attr, &pvp->dtpv_attr, -+ sizeof(struct dtrace_pattr)); -+ -+ if (copy_to_user(argp, &pvd, sizeof(pvd)) != 0) -+ return -EFAULT; -+ -+ return 0; -+ } -+ -+ case DTRACEIOC_EPROBE: { -+ struct dtrace_eprobedesc epdesc; -+ struct dtrace_ecb *ecb; -+ struct dtrace_action *act; -+ void *buf; -+ size_t size; -+ uint8_t *dest; -+ int nrecs; -+ -+ dt_dbg_ioctl("IOCTL EPROBE (cmd %#x), argp %p\n", cmd, argp); -+ -+ if (copy_from_user(&epdesc, argp, sizeof(epdesc)) != 0) -+ return -EFAULT; -+ -+ mutex_lock(&dtrace_lock); -+ -+ ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid); -+ if (ecb == NULL) { -+ mutex_unlock(&dtrace_lock); -+ return -EINVAL; -+ } -+ -+ if (ecb->dte_probe == NULL) { -+ mutex_unlock(&dtrace_lock); -+ return -EINVAL; -+ } -+ -+ epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; -+ epdesc.dtepd_uarg = ecb->dte_uarg; -+ epdesc.dtepd_size = ecb->dte_size; -+ -+ nrecs = epdesc.dtepd_nrecs; -+ epdesc.dtepd_nrecs = 0; -+ for (act = ecb->dte_action; act != NULL; act = act->dta_next) { -+ if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) -+ continue; -+ -+ epdesc.dtepd_nrecs++; -+ } -+ -+ /* -+ * Now that we have the size, we need to allocate a temporary -+ * buffer in which to store the complete description. We need -+ * the temporary buffer to be able to drop dtrace_lock() -+ * across the copy_to_user(), below. 
-+ */ -+ size = sizeof(struct dtrace_eprobedesc) + -+ (epdesc.dtepd_nrecs * sizeof(struct dtrace_recdesc)); -+ -+ buf = vmalloc(size); -+ if (buf == NULL) -+ return -ENOMEM; -+ -+ dest = buf; -+ memcpy(dest, &epdesc, sizeof(epdesc)); -+ dest += offsetof(struct dtrace_eprobedesc, dtepd_rec[0]); -+ -+ for (act = ecb->dte_action; act != NULL; act = act->dta_next) { -+ if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) -+ continue; -+ -+ if (nrecs-- == 0) -+ break; -+ -+ memcpy(dest, &act->dta_rec, -+ sizeof(struct dtrace_recdesc)); -+ dest += sizeof(struct dtrace_recdesc); -+ } -+ -+ mutex_unlock(&dtrace_lock); -+ -+ if (copy_to_user(argp, buf, -+ (uintptr_t)(dest - (uint8_t *)buf)) != 0) { -+ vfree(buf); -+ return -EFAULT; -+ } -+ -+ vfree(buf); -+ return 0; -+ } -+ -+ case DTRACEIOC_AGGDESC: { -+ struct dtrace_aggdesc aggdesc; -+ struct dtrace_action *act; -+ struct dtrace_aggregation *agg; -+ int nrecs; -+ uint32_t offs; -+ struct dtrace_recdesc *lrec; -+ void *buf; -+ size_t size; -+ uint8_t *dest; -+ -+ dt_dbg_ioctl("IOCTL AGGDESC (cmd %#x), argp %p\n", cmd, argp); -+ -+ if (copy_from_user(&aggdesc, argp, sizeof(aggdesc)) != 0) -+ return -EFAULT; -+ -+ mutex_lock(&dtrace_lock); -+ -+ agg = dtrace_aggid2agg(state, aggdesc.dtagd_id); -+ if (agg == NULL) { -+ mutex_unlock(&dtrace_lock); -+ return -EINVAL; -+ } -+ -+ aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; -+ -+ nrecs = aggdesc.dtagd_nrecs; -+ aggdesc.dtagd_nrecs = 0; -+ -+ offs = agg->dtag_base; -+ lrec = &agg->dtag_action.dta_rec; -+ aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - -+ offs; -+ -+ for (act = agg->dtag_first; ; act = act->dta_next) { -+ ASSERT(act->dta_intuple || -+ DTRACEACT_ISAGG(act->dta_kind)); -+ -+ /* -+ * If this action has a record size of zero, it -+ * denotes an argument to the aggregating action. 
-+ * Because the presence of this record doesn't (or -+ * shouldn't) affect the way the data is interpreted, -+ * we don't copy it out to save user-level the -+ * confusion of dealing with a zero-length record. -+ */ -+ if (act->dta_rec.dtrd_size == 0) { -+ ASSERT(agg->dtag_hasarg); -+ continue; -+ } -+ -+ aggdesc.dtagd_nrecs++; -+ -+ if (act == &agg->dtag_action) -+ break; -+ } -+ -+ /* -+ * Now that we have the size, we need to allocate a temporary -+ * buffer in which to store the complete description. We need -+ * the temporary buffer to be able to drop dtrace_lock() -+ * across the copyout(), below. -+ */ -+ size = sizeof(struct dtrace_aggdesc) + -+ (aggdesc.dtagd_nrecs * sizeof(struct dtrace_recdesc)); -+ -+ buf = vmalloc(size); -+ if (buf == NULL) -+ return -ENOMEM; -+ -+ dest = buf; -+ memcpy(dest, &aggdesc, sizeof(aggdesc)); -+ dest += offsetof(struct dtrace_aggdesc, dtagd_rec[0]); -+ -+ for (act = agg->dtag_first; ; act = act->dta_next) { -+ struct dtrace_recdesc rec = act->dta_rec; -+ -+ /* -+ * See the comment in the above loop for why we pass -+ * over zero-length records. -+ */ -+ if (rec.dtrd_size == 0) { -+ ASSERT(agg->dtag_hasarg); -+ continue; -+ } -+ -+ if (nrecs-- == 0) -+ break; -+ -+ rec.dtrd_offset -= offs; -+ memcpy(dest, &rec, sizeof(rec)); -+ dest += sizeof(struct dtrace_recdesc); -+ -+ if (act == &agg->dtag_action) -+ break; -+ } -+ -+ mutex_unlock(&dtrace_lock); -+ -+ if (copy_to_user(argp, buf, -+ (uintptr_t)(dest - (uint8_t *)buf)) != 0) { -+ vfree(buf); -+ return -EFAULT; -+ } -+ -+ vfree(buf); -+ return 0; -+ } -+ -+ case DTRACEIOC_ENABLE: { -+ struct dof_hdr *dof; -+ struct dtrace_enabling *enab = NULL; -+ struct dtrace_vstate *vstate; -+ int err = 0; -+ int rv; -+ -+ dt_dbg_ioctl("IOCTL ENABLE (cmd %#x), argp %p\n", cmd, argp); -+ -+ rv = 0; -+ -+ /* -+ * If a NULL argument has been passed, we take this as our -+ * cue to reevaluate our enablings. 
-+ */ -+ if (argp == NULL) { -+ dtrace_enabling_matchall(); -+ -+ return 0; -+ } -+ -+ dof = dtrace_dof_copyin(argp, &rval); -+ if (dof == NULL) -+ return rval; -+ -+ mutex_lock(&cpu_lock); -+ mutex_lock(&dtrace_lock); -+ vstate = &state->dts_vstate; -+ -+ if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&cpu_lock); -+ dtrace_dof_destroy(dof); -+ return -EBUSY; -+ } -+ -+ if (dtrace_dof_slurp(dof, vstate, file->f_cred, &enab, 0, -+ TRUE) != 0) { -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&cpu_lock); -+ dtrace_dof_destroy(dof); -+ return -EINVAL; -+ } -+ -+ rval = dtrace_dof_options(dof, state); -+ if (rval != 0) { -+ dtrace_enabling_destroy(enab); -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&cpu_lock); -+ dtrace_dof_destroy(dof); -+ return rval; -+ } -+ -+ err = dtrace_enabling_match(enab, &rv); -+ if (err == 0) -+ err = dtrace_enabling_retain(enab); -+ else -+ dtrace_enabling_destroy(enab); -+ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&cpu_lock); -+ dtrace_dof_destroy(dof); -+ -+ return err == 0 ? 
rv : err; -+ } -+ -+ case DTRACEIOC_REPLICATE: { -+ struct dtrace_repldesc desc; -+ struct dtrace_probedesc *match = &desc.dtrpd_match; -+ struct dtrace_probedesc *create = &desc.dtrpd_create; -+ int err; -+ -+ dt_dbg_ioctl("IOCTL REPLICATE (cmd %#x), argp %p\n", -+ cmd, argp); -+ -+ if (copy_from_user(&desc, argp, sizeof(desc)) != 0) -+ return -EFAULT; -+ -+ match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; -+ match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; -+ match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; -+ match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; -+ -+ create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; -+ create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; -+ create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; -+ create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; -+ -+ mutex_lock(&dtrace_lock); -+ err = dtrace_enabling_replicate(state, match, create); -+ mutex_unlock(&dtrace_lock); -+ -+ return err; -+ } -+ -+ case DTRACEIOC_PROBEMATCH: -+ case DTRACEIOC_PROBES: { -+ int id; -+ struct dtrace_probe *probe = NULL; -+ struct dtrace_probedesc desc; -+ struct dtrace_probekey pkey; -+ uint32_t priv; -+ kuid_t uid; -+ -+ dt_dbg_ioctl("IOCTL %s (cmd %#x), argp %p\n", -+ cmd == DTRACEIOC_PROBES ? "PROBES" -+ : "PROBEMATCH", -+ cmd, argp); -+ -+ if (copy_from_user(&desc, argp, sizeof(desc)) != 0) -+ return -EFAULT; -+ -+ desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; -+ desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; -+ desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; -+ desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0'; -+ dt_dbg_ioctl(" Probe ID %d %s:%s:%s:%s\n", -+ desc.dtpd_id, desc.dtpd_provider, desc.dtpd_mod, -+ desc.dtpd_func, desc.dtpd_name); -+ -+ /* -+ * Before we attempt to match this probe, we want to give -+ * all providers the opportunity to provide it. 
-+ */ -+ if (desc.dtpd_id == DTRACE_IDNONE) { -+ mutex_lock(&module_mutex); -+ mutex_lock(&dtrace_provider_lock); -+ dtrace_probe_provide(&desc, NULL); -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ } -+ -+ if (cmd == DTRACEIOC_PROBEMATCH) { -+ dtrace_probekey(&desc, &pkey); -+ pkey.dtpk_id = DTRACE_IDNONE; -+ } -+ -+ dtrace_cred2priv(file->f_cred, &priv, &uid); -+ -+ mutex_lock(&dtrace_lock); -+ -+ id = desc.dtpd_id; -+ if (cmd == DTRACEIOC_PROBEMATCH) { -+ int m = 0; -+ -+ while ((probe = dtrace_probe_get_next(&id)) -+ != NULL) { -+ m = dtrace_match_probe(probe, &pkey, priv, uid); -+ if (m) -+ break; -+ -+ id++; -+ } -+ -+ if (m < 0) { -+ mutex_unlock(&dtrace_lock); -+ return -EINVAL; -+ } -+ } else { -+ while ((probe = dtrace_probe_get_next(&id)) -+ != NULL) { -+ if (dtrace_match_priv(probe, priv, uid)) -+ break; -+ -+ id++; -+ } -+ } -+ -+ if (probe == NULL) { -+ mutex_unlock(&dtrace_lock); -+ return -ESRCH; -+ } -+ -+ dtrace_probe_description(probe, &desc); -+ mutex_unlock(&dtrace_lock); -+ -+ if (copy_to_user(argp, &desc, sizeof(desc)) != 0) -+ return -EFAULT; -+ -+ return 0; -+ } -+ -+ case DTRACEIOC_PROBEARG: { -+ struct dtrace_argdesc desc; -+ struct dtrace_probe *probe; -+ struct dtrace_provider *prov; -+ -+ dt_dbg_ioctl("IOCTL PROBEARG (cmd %#x), argp %p\n", cmd, argp); -+ -+ if (copy_from_user(&desc, argp, sizeof(desc)) != 0) -+ return -EFAULT; -+ -+ if (desc.dtargd_id == DTRACE_IDNONE) -+ return -EINVAL; -+ -+ if (desc.dtargd_ndx == DTRACE_ARGNONE) -+ return -EINVAL; -+ -+ mutex_lock(&module_mutex); -+ mutex_lock(&dtrace_provider_lock); -+ mutex_lock(&dtrace_lock); -+ -+ probe = dtrace_probe_lookup_id(desc.dtargd_id); -+ if (probe == NULL) { -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ -+ return -EINVAL; -+ } -+ -+ mutex_unlock(&dtrace_lock); -+ -+ prov = probe->dtpr_provider; -+ -+ if (prov->dtpv_pops.dtps_getargdesc == NULL) { -+ /* -+ * There isn't any typed 
information for this probe. -+ * Set the argument number to DTRACE_ARGNONE. -+ */ -+ desc.dtargd_ndx = DTRACE_ARGNONE; -+ } else { -+ desc.dtargd_native[0] = '\0'; -+ desc.dtargd_xlate[0] = '\0'; -+ desc.dtargd_mapping = desc.dtargd_ndx; -+ -+ prov->dtpv_pops.dtps_getargdesc( -+ prov->dtpv_arg, probe->dtpr_id, -+ probe->dtpr_arg, &desc); -+ } -+ -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ -+ if (copy_to_user(argp, &desc, sizeof(desc)) != 0) -+ return -EFAULT; -+ -+ return 0; -+ } -+ -+ case DTRACEIOC_GO: { -+ processorid_t cpuid; -+ -+ dt_dbg_ioctl("IOCTL GO (cmd %#x), argp %p\n", cmd, argp); -+ -+ rval = dtrace_state_go(state, &cpuid); -+ -+ if (rval != 0) -+ return rval; -+ -+ if (copy_to_user(argp, &cpuid, sizeof(cpuid)) != 0) -+ return -EFAULT; -+ -+ return 0; -+ } -+ -+ case DTRACEIOC_STOP: { -+ processorid_t cpuid; -+ -+ dt_dbg_ioctl("IOCTL STOP (cmd %#x), argp %p\n", cmd, argp); -+ -+ mutex_lock(&dtrace_lock); -+ rval = dtrace_state_stop(state, &cpuid); -+ mutex_unlock(&dtrace_lock); -+ -+ if (rval != 0) -+ return rval; -+ -+ if (copy_to_user(argp, &cpuid, sizeof(cpuid)) != 0) -+ return -EFAULT; -+ -+ return 0; -+ } -+ -+ case DTRACEIOC_DOFGET: { -+ struct dof_hdr hdr, *dof; -+ uint64_t len; -+ -+ dt_dbg_ioctl("IOCTL DOFGET (cmd %#x), argp %p\n", cmd, argp); -+ -+ if (copy_from_user(&hdr, argp, sizeof(hdr)) != 0) -+ return -EFAULT; -+ -+ mutex_lock(&dtrace_lock); -+ dof = dtrace_dof_create(state); -+ mutex_unlock(&dtrace_lock); -+ if (dof == NULL) -+ return -ENOMEM; -+ -+ len = min(hdr.dofh_loadsz, dof->dofh_loadsz); -+ rval = copy_to_user(argp, dof, len); -+ dtrace_dof_destroy(dof); -+ -+ return rval == 0 ? 0 : -EFAULT; -+ } -+ -+ case DTRACEIOC_AGGSNAP: -+ case DTRACEIOC_BUFSNAP: { -+ struct dtrace_bufdesc desc; -+ caddr_t cached; -+ struct dtrace_buffer *buf; -+ -+ dt_dbg_ioctl("IOCTL %s (cmd %#x), argp %p\n", -+ cmd == DTRACEIOC_AGGSNAP ? 
"AGGSNAP" -+ : "BUFSNAP", -+ cmd, argp); -+ -+ if (copy_from_user(&desc, argp, sizeof(desc)) != 0) -+ return -EFAULT; -+ -+ if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NR_CPUS) -+ return -EINVAL; -+ -+ mutex_lock(&dtrace_lock); -+ -+ if (cmd == DTRACEIOC_BUFSNAP) -+ buf = &state->dts_buffer[desc.dtbd_cpu]; -+ else -+ buf = &state->dts_aggbuffer[desc.dtbd_cpu]; -+ -+ if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { -+ size_t sz = buf->dtb_offset; -+ -+ if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { -+ mutex_unlock(&dtrace_lock); -+ return -EBUSY; -+ } -+ -+ /* -+ * If this buffer has already been consumed, we're -+ * going to indicate that there's nothing left here -+ * to consume. -+ */ -+ if (buf->dtb_flags & DTRACEBUF_CONSUMED) { -+ mutex_unlock(&dtrace_lock); -+ -+ desc.dtbd_size = 0; -+ desc.dtbd_drops = 0; -+ desc.dtbd_errors = 0; -+ desc.dtbd_oldest = 0; -+ sz = sizeof(desc); -+ -+ if (copy_to_user(argp, &desc, sz) != 0) -+ return -EFAULT; -+ -+ return 0; -+ } -+ -+ /* -+ * If this is a ring buffer that has wrapped, we want -+ * to copy the whole thing out. 
-+ */ -+ if (buf->dtb_flags & DTRACEBUF_WRAPPED) { -+ dtrace_buffer_polish(buf); -+ sz = buf->dtb_size; -+ } -+ -+ if (copy_to_user(desc.dtbd_data, buf->dtb_tomax, -+ sz) != 0) { -+ mutex_unlock(&dtrace_lock); -+ return -EFAULT; -+ } -+ -+ desc.dtbd_size = sz; -+ desc.dtbd_drops = buf->dtb_drops; -+ desc.dtbd_errors = buf->dtb_errors; -+ desc.dtbd_oldest = buf->dtb_xamot_offset; -+ -+ mutex_unlock(&dtrace_lock); -+ -+ if (copy_to_user(argp, &desc, sizeof(desc)) != 0) -+ return -EFAULT; -+ -+ buf->dtb_flags |= DTRACEBUF_CONSUMED; -+ -+ return 0; -+ } -+ -+ if (buf->dtb_tomax == NULL) { -+ ASSERT(buf->dtb_xamot == NULL); -+ mutex_unlock(&dtrace_lock); -+ return -ENOENT; -+ } -+ -+ cached = buf->dtb_tomax; -+ -+ dtrace_xcall(desc.dtbd_cpu, -+ (dtrace_xcall_t)dtrace_buffer_switch, buf); -+ -+ state->dts_errors += buf->dtb_xamot_errors; -+ -+ /* -+ * If the buffers did not actually switch, then the cross call -+ * did not take place -- presumably because the given CPU is -+ * not in the ready set. If this is the case, we'll return -+ * ENOENT. -+ */ -+ if (buf->dtb_tomax == cached) { -+ ASSERT(buf->dtb_xamot != cached); -+ mutex_unlock(&dtrace_lock); -+ return -ENOENT; -+ } -+ -+ ASSERT(cached == buf->dtb_xamot); -+ -+ /* -+ * We have our snapshot; now copy it out. -+ */ -+ if (copy_to_user(desc.dtbd_data, buf->dtb_xamot, -+ buf->dtb_xamot_offset) != 0) { -+ mutex_unlock(&dtrace_lock); -+ return -EFAULT; -+ } -+ -+ desc.dtbd_size = buf->dtb_xamot_offset; -+ desc.dtbd_drops = buf->dtb_xamot_drops; -+ desc.dtbd_errors = buf->dtb_xamot_errors; -+ desc.dtbd_oldest = 0; -+ -+ mutex_unlock(&dtrace_lock); -+ -+ /* -+ * Finally, copy out the buffer description. 
-+ */ -+ if (copy_to_user(argp, &desc, sizeof(desc)) != 0) -+ return -EFAULT; -+ -+ return 0; -+ } -+ -+ case DTRACEIOC_CONF: { -+ struct dtrace_conf conf; -+ -+ dt_dbg_ioctl("IOCTL CONF (cmd %#x), argp %p\n", cmd, argp); -+ -+ memset(&conf, 0, sizeof(conf)); -+ conf.dtc_difversion = DIF_VERSION; -+ conf.dtc_difintregs = DIF_DIR_NREGS; -+ conf.dtc_diftupregs = DIF_DTR_NREGS; -+ conf.dtc_ctfmodel = CTF_MODEL_NATIVE; -+ conf.dtc_maxbufs = nr_cpu_ids; -+ -+ if (copy_to_user(argp, &conf, sizeof(conf)) != 0) -+ return -EFAULT; -+ -+ return 0; -+ } -+ -+ case DTRACEIOC_STATUS: { -+ struct dtrace_status stat; -+ struct dtrace_dstate *dstate; -+ int i, j; -+ uint64_t nerrs; -+ -+ dt_dbg_ioctl("IOCTL STATUS (cmd %#x), argp %p\n", cmd, argp); -+ -+ /* -+ * See the comment in dtrace_state_deadman() for the reason -+ * for setting dts_laststatus to UINT64_MAX before setting -+ * it to the correct value. -+ */ -+ state->dts_laststatus = ns_to_ktime(UINT64_MAX); -+ dtrace_membar_producer(); -+ state->dts_laststatus = dtrace_gethrtime(); -+ -+ memset(&stat, 0, sizeof(stat)); -+ -+ mutex_lock(&dtrace_lock); -+ -+ if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { -+ mutex_unlock(&dtrace_lock); -+ return -ENOENT; -+ } -+ -+ if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) -+ stat.dtst_exiting = 1; -+ -+ nerrs = state->dts_errors; -+ dstate = &state->dts_vstate.dtvs_dynvars; -+ -+ for (i = 0; i < NR_CPUS; i++) { -+ struct dtrace_dstate_percpu *dcpu; -+ -+ dcpu = &dstate->dtds_percpu[i]; -+ stat.dtst_dyndrops += dcpu->dtdsc_drops; -+ stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; -+ stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; -+ -+ if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) -+ stat.dtst_filled++; -+ -+ nerrs += state->dts_buffer[i].dtb_errors; -+ -+ for (j = 0; j < state->dts_nspeculations; j++) { -+ struct dtrace_speculation *spec; -+ struct dtrace_buffer *buf; -+ -+ spec = &state->dts_speculations[j]; -+ buf = &spec->dtsp_buffer[i]; -+ 
stat.dtst_specdrops += buf->dtb_xamot_drops; -+ } -+ } -+ -+ stat.dtst_specdrops_busy = state->dts_speculations_busy; -+ stat.dtst_specdrops_unavail = state->dts_speculations_unavail; -+ stat.dtst_stkstroverflows = state->dts_stkstroverflows; -+ stat.dtst_dblerrors = state->dts_dblerrors; -+ stat.dtst_killed = (state->dts_activity == -+ DTRACE_ACTIVITY_KILLED); -+ stat.dtst_errors = nerrs; -+ -+ mutex_unlock(&dtrace_lock); -+ -+ if (copy_to_user(argp, &stat, sizeof(stat)) != 0) -+ return -EFAULT; -+ -+ return 0; -+ } -+ -+ case DTRACEIOC_FORMAT: { -+ struct dtrace_fmtdesc fmt; -+ char *str; -+ int len; -+ -+ dt_dbg_ioctl("IOCTL FORMAT (cmd %#x), argp %p\n", cmd, argp); -+ -+ if (copy_from_user(&fmt, argp, sizeof(fmt)) != 0) -+ return -EFAULT; -+ -+ mutex_lock(&dtrace_lock); -+ -+ if (fmt.dtfd_format == 0 || -+ fmt.dtfd_format > state->dts_nformats) { -+ mutex_unlock(&dtrace_lock); -+ return -EINVAL; -+ } -+ -+ /* -+ * Format strings are allocated contiguously and they are -+ * never freed; if a format index is less than the number -+ * of formats, we can assert that the format map is non-NULL -+ * and that the format for the specified index is non-NULL. -+ */ -+ ASSERT(state->dts_formats != NULL); -+ str = state->dts_formats[fmt.dtfd_format - 1]; -+ ASSERT(str != NULL); -+ -+ len = strlen(str) + 1; -+ -+ if (len > fmt.dtfd_length) { -+ fmt.dtfd_length = len; -+ -+ if (copy_to_user(argp, &fmt, sizeof(fmt)) != 0) { -+ mutex_unlock(&dtrace_lock); -+ return -EINVAL; -+ } -+ } else { -+ if (copy_to_user(fmt.dtfd_string, str, len) != 0) { -+ mutex_unlock(&dtrace_lock); -+ return -EINVAL; -+ } -+ } -+ -+ mutex_unlock(&dtrace_lock); -+ -+ return 0; -+ } -+ -+ default: -+ dt_dbg_ioctl("IOCTL ??? 
(cmd %#x), argp %p\n", -+ cmd, argp); -+ break; -+ } -+ -+ return -ENOTTY; -+} -+ -+static int dtrace_close(struct inode *inode, struct file *file) -+{ -+ struct dtrace_state *state = file->private_data; -+ -+ mutex_lock(&cpu_lock); -+ mutex_lock(&dtrace_lock); -+ -+ /* -+ * If there is anonymous state, destroy that first. -+ */ -+ if (state->dts_anon) { -+ ASSERT(dtrace_anon.dta_state == NULL); -+ dtrace_state_destroy(state->dts_anon); -+ } -+ -+ dtrace_state_destroy(state); -+ ASSERT(dtrace_opens > 0); -+ -+ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) -+ dtrace_disable(); -+ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&cpu_lock); -+ -+ return 0; -+} -+ -+static int dtrace_helper_open(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static long dtrace_helper_ioctl(struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ int rval; -+ struct dof_helper help, *dhp = NULL; -+ void __user *argp = (void __user *)arg; -+ -+ switch (cmd) { -+ case DTRACEHIOC_ADDDOF: -+ dt_dbg_ioctl("Helper IOCTL ADDDOF (cmd %#x), argp %p\n", -+ cmd, argp); -+ -+ if (copy_from_user(&help, argp, sizeof(help)) != 0) { -+ dtrace_dof_error(NULL, "failed to copy DOF helper"); -+ return -EFAULT; -+ } -+ -+ dhp = &help; -+ argp = (void __user *)help.dofhp_dof; -+ -+ /* fallthrough */ -+ -+ case DTRACEHIOC_ADD: { -+ struct dof_hdr *dof = dtrace_dof_copyin(argp, &rval); -+ -+ if (dof == NULL) -+ return rval; -+ -+ if (cmd == DTRACEHIOC_ADD) -+ dt_dbg_ioctl("Helper IOCTL ADD (cmd %#x), argp %p\n", -+ cmd, argp); -+ -+ mutex_lock(&dtrace_lock); -+ -+ /* -+ * The dtrace_helper_slurp() routine takes responsibility for -+ * the dof -- it may free it now, or it may save it and free it -+ * later. -+ */ -+ rval = dtrace_helper_slurp(dof, dhp); -+ if (rval == -1) -+ rval = -EINVAL; -+ -+ mutex_unlock(&dtrace_lock); -+ -+ dt_dbg_ioctl("Helper IOCTL %s returning %d\n", -+ cmd == DTRACEHIOC_ADD ? 
"ADD" -+ : "ADDDOF", -+ rval); -+ -+ return rval; -+ } -+ -+ case DTRACEHIOC_REMOVE: -+ dt_dbg_ioctl("Helper IOCTL REMOVE (cmd %#x), argp %p\n", -+ cmd, argp); -+ -+ mutex_lock(&dtrace_lock); -+ -+ rval = dtrace_helper_destroygen((uintptr_t)argp); -+ -+ mutex_unlock(&dtrace_lock); -+ -+ dt_dbg_ioctl("Helper IOCTL REMOVE returning %d\n", rval); -+ -+ return rval; -+ default: -+ dt_dbg_ioctl("Helper IOCTL ??? (cmd %#x), argp %p\n", -+ cmd, argp); -+ break; -+ } -+ -+ return -ENOTTY; -+} -+ -+static int dtrace_helper_close(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static const struct file_operations dtrace_fops = { -+ .owner = THIS_MODULE, -+ .unlocked_ioctl = dtrace_ioctl, -+ .open = dtrace_open, -+ .release = dtrace_close, -+}; -+ -+static const struct file_operations helper_fops = { -+ .owner = THIS_MODULE, -+ .unlocked_ioctl = dtrace_helper_ioctl, -+ .compat_ioctl = dtrace_helper_ioctl, -+ .open = dtrace_helper_open, -+ .release = dtrace_helper_close, -+}; -+ -+static struct miscdevice dtrace_dev = { -+ .minor = DT_DEV_DTRACE_MINOR, -+ .name = "dtrace", -+ .nodename = "dtrace/dtrace", -+ .fops = &dtrace_fops, -+}; -+ -+static struct miscdevice helper_dev = { -+ .minor = DT_DEV_HELPER_MINOR, -+ .name = "helper", -+ .nodename = "dtrace/helper", -+ .fops = &helper_fops, -+}; -+ -+static void dtrace_module_loaded(struct module *mp) -+{ -+ struct dtrace_provider *prv; -+ -+ mutex_lock(&module_mutex); -+ mutex_lock(&dtrace_provider_lock); -+ -+ /* -+ * Give all providers a chance to register probes for this module. -+ */ -+ for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) -+ prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, mp); -+ -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ -+ /* -+ * If we have any retained enablings, we need to match against them. 
-+ */ -+ mutex_lock(&dtrace_lock); -+ -+ if (dtrace_retained == NULL) { -+ mutex_unlock(&dtrace_lock); -+ return; -+ } -+ -+ mutex_unlock(&dtrace_lock); -+ dtrace_enabling_matchall(); -+} -+ -+static void dtrace_module_unloaded(struct module *mp) -+{ -+ struct dtrace_probe template, *probe, *first, *next; -+ struct dtrace_provider *prv; -+ -+ template.dtpr_mod = mp->name; -+ -+ mutex_lock(&module_mutex); -+ mutex_lock(&dtrace_provider_lock); -+ mutex_lock(&dtrace_lock); -+ -+ if (dtrace_bymod == NULL) { -+ /* -+ * The DTrace module is loaded (obviously) but not attached; -+ * we don't have any work to do. -+ */ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ return; -+ } -+ -+ for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template); -+ probe != NULL; probe = probe->dtpr_nextmod) { -+ if (probe->dtpr_ecb != NULL) { -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ -+ /* -+ * This shouldn't _actually_ be possible -- we're -+ * unloading a module that has an enabled probe in it. -+ * (It's normally up to the provider to make sure that -+ * this can't happen.) However, because dtps_enable() -+ * doesn't have a failure mode, there can be an -+ * enable/unload race. Upshot: we don't want to -+ * assert, but we're not going to disable the -+ * probe, either. 
-+ */ -+ if (dtrace_err_verbose) { -+ pr_warn("unloaded module '%s' " -+ "had enabled probes", mp->name); -+ } -+ -+ return; -+ } -+ } -+ -+ probe = first; -+ -+ for (first = NULL; probe != NULL; probe = next) { -+ dtrace_probe_remove_id(probe->dtpr_id); -+ -+ next = probe->dtpr_nextmod; -+ dtrace_hash_remove(dtrace_bymod, probe); -+ dtrace_hash_remove(dtrace_byfunc, probe); -+ dtrace_hash_remove(dtrace_byname, probe); -+ -+ if (first == NULL) { -+ first = probe; -+ probe->dtpr_nextmod = NULL; -+ } else { -+ probe->dtpr_nextmod = first; -+ first = probe; -+ } -+ } -+ -+ /* -+ * We've removed all of the module's probes from the hash chains and -+ * from the probe array. Now issue a dtrace_sync() to be sure that -+ * everyone has cleared out from any probe array processing. -+ */ -+ dtrace_sync(); -+ -+ for (probe = first; probe != NULL; probe = first) { -+ first = probe->dtpr_nextmod; -+ prv = probe->dtpr_provider; -+ prv->dtpv_pops.dtps_destroy(prv->dtpv_arg, probe->dtpr_id, -+ probe->dtpr_arg); -+ kfree(probe->dtpr_mod); -+ kfree(probe->dtpr_func); -+ kfree(probe->dtpr_name); -+ kfree(probe); -+ } -+ -+ /* -+ * Notify providers to cleanup per-module data for this module. -+ */ -+ for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) -+ if (prv->dtpv_pops.dtps_destroy_module != NULL) -+ prv->dtpv_pops.dtps_destroy_module(prv->dtpv_arg, mp); -+ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+} -+ -+/* -+ * Register a toxic range. 
-+ */ -+static void dtrace_toxrange_add(uintptr_t base, uintptr_t limit) -+{ -+ if (dtrace_toxranges >= dtrace_toxranges_max) { -+ int osize, nsize; -+ struct dtrace_toxrange *range; -+ -+ osize = dtrace_toxranges_max * sizeof(struct dtrace_toxrange); -+ -+ if (osize == 0) { -+ ASSERT(dtrace_toxrange == NULL); -+ ASSERT(dtrace_toxranges_max == 0); -+ -+ dtrace_toxranges_max = 1; -+ } else -+ dtrace_toxranges_max <<= 1; -+ -+ nsize = dtrace_toxranges_max * sizeof(struct dtrace_toxrange); -+ range = vzalloc(nsize); -+ if (range == NULL) { -+ pr_warn("Failed to add toxic range: out of memory\n"); -+ return; -+ } -+ -+ if (dtrace_toxrange != NULL) { -+ ASSERT(osize != 0); -+ -+ memcpy(range, dtrace_toxrange, osize); -+ vfree(dtrace_toxrange); -+ } -+ -+ dtrace_toxrange = range; -+ } -+ -+ ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == (uintptr_t)NULL); -+ ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == (uintptr_t)NULL); -+ -+ dtrace_toxrange[dtrace_toxranges].dtt_base = base; -+ dtrace_toxrange[dtrace_toxranges].dtt_limit = limit; -+ dtrace_toxranges++; -+} -+ -+/* -+ * Check if an address falls within a toxic region. 
-+ */ -+int dtrace_istoxic(uintptr_t kaddr, size_t size) -+{ -+ uintptr_t taddr, tsize; -+ int i; -+ -+ for (i = 0; i < dtrace_toxranges; i++) { -+ taddr = dtrace_toxrange[i].dtt_base; -+ tsize = dtrace_toxrange[i].dtt_limit - taddr; -+ -+ if (kaddr - taddr < tsize) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); -+ this_cpu_core->cpuc_dtrace_illval = kaddr; -+ return 1; -+ } -+ -+ if (taddr - kaddr < size) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); -+ this_cpu_core->cpuc_dtrace_illval = kaddr; -+ return 1; -+ } -+ } -+ -+ return 0; -+} -+ -+static int dtrace_mod_notifier(struct notifier_block *nb, unsigned long val, -+ void *args) -+{ -+ struct module *mp = args; -+ -+ if (!mp) -+ return NOTIFY_DONE; -+ -+ switch (val) { -+ case MODULE_STATE_LIVE: -+ dtrace_module_loaded(mp); -+ break; -+ -+ case MODULE_STATE_GOING: -+ dtrace_module_unloaded(mp); -+ break; -+ } -+ -+ return NOTIFY_DONE; -+} -+ -+static struct notifier_block dtrace_modmgmt = { -+ .notifier_call = dtrace_mod_notifier, -+}; -+ -+/* -+ * Initialize the DTrace core. -+ * -+ * Equivalent to: dtrace_attach() -+ */ -+int dtrace_dev_init(void) -+{ -+ dtrace_provider_id_t id; -+ int rc = 0; -+ struct cred *cred; -+ -+ /* -+ * Register the device for the DTrace core. -+ */ -+ rc = misc_register(&dtrace_dev); -+ if (rc) { -+ pr_err("%s: Can't register misc device %d\n", -+ dtrace_dev.name, dtrace_dev.minor); -+ -+ return rc; -+ } -+ -+ /* -+ * Register the device for the DTrace helper. 
-+ */ -+ rc = misc_register(&helper_dev); -+ if (rc) { -+ pr_err("%s: Can't register misc device %d\n", -+ helper_dev.name, helper_dev.minor); -+ -+ misc_deregister(&dtrace_dev); -+ return rc; -+ } -+ -+ mutex_lock(&cpu_lock); -+ mutex_lock(&module_mutex); -+ mutex_lock(&dtrace_provider_lock); -+ mutex_lock(&dtrace_lock); -+ -+ rc = dtrace_probe_init(); -+ if (rc) { -+ pr_err("Failed to initialize DTrace core\n"); -+ -+ goto errout; -+ } -+ -+#if IS_ENABLED(CONFIG_DT_FASTTRAP) -+ dtrace_helpers_cleanup = dtrace_helpers_destroy; -+ dtrace_helpers_fork = dtrace_helpers_duplicate; -+#endif -+#ifdef FIXME -+ dtrace_cpu_init = dtrace_cpu_setup_initial; -+ dtrace_cpustart_init = dtrace_suspend; -+ dtrace_cpustart_fini = dtrace_resume; -+ dtrace_debugger_init = dtrace_suspend; -+ dtrace_debugger_fini = dtrace_resume; -+ -+ register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); -+#endif -+ -+#ifdef FIXME -+ dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri, 1, INT_MAX, -+ 0); -+#endif -+ -+ dtrace_state_cachep = kmem_cache_create("dtrace_state_cache", -+ sizeof(struct dtrace_dstate_percpu) * NR_CPUS, -+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); -+ -+ /* From now on the failures are results of failed allocations. */ -+ rc = -ENOMEM; -+ -+ /* -+ * Create the probe hashtables. 
-+ */ -+ dtrace_bymod = dtrace_hash_create( -+ offsetof(struct dtrace_probe, dtpr_mod), -+ offsetof(struct dtrace_probe, dtpr_nextmod), -+ offsetof(struct dtrace_probe, dtpr_prevmod)); -+ if (dtrace_bymod == NULL) -+ goto errout; -+ -+ dtrace_byfunc = dtrace_hash_create( -+ offsetof(struct dtrace_probe, dtpr_func), -+ offsetof(struct dtrace_probe, dtpr_nextfunc), -+ offsetof(struct dtrace_probe, dtpr_prevfunc)); -+ if (dtrace_byfunc == NULL) -+ goto errout; -+ -+ dtrace_byname = dtrace_hash_create( -+ offsetof(struct dtrace_probe, dtpr_name), -+ offsetof(struct dtrace_probe, dtpr_nextname), -+ offsetof(struct dtrace_probe, dtpr_prevname)); -+ if (dtrace_byname == NULL) -+ goto errout; -+ -+ /* -+ * Initialize cred. -+ */ -+ cred = prepare_kernel_cred(NULL); -+ if (cred == NULL) -+ goto errout; -+ -+ init_user_namespace = cred->user_ns; -+ put_cred(cred); -+ -+ /* -+ * Ensure that the X configuration parameter has a legal value. -+ */ -+ if (dtrace_retain_max < 1) { -+ pr_warn("Illegal value (%lu) for dtrace_retain_max; " -+ "setting to 1", (unsigned long)dtrace_retain_max); -+ -+ dtrace_retain_max = 1; -+ } -+ -+ /* -+ * Discover our toxic ranges. -+ */ -+ dtrace_toxic_ranges(dtrace_toxrange_add); -+ -+ /* -+ * Register ourselves as a provider. -+ */ -+ dtrace_register("dtrace", &dtrace_provider_attr, DTRACE_PRIV_NONE, 0, -+ &dtrace_provider_ops, NULL, &id); -+ -+ ASSERT(dtrace_provider != NULL); -+ ASSERT((dtrace_provider_id_t)dtrace_provider == id); -+ -+ /* -+ * Create BEGIN, END, and ERROR probes. 
-+ */ -+ dtrace_probeid_begin = dtrace_probe_create( -+ (dtrace_provider_id_t)dtrace_provider, NULL, -+ NULL, "BEGIN", 0, NULL); -+ if (dtrace_probeid_begin == DTRACE_IDNONE) -+ goto errout; -+ -+ dtrace_probeid_end = dtrace_probe_create( -+ (dtrace_provider_id_t)dtrace_provider, NULL, -+ NULL, "END", 0, NULL); -+ if (dtrace_probeid_end == DTRACE_IDNONE) -+ goto errout; -+ -+ dtrace_probeid_error = dtrace_probe_create( -+ (dtrace_provider_id_t)dtrace_provider, NULL, -+ NULL, "ERROR", 1, NULL); -+ if (dtrace_probeid_error == DTRACE_IDNONE) -+ goto errout; -+ -+ dtrace_anon_property(); -+ -+ /* -+ * If DTrace helper tracing is enabled, we need to allocate a trace -+ * buffer. -+ */ -+ if (dtrace_helptrace_enabled) { -+ ASSERT(dtrace_helptrace_buffer == NULL); -+ -+ dtrace_helptrace_buffer = vzalloc(dtrace_helptrace_bufsize); -+ -+ if (dtrace_helptrace_buffer == NULL) { -+ pr_warn("Cannot allocate helptrace buffer; " -+ "disabling dtrace_helptrace\n"); -+ dtrace_helptrace_enabled = 0; -+ } -+ } -+ -+#ifdef FIXME -+ /* -+ * There is usually code here to handle the case where there already -+ * are providers when we get to this code. On Linux, that does not -+ * seem to be possible since the DTrace core module (this code) is -+ * loaded as a dependency for each provider, and thus this -+ * initialization code is executed prior to the initialization code of -+ * the first provider causing the core to be loaded. 
-+ */ -+#endif -+ -+ if (register_module_notifier(&dtrace_modmgmt)) -+ goto errout; -+ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ mutex_unlock(&cpu_lock); -+ -+ return 0; -+ -+errout: -+ if (dtrace_provider != NULL) -+ (void) dtrace_unregister((dtrace_provider_id_t)dtrace_provider); -+ -+ dtrace_hash_destroy(dtrace_bymod); -+ dtrace_hash_destroy(dtrace_byfunc); -+ dtrace_hash_destroy(dtrace_byname); -+ -+ misc_deregister(&helper_dev); -+ misc_deregister(&dtrace_dev); -+ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ mutex_unlock(&cpu_lock); -+ -+ return rc; -+} -+ -+void dtrace_dev_exit(void) -+{ -+ mutex_lock(&cpu_lock); -+ mutex_lock(&module_mutex); -+ mutex_lock(&dtrace_provider_lock); -+ mutex_lock(&dtrace_lock); -+ -+ dtrace_unregister((dtrace_provider_id_t)dtrace_provider); -+ dtrace_provider = NULL; -+ -+ dtrace_probe_exit(); -+ -+ unregister_module_notifier(&dtrace_modmgmt); -+ -+#if IS_ENABLED(CONFIG_DT_FASTTRAP) -+ dtrace_helpers_cleanup = NULL; -+ dtrace_helpers_fork = NULL; -+#endif -+#ifdef FIXME -+ dtrace_cpu_init = NULL; -+ dtrace_cpustart_init = NULL; -+ dtrace_cpustart_fini = NULL; -+ dtrace_debugger_init = NULL; -+ dtrace_debugger_fini = NULL; -+ -+ unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); -+#endif -+ -+ mutex_unlock(&cpu_lock); -+ -+ dtrace_hash_destroy(dtrace_bymod); -+ dtrace_hash_destroy(dtrace_byfunc); -+ dtrace_hash_destroy(dtrace_byname); -+ dtrace_bymod = NULL; -+ dtrace_byfunc = NULL; -+ dtrace_byname = NULL; -+ -+ /* -+ * If DTrace helper tracing is enabled, we need to free the trace -+ * buffer. 
-+ */ -+ if (dtrace_helptrace_enabled || dtrace_helptrace_buffer) -+ vfree(dtrace_helptrace_buffer); -+ -+ kmem_cache_destroy(dtrace_state_cachep); -+ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ -+ misc_deregister(&helper_dev); -+ misc_deregister(&dtrace_dev); -+} -diff --git a/dtrace/dtrace_dev.h b/dtrace/dtrace_dev.h -new file mode 100644 -index 000000000000..11ae2deb17a1 ---- /dev/null -+++ b/dtrace/dtrace_dev.h -@@ -0,0 +1,35 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#ifndef _DTRACE_DEV_H_ -+#define _DTRACE_DEV_H_ -+ -+#define DT_DEV_DTRACE_MINOR (16) -+#define DT_DEV_HELPER_MINOR (DT_DEV_DTRACE_MINOR + 1) -+#define DT_DEV_PROFILE_MINOR (DT_DEV_HELPER_MINOR + 1) -+#define DT_DEV_SYSTRACE_MINOR (DT_DEV_PROFILE_MINOR + 1) -+#define DT_DEV_FBT_MINOR (DT_DEV_SYSTRACE_MINOR + 1) -+#define DT_DEV_SDT_MINOR (DT_DEV_FBT_MINOR + 1) -+#define DT_DEV_FASTTRAP_MINOR (DT_DEV_SDT_MINOR + 1) -+#define DT_DEV_LOCKSTAT_MINOR (DT_DEV_FASTTRAP_MINOR + 1) -+#define DT_DEV_DT_TEST_MINOR (DT_DEV_LOCKSTAT_MINOR + 1) -+#define DT_DEV_DT_PERF_MINOR (DT_DEV_DT_TEST_MINOR + 1) -+ -+extern int dtrace_dev_init(void); -+extern void dtrace_dev_exit(void); -+ -+#endif /* _DTRACE_DEV_H_ */ -diff --git a/dtrace/dtrace_dif.c b/dtrace/dtrace_dif.c -new file mode 100644 -index 000000000000..ae7f01b4ed9b ---- /dev/null -+++ b/dtrace/dtrace_dif.c -@@ -0,0 +1,4905 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_dif.c -+ * DESCRIPTION: DTrace - DIF object implementation -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/dtrace_cpu.h> -+#include <linux/dtrace_task_impl.h> -+#include <linux/fdtable.h> -+#include <linux/hardirq.h> -+#include <linux/if_arp.h> -+#include <linux/if_ether.h> -+#include <linux/if_infiniband.h> -+#include <linux/in6.h> -+#include <linux/inet.h> -+#include <linux/kdev_t.h> -+#include <linux/slab.h> -+#include <linux/socket.h> -+#include <linux/vmalloc.h> -+#include <net/ipv6.h> -+#include <asm/byteorder.h> -+ -+#include <linux/mount.h> -+ -+#include "dtrace.h" -+ -+size_t dtrace_global_maxsize = 16 * 1024; -+ -+/* -+ * This externally visible variable (accessible through the backtick (`) -+ * syntax is provided as a source of well-known, zero-filled memory. Some -+ * translators use this in their implementation. -+ */ -+const char dtrace_zero[256] = { 0, }; -+ -+uint64_t dtrace_vtime_references; -+ -+static const char hexdigits[] = "0123456789abcdef"; -+ -+static int dtrace_difo_err(uint_t pc, const char *format, ...) -+{ -+ char buf[256]; -+ -+ if (dtrace_err_verbose) { -+ va_list alist; -+ size_t len = strlen(format); -+ -+ pr_err("dtrace DIF object error: [%u]: ", pc); -+ -+ if (len >= 256 - sizeof(KERN_ERR)) { -+ pr_err("<invalid format string>"); -+ return 1; -+ } -+ -+ memcpy(buf, KERN_ERR, sizeof(KERN_ERR)); -+ memcpy(buf + sizeof(KERN_ERR), format, len); -+ -+ va_start(alist, format); -+ vprintk(buf, alist); -+ va_end(alist); -+ } -+ -+ return 1; -+} -+ -+/* -+ * Validate a DTrace DIF object by checking the IR instructions. The following -+ * rules are currently enforced by dtrace_difo_validate(): -+ * -+ * 1. Each instruction must have a valid opcode -+ * 2. Each register, string, variable, or subroutine reference must be valid -+ * 3. No instruction can modify register %r0 (must be zero) -+ * 4. All instruction reserved bits must be set to zero -+ * 5. The last instruction must be a "ret" instruction -+ * 6. 
All branch targets must reference a valid instruction _after_ the branch -+ */ -+int dtrace_difo_validate(struct dtrace_difo *dp, struct dtrace_vstate *vstate, -+ uint_t nregs, const struct cred *cr) -+{ -+ int err = 0, i; -+ int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; -+ int kcheckload = 0; -+ uint_t pc; -+ -+ kcheckload = cr == NULL || -+ (vstate->dtvs_state->dts_cred.dcr_visible & -+ DTRACE_CRV_KERNEL) == 0; -+ -+ dp->dtdo_destructive = 0; -+ -+ for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { -+ dif_instr_t instr = dp->dtdo_buf[pc]; -+ uint_t r1 = DIF_INSTR_R1(instr); -+ uint_t r2 = DIF_INSTR_R2(instr); -+ uint_t rd = DIF_INSTR_RD(instr); -+ uint_t rs = DIF_INSTR_RS(instr); -+ uint_t label = DIF_INSTR_LABEL(instr); -+ uint_t v = DIF_INSTR_VAR(instr); -+ uint_t subr = DIF_INSTR_SUBR(instr); -+ uint_t diftype = DIF_INSTR_TYPE(instr); -+ uint_t op = DIF_INSTR_OP(instr); -+ -+ switch (op) { -+ case DIF_OP_OR: -+ case DIF_OP_XOR: -+ case DIF_OP_AND: -+ case DIF_OP_SLL: -+ case DIF_OP_SRL: -+ case DIF_OP_SRA: -+ case DIF_OP_SUB: -+ case DIF_OP_ADD: -+ case DIF_OP_MUL: -+ case DIF_OP_SDIV: -+ case DIF_OP_UDIV: -+ case DIF_OP_SREM: -+ case DIF_OP_UREM: -+ case DIF_OP_COPYS: -+ if (r1 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r1); -+ if (r2 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r2); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ if (rd == 0) -+ err += efunc(pc, "cannot write to %r0\n"); -+ break; -+ case DIF_OP_NOT: -+ case DIF_OP_MOV: -+ case DIF_OP_ALLOCS: -+ if (r1 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r1); -+ if (r2 != 0) -+ err += efunc(pc, "non-zero reserved bits\n"); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ if (rd == 0) -+ err += efunc(pc, "cannot write to %r0\n"); -+ break; -+ case DIF_OP_LDSB: -+ case DIF_OP_LDSH: -+ case DIF_OP_LDSW: -+ case DIF_OP_LDUB: -+ case DIF_OP_LDUH: -+ case DIF_OP_LDUW: -+ case DIF_OP_LDX: -+ if (r1 >= nregs) 
-+ err += efunc(pc, "invalid register %u\n", r1); -+ if (r2 != 0) -+ err += efunc(pc, "non-zero reserved bits\n"); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ if (rd == 0) -+ err += efunc(pc, "cannot write to %r0\n"); -+ if (kcheckload) -+ dp->dtdo_buf[pc] = DIF_INSTR_LOAD( -+ op + DIF_OP_RLDSB - -+ DIF_OP_LDSB, -+ r1, rd); -+ break; -+ case DIF_OP_RLDSB: -+ case DIF_OP_RLDSH: -+ case DIF_OP_RLDSW: -+ case DIF_OP_RLDUB: -+ case DIF_OP_RLDUH: -+ case DIF_OP_RLDUW: -+ case DIF_OP_RLDX: -+ if (r1 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r1); -+ if (r2 != 0) -+ err += efunc(pc, "non-zero reserved bits\n"); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ if (rd == 0) -+ err += efunc(pc, "cannot write to %r0\n"); -+ break; -+ case DIF_OP_ULDSB: -+ case DIF_OP_ULDSH: -+ case DIF_OP_ULDSW: -+ case DIF_OP_ULDUB: -+ case DIF_OP_ULDUH: -+ case DIF_OP_ULDUW: -+ case DIF_OP_ULDX: -+ if (r1 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r1); -+ if (r2 != 0) -+ err += efunc(pc, "non-zero reserved bits\n"); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ if (rd == 0) -+ err += efunc(pc, "cannot write to %r0\n"); -+ break; -+ case DIF_OP_STB: -+ case DIF_OP_STH: -+ case DIF_OP_STW: -+ case DIF_OP_STX: -+ if (r1 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r1); -+ if (r2 != 0) -+ err += efunc(pc, "non-zero reserved bits\n"); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ if (rd == 0) -+ err += efunc(pc, "cannot write to 0 address\n"); -+ break; -+ case DIF_OP_CMP: -+ case DIF_OP_SCMP: -+ if (r1 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r1); -+ if (r2 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r2); -+ if (rd != 0) -+ err += efunc(pc, "non-zero reserved bits\n"); -+ break; -+ case DIF_OP_TST: -+ if (r1 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r1); -+ if (r2 != 0 || rd != 0) -+ err += efunc(pc, "non-zero reserved 
bits\n"); -+ break; -+ case DIF_OP_BA: -+ case DIF_OP_BE: -+ case DIF_OP_BNE: -+ case DIF_OP_BG: -+ case DIF_OP_BGU: -+ case DIF_OP_BGE: -+ case DIF_OP_BGEU: -+ case DIF_OP_BL: -+ case DIF_OP_BLU: -+ case DIF_OP_BLE: -+ case DIF_OP_BLEU: -+ if (label >= dp->dtdo_len) -+ err += efunc(pc, "invalid branch target %u\n", -+ label); -+ if (label <= pc) -+ err += efunc(pc, "backward branch to %u\n", -+ label); -+ break; -+ case DIF_OP_RET: -+ if (r1 != 0 || r2 != 0) -+ err += efunc(pc, "non-zero reserved bits\n"); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ break; -+ case DIF_OP_NOP: -+ case DIF_OP_POPTS: -+ case DIF_OP_FLUSHTS: -+ if (r1 != 0 || r2 != 0 || rd != 0) -+ err += efunc(pc, "non-zero reserved bits\n"); -+ break; -+ case DIF_OP_SETX: -+ if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) -+ err += efunc(pc, "invalid integer ref %u\n", -+ DIF_INSTR_INTEGER(instr)); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ if (rd == 0) -+ err += efunc(pc, "cannot write to %r0\n"); -+ break; -+ case DIF_OP_SETS: -+ if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) -+ err += efunc(pc, "invalid string ref %u\n", -+ DIF_INSTR_STRING(instr)); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ if (rd == 0) -+ err += efunc(pc, "cannot write to %r0\n"); -+ break; -+ case DIF_OP_LDGA: -+ case DIF_OP_LDTA: -+ if (r1 > DIF_VAR_ARRAY_MAX) -+ err += efunc(pc, "invalid array %u\n", r1); -+ if (r2 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r2); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ if (rd == 0) -+ err += efunc(pc, "cannot write to %r0\n"); -+ break; -+ case DIF_OP_LDGS: -+ case DIF_OP_LDTS: -+ case DIF_OP_LDLS: -+ case DIF_OP_LDGAA: -+ case DIF_OP_LDTAA: -+ if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX) -+ err += efunc(pc, "invalid variable %u\n", v); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ if (rd == 0) -+ err += efunc(pc, "cannot 
write to %r0\n"); -+ break; -+ case DIF_OP_STGS: -+ case DIF_OP_STTS: -+ case DIF_OP_STLS: -+ case DIF_OP_STGAA: -+ case DIF_OP_STTAA: -+ if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX) -+ err += efunc(pc, "invalid variable %u\n", v); -+ if (rs >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ break; -+ case DIF_OP_CALL: -+ if (subr > DIF_SUBR_MAX) -+ err += efunc(pc, "invalid subr %u\n", subr); -+ if (rd >= nregs) -+ err += efunc(pc, "invalid register %u\n", rd); -+ if (rd == 0) -+ err += efunc(pc, "cannot write to %r0\n"); -+ -+ if (subr == DIF_SUBR_COPYOUT || -+ subr == DIF_SUBR_COPYOUTSTR) -+ dp->dtdo_destructive = 1; -+ break; -+ case DIF_OP_PUSHTR: -+ if (diftype != DIF_TYPE_STRING && diftype != DIF_TYPE_CTF) -+ err += efunc(pc, "invalid ref type %u\n", -+ diftype); -+ if (r2 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r2); -+ if (rs >= nregs) -+ err += efunc(pc, "invalid register %u\n", rs); -+ break; -+ case DIF_OP_PUSHTV: -+ if (diftype != DIF_TYPE_CTF) -+ err += efunc(pc, "invalid val type %u\n", -+ diftype); -+ if (r2 >= nregs) -+ err += efunc(pc, "invalid register %u\n", r2); -+ if (rs >= nregs) -+ err += efunc(pc, "invalid register %u\n", rs); -+ break; -+ default: -+ err += efunc(pc, "invalid opcode %u\n", -+ DIF_INSTR_OP(instr)); -+ } -+ } -+ -+ if (dp->dtdo_len != 0 && -+ DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) { -+ err += efunc(dp->dtdo_len - 1, -+ "expected 'ret' as last DIF instruction\n"); -+ } -+ -+ if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) { -+ /* -+ * If we're not returning by reference, the size must be either -+ * 0 or the size of one of the base types. 
-+ */ -+ switch (dp->dtdo_rtype.dtdt_size) { -+ case 0: -+ case sizeof(uint8_t): -+ case sizeof(uint16_t): -+ case sizeof(uint32_t): -+ case sizeof(uint64_t): -+ break; -+ -+ default: -+ err += efunc(dp->dtdo_len - 1, "bad return size\n"); -+ } -+ } -+ -+ for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { -+ struct dtrace_difv *v = &dp->dtdo_vartab[i], -+ *existing = NULL; -+ struct dtrace_diftype *vt, *et; -+ uint_t id, ndx; -+ -+ if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && -+ v->dtdv_scope != DIFV_SCOPE_THREAD && -+ v->dtdv_scope != DIFV_SCOPE_LOCAL) { -+ err += efunc(i, "unrecognized variable scope %d\n", -+ v->dtdv_scope); -+ break; -+ } -+ -+ if (v->dtdv_kind != DIFV_KIND_ARRAY && -+ v->dtdv_kind != DIFV_KIND_SCALAR) { -+ err += efunc(i, "unrecognized variable type %d\n", -+ v->dtdv_kind); -+ break; -+ } -+ -+ id = v->dtdv_id; -+ if (id > DIF_VARIABLE_MAX) { -+ err += efunc(i, "%d exceeds variable id limit\n", id); -+ break; -+ } -+ -+ if (id < DIF_VAR_OTHER_UBASE) -+ continue; -+ -+ /* -+ * For user-defined variables, we need to check that this -+ * definition is identical to any previous definition that we -+ * encountered. 
-+ */ -+ ndx = id - DIF_VAR_OTHER_UBASE; -+ -+ switch (v->dtdv_scope) { -+ case DIFV_SCOPE_GLOBAL: -+ if (ndx < vstate->dtvs_nglobals) { -+ struct dtrace_statvar *svar; -+ -+ svar = vstate->dtvs_globals[ndx]; -+ if (svar != NULL) -+ existing = &svar->dtsv_var; -+ } -+ -+ break; -+ -+ case DIFV_SCOPE_THREAD: -+ if (ndx < vstate->dtvs_ntlocals) -+ existing = &vstate->dtvs_tlocals[ndx]; -+ break; -+ -+ case DIFV_SCOPE_LOCAL: -+ if (ndx < vstate->dtvs_nlocals) { -+ struct dtrace_statvar *svar; -+ -+ svar = vstate->dtvs_locals[ndx]; -+ if (svar != NULL) -+ existing = &svar->dtsv_var; -+ } -+ -+ break; -+ } -+ -+ vt = &v->dtdv_type; -+ -+ if (vt->dtdt_flags & DIF_TF_BYREF) { -+ if (vt->dtdt_size == 0) { -+ err += efunc(i, "zero-sized variable\n"); -+ break; -+ } -+ -+ if (v->dtdv_scope == DIFV_SCOPE_GLOBAL && -+ vt->dtdt_size > dtrace_global_maxsize) { -+ err += efunc(i, "oversized by-ref global\n"); -+ break; -+ } -+ } -+ -+ if (existing == NULL || existing->dtdv_id == 0) -+ continue; -+ -+ ASSERT(existing->dtdv_id == v->dtdv_id); -+ ASSERT(existing->dtdv_scope == v->dtdv_scope); -+ -+ if (existing->dtdv_kind != v->dtdv_kind) -+ err += efunc(i, "%d changed variable kind\n", id); -+ -+ et = &existing->dtdv_type; -+ -+ if (vt->dtdt_flags != et->dtdt_flags) { -+ err += efunc(i, "%d changed variable type flags\n", id); -+ break; -+ } -+ -+ if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) { -+ err += efunc(i, "%d changed variable type size\n", id); -+ break; -+ } -+ } -+ -+ return err; -+} -+ -+/* -+ * Validate a DTrace DIF object that it is to be used as a helper. Helpers -+ * are much more constrained than normal DIFOs. Specifically, they may -+ * not: -+ * -+ * 1. Make calls to subroutines other than copyin(), copyinstr() or -+ * miscellaneous string routines -+ * 2. Access DTrace variables other than the args[] array, and the -+ * curthread, pid, ppid, tid, execname, zonename, uid and gid variables. -+ * 3. Have thread-local variables. -+ * 4. 
Have dynamic variables. -+ */ -+int dtrace_difo_validate_helper(struct dtrace_difo *dp) -+{ -+ int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; -+ int err = 0; -+ uint_t pc; -+ -+ for (pc = 0; pc < dp->dtdo_len; pc++) { -+ dif_instr_t instr = dp->dtdo_buf[pc]; -+ uint_t v = DIF_INSTR_VAR(instr); -+ uint_t subr = DIF_INSTR_SUBR(instr); -+ uint_t op = DIF_INSTR_OP(instr); -+ -+ switch (op) { -+ case DIF_OP_OR: -+ case DIF_OP_XOR: -+ case DIF_OP_AND: -+ case DIF_OP_SLL: -+ case DIF_OP_SRL: -+ case DIF_OP_SRA: -+ case DIF_OP_SUB: -+ case DIF_OP_ADD: -+ case DIF_OP_MUL: -+ case DIF_OP_SDIV: -+ case DIF_OP_UDIV: -+ case DIF_OP_SREM: -+ case DIF_OP_UREM: -+ case DIF_OP_COPYS: -+ case DIF_OP_NOT: -+ case DIF_OP_MOV: -+ case DIF_OP_RLDSB: -+ case DIF_OP_RLDSH: -+ case DIF_OP_RLDSW: -+ case DIF_OP_RLDUB: -+ case DIF_OP_RLDUH: -+ case DIF_OP_RLDUW: -+ case DIF_OP_RLDX: -+ case DIF_OP_ULDSB: -+ case DIF_OP_ULDSH: -+ case DIF_OP_ULDSW: -+ case DIF_OP_ULDUB: -+ case DIF_OP_ULDUH: -+ case DIF_OP_ULDUW: -+ case DIF_OP_ULDX: -+ case DIF_OP_STB: -+ case DIF_OP_STH: -+ case DIF_OP_STW: -+ case DIF_OP_STX: -+ case DIF_OP_ALLOCS: -+ case DIF_OP_CMP: -+ case DIF_OP_SCMP: -+ case DIF_OP_TST: -+ case DIF_OP_BA: -+ case DIF_OP_BE: -+ case DIF_OP_BNE: -+ case DIF_OP_BG: -+ case DIF_OP_BGU: -+ case DIF_OP_BGE: -+ case DIF_OP_BGEU: -+ case DIF_OP_BL: -+ case DIF_OP_BLU: -+ case DIF_OP_BLE: -+ case DIF_OP_BLEU: -+ case DIF_OP_RET: -+ case DIF_OP_NOP: -+ case DIF_OP_POPTS: -+ case DIF_OP_FLUSHTS: -+ case DIF_OP_SETX: -+ case DIF_OP_SETS: -+ case DIF_OP_LDGA: -+ case DIF_OP_LDLS: -+ case DIF_OP_STGS: -+ case DIF_OP_STLS: -+ case DIF_OP_PUSHTR: -+ case DIF_OP_PUSHTV: -+ break; -+ -+ case DIF_OP_LDGS: -+ if (v >= DIF_VAR_OTHER_UBASE) -+ break; -+ -+ if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) -+ break; -+ -+ if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID || -+ v == DIF_VAR_PPID || v == DIF_VAR_TID || -+ v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME || -+ v == DIF_VAR_UID || v == 
DIF_VAR_GID) -+ break; -+ -+ err += efunc(pc, "illegal variable %u\n", v); -+ break; -+ -+ case DIF_OP_LDTA: -+ case DIF_OP_LDGAA: -+ case DIF_OP_LDTAA: -+ err += efunc(pc, "illegal dynamic variable load\n"); -+ break; -+ -+ case DIF_OP_STTS: -+ case DIF_OP_STGAA: -+ case DIF_OP_STTAA: -+ err += efunc(pc, "illegal dynamic variable store\n"); -+ break; -+ -+ case DIF_OP_CALL: -+ if (subr == DIF_SUBR_ALLOCA || -+ subr == DIF_SUBR_BCOPY || -+ subr == DIF_SUBR_COPYIN || -+ subr == DIF_SUBR_COPYINTO || -+ subr == DIF_SUBR_COPYINSTR || -+ subr == DIF_SUBR_INDEX || -+ subr == DIF_SUBR_INET_NTOA || -+ subr == DIF_SUBR_INET_NTOA6 || -+ subr == DIF_SUBR_INET_NTOP || -+ subr == DIF_SUBR_LINK_NTOP || -+ subr == DIF_SUBR_LLTOSTR || -+ subr == DIF_SUBR_RINDEX || -+ subr == DIF_SUBR_STRCHR || -+ subr == DIF_SUBR_STRJOIN || -+ subr == DIF_SUBR_STRRCHR || -+ subr == DIF_SUBR_STRSTR || -+ subr == DIF_SUBR_HTONS || -+ subr == DIF_SUBR_HTONL || -+ subr == DIF_SUBR_HTONLL || -+ subr == DIF_SUBR_NTOHS || -+ subr == DIF_SUBR_NTOHL || -+ subr == DIF_SUBR_NTOHLL) -+ break; -+ -+ err += efunc(pc, "invalid subr %u\n", subr); -+ break; -+ -+ default: -+ err += efunc(pc, "invalid opcode %u\n", -+ DIF_INSTR_OP(instr)); -+ } -+ } -+ -+ return err; -+} -+ -+/* -+ * Returns 1 if the expression in the DIF object can be cached on a per-thread -+ * basis; 0 if not. -+ */ -+int dtrace_difo_cacheable(struct dtrace_difo *dp) -+{ -+ int i; -+ -+ if (dp == NULL) -+ return 0; -+ -+ for (i = 0; i < dp->dtdo_varlen; i++) { -+ struct dtrace_difv *v = &dp->dtdo_vartab[i]; -+ -+ if (v->dtdv_scope != DIFV_SCOPE_GLOBAL) -+ continue; -+ -+ switch (v->dtdv_id) { -+ case DIF_VAR_CURTHREAD: -+ case DIF_VAR_PID: -+ case DIF_VAR_TID: -+ case DIF_VAR_EXECNAME: -+ case DIF_VAR_ZONENAME: -+ break; -+ -+ default: -+ return 0; -+ } -+ } -+ -+ /* -+ * This DIF object may be cacheable. Now we need to look for any -+ * array loading instructions, any memory loading instructions, or -+ * any stores to thread-local variables. 
-+ */ -+ for (i = 0; i < dp->dtdo_len; i++) { -+ uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]); -+ -+ if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) || -+ (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) || -+ (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) || -+ op == DIF_OP_LDGA || op == DIF_OP_STTS) -+ return 0; -+ } -+ -+ return 1; -+} -+ -+/* -+ * This routine calculates the dynamic variable chunksize for a given DIF -+ * object. The calculation is not fool-proof, and can probably be tricked by -+ * malicious DIF -- but it works for all compiler-generated DIF. Because this -+ * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail -+ * if a dynamic variable size exceeds the chunksize. -+ */ -+static void dtrace_difo_chunksize(struct dtrace_difo *dp, -+ struct dtrace_vstate *vstate) -+{ -+ uint64_t sval = 0; -+ struct dtrace_key tupregs[DIF_DTR_NREGS + 2]; /* + thread + id */ -+ const dif_instr_t *text = dp->dtdo_buf; -+ uint_t pc, srd = 0; -+ uint_t ttop = 0; -+ size_t size, ksize; -+ uint_t id, i; -+ -+ for (pc = 0; pc < dp->dtdo_len; pc++) { -+ dif_instr_t instr = text[pc]; -+ uint_t op = DIF_INSTR_OP(instr); -+ uint_t rd = DIF_INSTR_RD(instr); -+ uint_t r1 = DIF_INSTR_R1(instr); -+ uint_t nkeys = 0; -+ uchar_t scope; -+ struct dtrace_key *key = tupregs; -+ -+ switch (op) { -+ case DIF_OP_SETX: -+ sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)]; -+ srd = rd; -+ continue; -+ -+ case DIF_OP_STTS: -+ key = &tupregs[DIF_DTR_NREGS]; -+ key[0].dttk_size = 0; -+ key[1].dttk_size = 0; -+ nkeys = 2; -+ scope = DIFV_SCOPE_THREAD; -+ break; -+ -+ case DIF_OP_STGAA: -+ case DIF_OP_STTAA: -+ nkeys = ttop; -+ -+ if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) -+ key[nkeys++].dttk_size = 0; -+ -+ key[nkeys++].dttk_size = 0; -+ -+ if (op == DIF_OP_STTAA) -+ scope = DIFV_SCOPE_THREAD; -+ else -+ scope = DIFV_SCOPE_GLOBAL; -+ -+ break; -+ -+ case DIF_OP_PUSHTR: -+ if (ttop == DIF_DTR_NREGS) -+ return; -+ -+ /* -+ * If the register for the size of the "pushtr" is %r0 -+ * (or the 
value is 0) and the type is a string, we'll -+ * use the system-wide default string size. -+ */ -+ if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) -+ tupregs[ttop++].dttk_size = -+ dtrace_strsize_default; -+ else { -+ if (srd == 0) -+ return; -+ -+ tupregs[ttop++].dttk_size = sval; -+ } -+ -+ break; -+ -+ case DIF_OP_PUSHTV: -+ if (ttop == DIF_DTR_NREGS) -+ return; -+ -+ tupregs[ttop++].dttk_size = 0; -+ break; -+ -+ case DIF_OP_FLUSHTS: -+ ttop = 0; -+ break; -+ -+ case DIF_OP_POPTS: -+ if (ttop != 0) -+ ttop--; -+ break; -+ } -+ -+ sval = 0; -+ srd = 0; -+ -+ if (nkeys == 0) -+ continue; -+ -+ /* -+ * We have a dynamic variable allocation; calculate its size. -+ */ -+ for (ksize = 0, i = 0; i < nkeys; i++) -+ ksize += P2ROUNDUP(key[i].dttk_size, sizeof(uint64_t)); -+ -+ size = sizeof(struct dtrace_dynvar); -+ size += sizeof(struct dtrace_key) * (nkeys - 1); -+ size += ksize; -+ -+ /* -+ * Now we need to determine the size of the stored data. -+ */ -+ id = DIF_INSTR_VAR(instr); -+ -+ for (i = 0; i < dp->dtdo_varlen; i++) { -+ struct dtrace_difv *v = &dp->dtdo_vartab[i]; -+ -+ if (v->dtdv_id == id && v->dtdv_scope == scope) { -+ size += v->dtdv_type.dtdt_size; -+ break; -+ } -+ } -+ -+ if (i == dp->dtdo_varlen) -+ return; -+ -+ /* -+ * We have the size. If this is larger than the chunk size -+ * for our dynamic variable state, reset the chunk size. 
-+ */ -+ size = P2ROUNDUP(size, sizeof(uint64_t)); -+ -+ if (size > vstate->dtvs_dynvars.dtds_chunksize) -+ vstate->dtvs_dynvars.dtds_chunksize = size; -+ } -+} -+ -+void dtrace_difo_hold(struct dtrace_difo *dp) -+{ -+ int i; -+ -+ dp->dtdo_refcnt++; -+ ASSERT(dp->dtdo_refcnt != 0); -+ -+ for (i = 0; i < dp->dtdo_varlen; i++) { -+ struct dtrace_difv *v = &dp->dtdo_vartab[i]; -+ -+ if (v->dtdv_id != DIF_VAR_VTIMESTAMP) -+ continue; -+ -+ if (dtrace_vtime_references++ == 0) -+ dtrace_vtime_enable(); -+ } -+} -+ -+void dtrace_difo_init(struct dtrace_difo *dp, struct dtrace_vstate *vstate) -+{ -+ int i, oldsvars, osz, nsz, otlocals, ntlocals; -+ uint_t id; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); -+ -+ for (i = 0; i < dp->dtdo_varlen; i++) { -+ struct dtrace_difv *v = &dp->dtdo_vartab[i]; -+ struct dtrace_statvar *svar, ***svarp; -+ size_t dsize = 0; -+ uint8_t scope = v->dtdv_scope; -+ int *np; -+ -+ id = v->dtdv_id; -+ if (id < DIF_VAR_OTHER_UBASE) -+ continue; -+ -+ id -= DIF_VAR_OTHER_UBASE; -+ -+ switch (scope) { -+ case DIFV_SCOPE_THREAD: -+ while (id >= (otlocals = vstate->dtvs_ntlocals)) { -+ struct dtrace_difv *tlocals; -+ -+ ntlocals = otlocals << 1; -+ if (ntlocals == 0) -+ ntlocals = 1; -+ -+ osz = otlocals * sizeof(struct dtrace_difv); -+ nsz = ntlocals * sizeof(struct dtrace_difv); -+ -+ tlocals = vzalloc(nsz); -+ -+ if (osz != 0) { -+ memcpy(tlocals, vstate->dtvs_tlocals, -+ osz); -+ vfree(vstate->dtvs_tlocals); -+ } -+ -+ vstate->dtvs_tlocals = tlocals; -+ vstate->dtvs_ntlocals = ntlocals; -+ } -+ -+ vstate->dtvs_tlocals[id] = *v; -+ continue; -+ -+ case DIFV_SCOPE_LOCAL: -+ np = &vstate->dtvs_nlocals; -+ svarp = &vstate->dtvs_locals; -+ -+ if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) -+ dsize = NR_CPUS * -+ (v->dtdv_type.dtdt_size + -+ sizeof(uint64_t)); -+ else -+ dsize = NR_CPUS * sizeof(uint64_t); -+ -+ break; -+ -+ case DIFV_SCOPE_GLOBAL: -+ np = &vstate->dtvs_nglobals; -+ svarp = 
&vstate->dtvs_globals; -+ -+ if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) -+ dsize = v->dtdv_type.dtdt_size + -+ sizeof(uint64_t); -+ -+ break; -+ -+ default: -+ ASSERT(0); -+ continue; /* not reached */ -+ } -+ -+ while (id >= (oldsvars = *np)) { -+ struct dtrace_statvar **statics; -+ int newsvars, oldsize, newsize; -+ -+ newsvars = oldsvars << 1; -+ if (newsvars == 0) -+ newsvars = 1; -+ -+ oldsize = oldsvars * sizeof(struct dtrace_statvar *); -+ newsize = newsvars * sizeof(struct dtrace_statvar *); -+ -+ statics = vzalloc(newsize); -+ -+ if (oldsize != 0) { -+ memcpy(statics, *svarp, oldsize); -+ vfree(*svarp); -+ } -+ -+ *svarp = statics; -+ *np = newsvars; -+ } -+ -+ svar = (*svarp)[id]; -+ if (svar == NULL) { -+ svar = kzalloc(sizeof(struct dtrace_statvar), -+ GFP_KERNEL); -+ svar->dtsv_var = *v; -+ -+ svar->dtsv_size = dsize; -+ if (svar->dtsv_size != 0) { -+ svar->dtsv_data = -+ (uint64_t)(uintptr_t)vzalloc(dsize); -+ } -+ -+ (*svarp)[id] = svar; -+ } -+ -+ svar->dtsv_refcnt++; -+ } -+ -+ dtrace_difo_chunksize(dp, vstate); -+ dtrace_difo_hold(dp); -+} -+ -+struct dtrace_difo *dtrace_difo_duplicate(struct dtrace_difo *dp, -+ struct dtrace_vstate *vstate) -+{ -+ struct dtrace_difo *new; -+ size_t sz; -+ -+ ASSERT(dp->dtdo_buf != NULL); -+ ASSERT(dp->dtdo_refcnt != 0); -+ -+ new = kzalloc(sizeof(struct dtrace_difo), GFP_KERNEL); -+ -+ ASSERT(dp->dtdo_buf != NULL); -+ sz = dp->dtdo_len * sizeof(dif_instr_t); -+ new->dtdo_buf = vmalloc(sz); -+ memcpy(new->dtdo_buf, dp->dtdo_buf, sz); -+ new->dtdo_len = dp->dtdo_len; -+ -+ if (dp->dtdo_strtab != NULL) { -+ ASSERT(dp->dtdo_strlen != 0); -+ new->dtdo_strtab = vmalloc(dp->dtdo_strlen); -+ memcpy(new->dtdo_strtab, dp->dtdo_strtab, dp->dtdo_strlen); -+ new->dtdo_strlen = dp->dtdo_strlen; -+ } -+ -+ if (dp->dtdo_inttab != NULL) { -+ ASSERT(dp->dtdo_intlen != 0); -+ sz = dp->dtdo_intlen * sizeof(uint64_t); -+ new->dtdo_inttab = vmalloc(sz); -+ memcpy(new->dtdo_inttab, dp->dtdo_inttab, sz); -+ new->dtdo_intlen = 
dp->dtdo_intlen; -+ } -+ -+ if (dp->dtdo_vartab != NULL) { -+ ASSERT(dp->dtdo_varlen != 0); -+ sz = dp->dtdo_varlen * sizeof(struct dtrace_difv); -+ new->dtdo_vartab = vmalloc(sz); -+ memcpy(new->dtdo_vartab, dp->dtdo_vartab, sz); -+ new->dtdo_varlen = dp->dtdo_varlen; -+ } -+ -+ dtrace_difo_init(new, vstate); -+ -+ return new; -+} -+ -+void dtrace_difo_destroy(struct dtrace_difo *dp, struct dtrace_vstate *vstate) -+{ -+ int i; -+ -+ ASSERT(dp->dtdo_refcnt == 0); -+ -+ for (i = 0; i < dp->dtdo_varlen; i++) { -+ struct dtrace_difv *v = &dp->dtdo_vartab[i]; -+ struct dtrace_statvar *svar, **svarp; -+ uint_t id; -+ uint8_t scope = v->dtdv_scope; -+ int *np; -+ -+ switch (scope) { -+ case DIFV_SCOPE_THREAD: -+ continue; -+ -+ case DIFV_SCOPE_LOCAL: -+ np = &vstate->dtvs_nlocals; -+ svarp = vstate->dtvs_locals; -+ break; -+ -+ case DIFV_SCOPE_GLOBAL: -+ np = &vstate->dtvs_nglobals; -+ svarp = vstate->dtvs_globals; -+ break; -+ -+ default: -+ BUG(); -+ } -+ -+ id = v->dtdv_id; -+ if (id < DIF_VAR_OTHER_UBASE) -+ continue; -+ -+ id -= DIF_VAR_OTHER_UBASE; -+ ASSERT(id < *np); -+ -+ svar = svarp[id]; -+ ASSERT(svar != NULL); -+ ASSERT(svar->dtsv_refcnt > 0); -+ -+ if (--svar->dtsv_refcnt > 0) -+ continue; -+ -+ if (svar->dtsv_size != 0) { -+ ASSERT((void *)(uintptr_t)svar->dtsv_data != NULL); -+ vfree((void *)(uintptr_t)svar->dtsv_data); -+ } -+ -+ kfree(svar); -+ svarp[id] = NULL; -+ } -+ -+ vfree(dp->dtdo_buf); -+ vfree(dp->dtdo_inttab); -+ vfree(dp->dtdo_strtab); -+ vfree(dp->dtdo_vartab); -+ kfree(dp); -+} -+ -+void dtrace_difo_release(struct dtrace_difo *dp, struct dtrace_vstate *vstate) -+{ -+ int i; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(dp->dtdo_refcnt != 0); -+ -+ for (i = 0; i < dp->dtdo_varlen; i++) { -+ struct dtrace_difv *v = &dp->dtdo_vartab[i]; -+ -+ if (v->dtdv_id != DIF_VAR_VTIMESTAMP) -+ continue; -+ -+ ASSERT(dtrace_vtime_references > 0); -+ -+ if (--dtrace_vtime_references == 0) -+ dtrace_vtime_disable(); -+ } -+ -+ if (--dp->dtdo_refcnt == 
0) -+ dtrace_difo_destroy(dp, vstate); -+} -+ -+/* -+ * The key for a thread-local variable consists of the lower 60 bits of the -+ * task pid, prefixed by a 4 bits indicating whether a hard_irq is active. -+ * This accounts for a case where some older drivers re-enable interrupts -+ * and can nest in hard irq context. -+ * -+ * All per-cpu idle threads share same pid 0. In this special case we replace -+ * the pid with cpu id (an idle thread is bound to a single cpu). If pid is -+ * not 0 then a NR_CPUS is added. This assures that the thread key for idle -+ * thread never conflicts with regular pids in range 0..NR_CPUS. -+ * -+ * We add DIF_VARIABLE_MAX to the pid to assure that the thread key is never -+ * equal to a variable identifier. This is necessary (but not sufficient) to -+ * assure that global associative arrays never collide with thread-local -+ * variables. To guarantee that they cannot collide, we must also define the -+ * order for keying dynamic variables. That order is: -+ * -+ * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ] -+ * -+ * Because the variable-key and the tls-key are in orthogonal spaces, there is -+ * no way for a global variable key signature to match a thread-local key -+ * signature. -+ */ -+#define DTRACE_TLS_THRKEY(where) \ -+ { \ -+ uint_t intr = hardirq_count() >> HARDIRQ_SHIFT; \ -+ uint_t cpu = (current->flags & PF_IDLE) ? \ -+ smp_processor_id() : NR_CPUS; \ -+ \ -+ (where) = ((current->pid + cpu + DIF_VARIABLE_MAX) & \ -+ (((uint64_t)1 << 60) - 1)) | \ -+ ((uint64_t)intr << 60); \ -+ } -+ -+#ifndef FIXME -+# define DTRACE_ALIGNCHECK(addr, size, flags) -+#endif -+ -+/* -+ * Test whether a range of memory starting at testaddr of size testsz falls -+ * within the range of memory described by addr, sz. We take care to avoid -+ * problems with overflow and underflow of the unsigned quantities, and -+ * disallow all negative sizes. Ranges of size 0 are allowed. 
-+ */ -+#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \ -+ ((testaddr) - (baseaddr) < (basesz) && \ -+ (testaddr) + (testsz) - (baseaddr) <= (basesz) && \ -+ (testaddr) + (testsz) >= (testaddr)) -+ -+#define DTRACE_LOADFUNC(bits) \ -+ uint##bits##_t dtrace_load##bits(uintptr_t addr) \ -+ { \ -+ size_t size = bits / NBBY; \ -+ uint##bits##_t rval; \ -+ int i; \ -+ volatile uint16_t *flags = (volatile uint16_t *) \ -+ &this_cpu_core->cpuc_dtrace_flags; \ -+ \ -+ /* \ -+ * Deviation from the OpenSolaris code... Protect \ -+ * against dereferencing the NULL pointer since that \ -+ * really causes us a lot of grief (crash). \ -+ */ \ -+ if (addr == 0) { \ -+ *flags |= CPU_DTRACE_BADADDR; \ -+ this_cpu_core->cpuc_dtrace_illval = addr; \ -+ return 0; \ -+ } \ -+ \ -+ DTRACE_ALIGNCHECK(addr, size, flags); \ -+ \ -+ for (i = 0; i < dtrace_toxranges; i++) { \ -+ if (addr >= dtrace_toxrange[i].dtt_limit) \ -+ continue; \ -+ \ -+ if (addr + size <= dtrace_toxrange[i].dtt_base) \ -+ continue; \ -+ \ -+ /* \ -+ * This address falls within a toxic region. \ -+ */ \ -+ *flags |= CPU_DTRACE_BADADDR; \ -+ this_cpu_core->cpuc_dtrace_illval = addr; \ -+ return 0; \ -+ } \ -+ \ -+ *flags |= CPU_DTRACE_NOFAULT; \ -+ rval = *((volatile uint##bits##_t *)addr); \ -+ *flags &= ~CPU_DTRACE_NOFAULT; \ -+ \ -+ return !(*flags & CPU_DTRACE_FAULT) ? rval : 0; \ -+ } -+ -+/* -+ * Use the DTRACE_LOADFUNC macro to define functions for each of loading a -+ * uint8_t, a uint16_t, a uint32_t and a uint64_t. 
-+ */ -+DTRACE_LOADFUNC(8) -+DTRACE_LOADFUNC(16) -+DTRACE_LOADFUNC(32) -+DTRACE_LOADFUNC(64) -+ -+#define DT_BSWAP_8(x) ((x) & 0xff) -+#define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8)) -+#define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16)) -+#define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32)) -+ -+static int dtrace_inscratch(uintptr_t dest, size_t size, -+ struct dtrace_mstate *mstate) -+{ -+ if (dest < mstate->dtms_scratch_base) -+ return 0; -+ -+ if (dest + size < dest) -+ return 0; -+ -+ if (dest + size > mstate->dtms_scratch_ptr) -+ return 0; -+ -+ return 1; -+} -+ -+static int dtrace_canstore_statvar(uint64_t addr, size_t sz, -+ struct dtrace_statvar **svars, int nsvars) -+{ -+ int i; -+ -+ for (i = 0; i < nsvars; i++) { -+ struct dtrace_statvar *svar = svars[i]; -+ -+ if (svar == NULL || svar->dtsv_size == 0) -+ continue; -+ -+ if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Check to see if the address is within a memory region to which a store may -+ * be issued. This includes the DTrace scratch areas, and any DTrace variable -+ * region. The caller of dtrace_canstore() is responsible for performing any -+ * alignment checks that are needed before stores are actually executed. -+ */ -+static int dtrace_canstore(uint64_t addr, size_t sz, -+ struct dtrace_mstate *mstate, -+ struct dtrace_vstate *vstate) -+{ -+ /* -+ * First, check to see if the address is in scratch space... -+ */ -+ if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, -+ mstate->dtms_scratch_size)) -+ return 1; -+ -+ /* -+ * Now check to see if it's a dynamic variable. This check will pick -+ * up both thread-local variables and any global dynamically-allocated -+ * variables. 
-+ */ -+ if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base, -+ vstate->dtvs_dynvars.dtds_size)) { -+ struct dtrace_dstate *dstate = &vstate->dtvs_dynvars; -+ uintptr_t base = (uintptr_t)dstate->dtds_base + -+ (dstate->dtds_hashsize * -+ sizeof(struct dtrace_dynhash)); -+ uintptr_t chunkoffs; -+ uint64_t num; -+ -+ /* -+ * Before we assume that we can store here, we need to make -+ * sure that it isn't in our metadata -- storing to our -+ * dynamic variable metadata would corrupt our state. For -+ * the range to not include any dynamic variable metadata, -+ * it must: -+ * -+ * (1) Start above the hash table that is at the base of -+ * the dynamic variable space -+ * -+ * (2) Have a starting chunk offset that is beyond the -+ * dtrace_dynvar_t that is at the base of every chunk -+ * -+ * (3) Not span a chunk boundary -+ */ -+ if (addr < base) -+ return 0; -+ -+ num = addr - base; -+ chunkoffs = do_div(num, dstate->dtds_chunksize); -+ -+ if (chunkoffs < sizeof(struct dtrace_dynvar)) -+ return 0; -+ -+ if (chunkoffs + sz > dstate->dtds_chunksize) -+ return 0; -+ -+ return 1; -+ } -+ -+ /* -+ * Finally, check the static local and global variables. These checks -+ * take the longest, so we perform them last. -+ */ -+ if (dtrace_canstore_statvar(addr, sz, vstate->dtvs_locals, -+ vstate->dtvs_nlocals)) -+ return 1; -+ -+ if (dtrace_canstore_statvar(addr, sz, vstate->dtvs_globals, -+ vstate->dtvs_nglobals)) -+ return 1; -+ -+ return 0; -+} -+ -+/* -+ * Convenience routine to check to see if the address is within a memory -+ * region in which a load may be issued given the user's privilege level; -+ * if not, it sets the appropriate error flags and loads 'addr' into the -+ * illegal value slot. -+ * -+ * DTrace subroutines (DIF_SUBR_*) should use this helper to implement -+ * appropriate memory access protection. 
-+ */ -+int -+dtrace_canload(uintptr_t addr, size_t sz, struct dtrace_mstate *mstate, -+ struct dtrace_vstate *vstate) -+{ -+ volatile uintptr_t *illval = &this_cpu_core->cpuc_dtrace_illval; -+ -+ /* -+ * If we hold the privilege to read from kernel memory, then -+ * everything is readable. -+ */ -+ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) -+ return 1; -+ -+ /* -+ * You can obviously read that which you can store. -+ */ -+ if (dtrace_canstore(addr, sz, mstate, vstate)) -+ return 1; -+ -+ /* -+ * We're allowed to read from our own string table. -+ */ -+ if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab, -+ mstate->dtms_difo->dtdo_strlen)) -+ return 1; -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); -+ *illval = addr; -+ -+ return 0; -+} -+ -+/* -+ * Convenience routine to check to see if a given string is within a memory -+ * region in which a load may be issued given the user's privilege level; -+ * this exists so that we don't need to issue unnecessary dtrace_strlen() -+ * calls in the event that the user has all privileges. -+ */ -+static int -+dtrace_strcanload(uint64_t addr, size_t sz, struct dtrace_mstate *mstate, -+ struct dtrace_vstate *vstate) -+{ -+ size_t strsz; -+ -+ /* -+ * If we hold the privilege to read from kernel memory, then -+ * everything is readable. -+ */ -+ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) -+ return 1; -+ -+ strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz); -+ if (dtrace_canload(addr, strsz, mstate, vstate)) -+ return 1; -+ -+ return 0; -+} -+ -+/* -+ * Convenience routine to check to see if a given variable is within a memory -+ * region in which a load may be issued given the user's privilege level. 
-+ */ -+int dtrace_vcanload(void *src, struct dtrace_diftype *diftype, -+ struct dtrace_mstate *mstate, -+ struct dtrace_vstate *vstate) -+{ -+ size_t sz; -+ -+ ASSERT(diftype->dtdt_flags & DIF_TF_BYREF); -+ -+ /* -+ * If we hold the privilege to read from kernel memory, then -+ * everything is readable. -+ */ -+ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) -+ return 1; -+ -+ if (diftype->dtdt_kind == DIF_TYPE_STRING) -+ sz = dtrace_strlen( -+ src, -+ vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE] -+ ) + 1; -+ else -+ sz = diftype->dtdt_size; -+ -+ return dtrace_canload((uintptr_t)src, sz, mstate, vstate); -+} -+ -+/* -+ * Copy src to dst using safe memory accesses. The src is assumed to be unsafe -+ * memory specified by the DIF program. The dst is assumed to be safe memory -+ * that we can store to directly because it is managed by DTrace. As with -+ * standard bcopy, overlapping copies are handled properly. -+ */ -+static void dtrace_bcopy(const void *src, void *dst, size_t len) -+{ -+ if (len != 0) { -+ uint8_t *s1 = dst; -+ const uint8_t *s2 = src; -+ -+ if (s1 <= s2) { -+ do { -+ *s1++ = dtrace_load8((uintptr_t)s2++); -+ } while (--len != 0); -+ } else { -+ s2 += len; -+ s1 += len; -+ -+ do { -+ *--s1 = dtrace_load8((uintptr_t)--s2); -+ } while (--len != 0); -+ } -+ } -+} -+ -+/* -+ * Copy src to dst using safe memory accesses, up to either the specified -+ * length, or the point that a nul byte is encountered. The src is assumed to -+ * be unsafe memory specified by the DIF program. The dst is assumed to be -+ * safe memory that we can store to directly because it is managed by DTrace. -+ * Unlike dtrace_bcopy(), overlapping regions are not handled. 
-+ */ -+static void dtrace_strcpy(const void *src, void *dst, size_t len) -+{ -+ if (len != 0) { -+ uint8_t *s1 = dst, c; -+ const uint8_t *s2 = src; -+ -+ do { -+ *s1++ = c = dtrace_load8((uintptr_t)s2++); -+ } while (--len != 0 && c != '\0'); -+ } -+} -+/* -+ * Copy src to dst, deriving the size and type from the specified (BYREF) -+ * variable type. The src is assumed to be unsafe memory specified by the DIF -+ * program. The dst is assumed to be DTrace variable memory that is of the -+ * specified type; we assume that we can store to directly. -+ */ -+static void dtrace_vcopy(void *src, void *dst, struct dtrace_diftype *diftype) -+{ -+ ASSERT(diftype->dtdt_flags & DIF_TF_BYREF); -+ -+ if (diftype->dtdt_kind == DIF_TYPE_STRING) -+ dtrace_strcpy(src, dst, diftype->dtdt_size); -+ else -+ dtrace_bcopy(src, dst, diftype->dtdt_size); -+} -+ -+/* -+ * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be -+ * unsafe memory specified by the DIF program. The s2 data is assumed to be -+ * safe memory that we can access directly because it is managed by DTrace. -+ */ -+static int dtrace_bcmp(const void *s1, const void *s2, size_t len) -+{ -+ volatile uint16_t *flags; -+ -+ flags = (volatile uint16_t *)&this_cpu_core->cpuc_dtrace_flags; -+ -+ if (s1 == s2) -+ return 0; -+ -+ if (s1 == NULL || s2 == NULL) -+ return 1; -+ -+ if (s1 != s2 && len != 0) { -+ const uint8_t *ps1 = s1; -+ const uint8_t *ps2 = s2; -+ -+ do { -+ if (dtrace_load8((uintptr_t)ps1++) != *ps2++) -+ return 1; -+ } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); -+ } -+ -+ return 0; -+} -+ -+/* -+ * Zero the specified region using a simple byte-by-byte loop. Note that this -+ * is for safe DTrace-managed memory only. 
-+ */ -+void dtrace_bzero(void *dst, size_t len) -+{ -+ uchar_t *cp; -+ -+ for (cp = dst; len != 0; len--) -+ *cp++ = 0; -+} -+ -+#define DTRACE_DYNHASH_FREE 0 -+#define DTRACE_DYNHASH_SINK 1 -+#define DTRACE_DYNHASH_VALID 2 -+ -+/* -+ * Depending on the value of the op parameter, this function looks-up, -+ * allocates or deallocates an arbitrarily-keyed dynamic variable. If an -+ * allocation is requested, this function will return a pointer to a -+ * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no -+ * variable can be allocated. If NULL is returned, the appropriate counter -+ * will be incremented. -+ */ -+static struct dtrace_dynvar *dtrace_dynvar(struct dtrace_dstate *dstate, -+ uint_t nkeys, -+ struct dtrace_key *key, -+ size_t dsize, -+ enum dtrace_dynvar_op op, -+ struct dtrace_mstate *mstate, -+ struct dtrace_vstate *vstate) -+{ -+ uint64_t hashval = DTRACE_DYNHASH_VALID; -+ struct dtrace_dynhash *hash = dstate->dtds_hash; -+ struct dtrace_dynvar *free, *new_free, *next, *dvar, *start, -+ *prev = NULL; -+ processorid_t me = smp_processor_id(), cpu = me; -+ struct dtrace_dstate_percpu *dcpu = &dstate->dtds_percpu[me]; -+ size_t bucket, ksize; -+ size_t chunksize = dstate->dtds_chunksize; -+ uintptr_t kdata, lock; -+ enum dtrace_dstate_state nstate; -+ uint_t i; -+ -+ ASSERT(nkeys != 0); -+ -+ /* -+ * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time" -+ * algorithm. For the by-value portions, we perform the algorithm in -+ * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a -+ * bit, and seems to have only a minute effect on distribution. For -+ * the by-reference data, we perform "One-at-a-time" iterating (safely) -+ * over each referenced byte. It's painful to do this, but it's much -+ * better than pathological hash distribution. The efficacy of the -+ * hashing algorithm (and a comparison with other algorithms) may be -+ * found by running the ::dtrace_dynstat MDB dcmd. 
-+ */ -+ for (i = 0; i < nkeys; i++) { -+ if (key[i].dttk_size == 0) { -+ uint64_t val = key[i].dttk_value; -+ -+ hashval += (val >> 48) & 0xffff; -+ hashval += (hashval << 10); -+ hashval ^= (hashval >> 6); -+ -+ hashval += (val >> 32) & 0xffff; -+ hashval += (hashval << 10); -+ hashval ^= (hashval >> 6); -+ -+ hashval += (val >> 16) & 0xffff; -+ hashval += (hashval << 10); -+ hashval ^= (hashval >> 6); -+ -+ hashval += val & 0xffff; -+ hashval += (hashval << 10); -+ hashval ^= (hashval >> 6); -+ } else { -+ /* -+ * This is incredibly painful, but it beats the hell -+ * out of the alternative. -+ */ -+ uint64_t j, size = key[i].dttk_size; -+ uintptr_t base = (uintptr_t)key[i].dttk_value; -+ -+ if (!dtrace_canload(base, size, mstate, vstate)) -+ break; -+ -+ for (j = 0; j < size; j++) { -+ hashval += dtrace_load8(base + j); -+ hashval += (hashval << 10); -+ hashval ^= (hashval >> 6); -+ } -+ } -+ } -+ -+ if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) -+ return NULL; -+ -+ hashval += (hashval << 3); -+ hashval ^= (hashval >> 11); -+ hashval += (hashval << 15); -+ -+ /* -+ * There is a remote chance (ideally, 1 in 2^31) that our hashval -+ * comes out to be one of our two sentinel hash values. If this -+ * actually happens, we set the hashval to be a value known to be a -+ * non-sentinel value. -+ */ -+ if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK) -+ hashval = DTRACE_DYNHASH_VALID; -+ -+ /* -+ * Yes, it's painful to do a divide here. If the cycle count becomes -+ * important here, tricks can be pulled to reduce it. (However, it's -+ * critical that hash collisions be kept to an absolute minimum; -+ * they're much more painful than a divide.) It's better to have a -+ * solution that generates few collisions and still keeps things -+ * relatively simple. -+ * -+ * Linux cannot do a straight 64-bit divide without gcc requiring -+ * linking in code that the kernel doesn't link, so we need to use an -+ * alternative. 
-+ * -+ * bucket = hashval % dstate->dtds_hashsize; -+ */ -+ { -+ uint64_t num; -+ -+ num = hashval; -+ bucket = do_div(num, dstate->dtds_hashsize); -+ } -+ -+ if (op == DTRACE_DYNVAR_DEALLOC) { -+ volatile uintptr_t *lockp = &hash[bucket].dtdh_lock; -+ -+ for (;;) { -+ while ((lock = *lockp) & 1) -+ continue; -+ -+ if (cmpxchg(lockp, lock, (lock + 1)) == lock) -+ break; -+ } -+ -+ dtrace_membar_producer(); -+ } -+ -+top: -+ prev = NULL; -+ lock = hash[bucket].dtdh_lock; -+ -+ dtrace_membar_consumer(); -+ -+ start = hash[bucket].dtdh_chain; -+ ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK || -+ start->dtdv_hashval != DTRACE_DYNHASH_FREE || -+ op != DTRACE_DYNVAR_DEALLOC)); -+ -+ for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) { -+ struct dtrace_tuple *dtuple = &dvar->dtdv_tuple; -+ struct dtrace_key *dkey = &dtuple->dtt_key[0]; -+ -+ if (dvar->dtdv_hashval != hashval) { -+ if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) { -+ /* -+ * We've reached the sink, and therefore the -+ * end of the hash chain; we can kick out of -+ * the loop knowing that we have seen a valid -+ * snapshot of state. -+ */ -+ ASSERT(dvar->dtdv_next == NULL); -+ ASSERT(dvar == &dtrace_dynhash_sink); -+ break; -+ } -+ -+ if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) { -+ /* -+ * We've gone off the rails: somewhere along -+ * the line, one of the members of this hash -+ * chain was deleted. Note that we could also -+ * detect this by simply letting this loop run -+ * to completion, as we would eventually hit -+ * the end of the dirty list. However, we -+ * want to avoid running the length of the -+ * dirty list unnecessarily (it might be quite -+ * long), so we catch this as early as -+ * possible by detecting the hash marker. In -+ * this case, we simply set dvar to NULL and -+ * break; the conditional after the loop will -+ * send us back to top. 
-+ */ -+ dvar = NULL; -+ break; -+ } -+ -+ goto next; -+ } -+ -+ if (dtuple->dtt_nkeys != nkeys) -+ goto next; -+ -+ for (i = 0; i < nkeys; i++, dkey++) { -+ if (dkey->dttk_size != key[i].dttk_size) -+ goto next; /* size or type mismatch */ -+ -+ if (dkey->dttk_size != 0) { -+ if (dtrace_bcmp( -+ (void *)(uintptr_t)key[i].dttk_value, -+ (void *)(uintptr_t)dkey->dttk_value, -+ dkey->dttk_size)) -+ goto next; -+ } else { -+ if (dkey->dttk_value != key[i].dttk_value) -+ goto next; -+ } -+ } -+ -+ if (op != DTRACE_DYNVAR_DEALLOC) -+ return dvar; -+ -+ ASSERT(dvar->dtdv_next == NULL || -+ dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE); -+ -+ if (prev != NULL) { -+ ASSERT(hash[bucket].dtdh_chain != dvar); -+ ASSERT(start != dvar); -+ ASSERT(prev->dtdv_next == dvar); -+ prev->dtdv_next = dvar->dtdv_next; -+ } else { -+ if (cmpxchg(&hash[bucket].dtdh_chain, start, -+ dvar->dtdv_next) != start) { -+ /* -+ * We have failed to atomically swing the -+ * hash table head pointer, presumably because -+ * of a conflicting allocation on another CPU. -+ * We need to reread the hash chain and try -+ * again. -+ */ -+ goto top; -+ } -+ } -+ -+ dtrace_membar_producer(); -+ -+ /* -+ * Now set the hash value to indicate that it's free. -+ */ -+ ASSERT(hash[bucket].dtdh_chain != dvar); -+ dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; -+ -+ dtrace_membar_producer(); -+ -+ /* -+ * Set the next pointer to point at the dirty list, and -+ * atomically swing the dirty pointer to the newly freed dvar. -+ */ -+ do { -+ next = dcpu->dtdsc_dirty; -+ dvar->dtdv_next = next; -+ } while (cmpxchg(&dcpu->dtdsc_dirty, next, dvar) != next); -+ -+ /* -+ * Finally, unlock this hash bucket. 
-+ */ -+ ASSERT(hash[bucket].dtdh_lock == lock); -+ ASSERT(lock & 1); -+ hash[bucket].dtdh_lock++; -+ -+ return NULL; -+next: -+ prev = dvar; -+ continue; -+ } -+ -+ if (dvar == NULL) { -+ /* -+ * If dvar is NULL, it is because we went off the rails: -+ * one of the elements that we traversed in the hash chain -+ * was deleted while we were traversing it. In this case, -+ * we assert that we aren't doing a dealloc (deallocs lock -+ * the hash bucket to prevent themselves from racing with -+ * one another), and retry the hash chain traversal. -+ */ -+ ASSERT(op != DTRACE_DYNVAR_DEALLOC); -+ goto top; -+ } -+ -+ if (op != DTRACE_DYNVAR_ALLOC) { -+ /* -+ * If we are not to allocate a new variable, we want to -+ * return NULL now. Before we return, check that the value -+ * of the lock word hasn't changed. If it has, we may have -+ * seen an inconsistent snapshot. -+ */ -+ if (op == DTRACE_DYNVAR_NOALLOC) { -+ if (hash[bucket].dtdh_lock != lock) -+ goto top; -+ } else { -+ ASSERT(op == DTRACE_DYNVAR_DEALLOC); -+ ASSERT(hash[bucket].dtdh_lock == lock); -+ ASSERT(lock & 1); -+ hash[bucket].dtdh_lock++; -+ } -+ -+ return NULL; -+ } -+ -+ /* -+ * We need to allocate a new dynamic variable. The size we need is the -+ * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the -+ * size of any auxiliary key data (rounded up to 8-byte alignment) plus -+ * the size of any referred-to data (dsize). We then round the final -+ * size up to the chunksize for allocation. -+ */ -+ for (ksize = 0, i = 0; i < nkeys; i++) -+ ksize += P2ROUNDUP(key[i].dttk_size, sizeof(uint64_t)); -+ -+ /* -+ * This should be pretty much impossible, but could happen if, say, -+ * strange DIF specified the tuple. Ideally, this should be an -+ * assertion and not an error condition -- but that requires that the -+ * chunksize calculation in dtrace_difo_chunksize() be absolutely -+ * bullet-proof. (That is, it must not be able to be fooled by -+ * malicious DIF.) 
Given the lack of backwards branches in DIF, -+ * solving this would presumably not amount to solving the Halting -+ * Problem -- but it still seems awfully hard. -+ */ -+ if (sizeof(struct dtrace_dynvar) + -+ sizeof(struct dtrace_key) * (nkeys - 1) + -+ ksize + dsize > chunksize) { -+ dcpu->dtdsc_drops++; -+ return NULL; -+ } -+ -+ nstate = DTRACE_DSTATE_EMPTY; -+ -+ do { -+retry: -+ free = dcpu->dtdsc_free; -+ -+ if (free == NULL) { -+ struct dtrace_dynvar *clean = dcpu->dtdsc_clean; -+ void *rval; -+ -+ if (clean == NULL) { -+ /* -+ * We're out of dynamic variable space on -+ * this CPU. Unless we have tried all CPUs, -+ * we'll try to allocate from a different -+ * CPU. -+ */ -+ switch (dstate->dtds_state) { -+ case DTRACE_DSTATE_CLEAN: { -+ enum dtrace_dstate_state *sp = -+ (enum dtrace_dstate_state *) -+ &dstate->dtds_state; -+ -+ if (++cpu >= NR_CPUS) -+ cpu = 0; -+ -+ if (dcpu->dtdsc_dirty != NULL && -+ nstate == DTRACE_DSTATE_EMPTY) -+ nstate = DTRACE_DSTATE_DIRTY; -+ -+ if (dcpu->dtdsc_rinsing != NULL) -+ nstate = DTRACE_DSTATE_RINSING; -+ -+ dcpu = &dstate->dtds_percpu[cpu]; -+ -+ if (cpu != me) -+ goto retry; -+ -+ cmpxchg(sp, DTRACE_DSTATE_CLEAN, -+ nstate); -+ -+ /* -+ * To increment the correct bean -+ * counter, take another lap. -+ */ -+ goto retry; -+ } -+ -+ case DTRACE_DSTATE_DIRTY: -+ dcpu->dtdsc_dirty_drops++; -+ break; -+ -+ case DTRACE_DSTATE_RINSING: -+ dcpu->dtdsc_rinsing_drops++; -+ break; -+ -+ case DTRACE_DSTATE_EMPTY: -+ dcpu->dtdsc_drops++; -+ break; -+ } -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP); -+ return NULL; -+ } -+ -+ /* -+ * The clean list appears to be non-empty. We want to -+ * move the clean list to the free list; we start by -+ * moving the clean pointer aside. -+ */ -+ if (cmpxchg(&dcpu->dtdsc_clean, clean, NULL) != clean) -+ /* -+ * We are in one of two situations: -+ * -+ * (a) The clean list was switched to the -+ * free list by another CPU. -+ * -+ * (b) The clean list was added to by the -+ * cleansing cyclic. 
-+ * -+ * In either of these situations, we can -+ * just reattempt the free list allocation. -+ */ -+ goto retry; -+ -+ ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE); -+ -+ /* -+ * Now we'll move the clean list to the free list. -+ * It's impossible for this to fail: the only way -+ * the free list can be updated is through this -+ * code path, and only one CPU can own the clean list. -+ * Thus, it would only be possible for this to fail if -+ * this code were racing with dtrace_dynvar_clean(). -+ * (That is, if dtrace_dynvar_clean() updated the clean -+ * list, and we ended up racing to update the free -+ * list.) This race is prevented by the dtrace_sync() -+ * in dtrace_dynvar_clean() -- which flushes the -+ * owners of the clean lists out before resetting -+ * the clean lists. -+ */ -+ rval = cmpxchg(&dcpu->dtdsc_free, NULL, clean); -+ ASSERT(rval == NULL); -+ -+ goto retry; -+ } -+ -+ dvar = free; -+ new_free = dvar->dtdv_next; -+ } while (cmpxchg(&dcpu->dtdsc_free, free, new_free) != free); -+ -+ /* -+ * We have now allocated a new chunk. We copy the tuple keys into the -+ * tuple array and copy any referenced key data into the data space -+ * following the tuple array. As we do this, we relocate dttk_value -+ * in the final tuple to point to the key data address in the chunk. 
-+ */ -+ kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys]; -+ dvar->dtdv_data = (void *)(kdata + ksize); -+ dvar->dtdv_tuple.dtt_nkeys = nkeys; -+ -+ for (i = 0; i < nkeys; i++) { -+ struct dtrace_key *dkey = &dvar->dtdv_tuple.dtt_key[i]; -+ size_t kesize = key[i].dttk_size; -+ -+ if (kesize != 0) { -+ dtrace_bcopy( -+ (const void *)(uintptr_t)key[i].dttk_value, -+ (void *)kdata, kesize); -+ dkey->dttk_value = kdata; -+ kdata += P2ROUNDUP(kesize, sizeof(uint64_t)); -+ } else -+ dkey->dttk_value = key[i].dttk_value; -+ -+ dkey->dttk_size = kesize; -+ } -+ -+ ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE); -+ dvar->dtdv_hashval = hashval; -+ dvar->dtdv_next = start; -+ -+ if (cmpxchg(&hash[bucket].dtdh_chain, start, dvar) == start) -+ return dvar; -+ -+ /* -+ * The cas has failed. Either another CPU is adding an element to -+ * this hash chain, or another CPU is deleting an element from this -+ * hash chain. The simplest way to deal with both of these cases -+ * (though not necessarily the most efficient) is to free our -+ * allocated block and tail-call ourselves. Note that the free is -+ * to the dirty list and _not_ to the free list. This is to prevent -+ * races with allocators, above. -+ */ -+ dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; -+ -+ dtrace_membar_producer(); -+ -+ do { -+ free = dcpu->dtdsc_dirty; -+ dvar->dtdv_next = free; -+ } while (cmpxchg(&dcpu->dtdsc_dirty, free, dvar) != free); -+ -+ return dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate); -+} -+ -+/* -+ * Return a string. In the event that the user lacks the privilege to access -+ * arbitrary kernel memory, we copy the string out to scratch memory so that we -+ * don't fail access checking. -+ * -+ * dtrace_dif_variable() uses this routine as a helper for various -+ * builtin values such as 'execname' and 'probefunc.' 
-+ */ -+static uintptr_t dtrace_dif_varstr(uintptr_t addr, struct dtrace_state *state, -+ struct dtrace_mstate *mstate) -+{ -+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; -+ uintptr_t ret; -+ size_t strsz; -+ -+ /* -+ * The easy case: this probe is allowed to read all of memory, so -+ * we can just return this as a vanilla pointer. -+ */ -+ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) -+ return addr; -+ -+ /* -+ * This is the tougher case: we copy the string in question from -+ * kernel memory into scratch memory and return it that way: this -+ * ensures that we won't trip up when access checking tests the -+ * BYREF return value. -+ */ -+ strsz = dtrace_strlen((char *)addr, size) + 1; -+ -+ if (mstate->dtms_scratch_ptr + strsz > -+ mstate->dtms_scratch_base + mstate->dtms_scratch_size) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ return (uintptr_t)NULL; -+ } -+ -+ dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr, -+ strsz); -+ ret = mstate->dtms_scratch_ptr; -+ mstate->dtms_scratch_ptr += strsz; -+ -+ return ret; -+} -+ -+/* -+ * This function implements the DIF emulator's variable lookups. The emulator -+ * passes a reserved variable identifier and optional built-in array index. -+ * -+ * This function is annotated to be always inlined in dtrace_dif_emulate() -+ * because (1) that is the only place where it is called from, and (2) it has -+ * come to our attention that some GCC versions inline it automatically while -+ * others do not and that messes up the number of frames to skip (aframes). -+ */ -+static __always_inline uint64_t dtrace_dif_variable(struct dtrace_mstate *mstate, -+ struct dtrace_state *state, -+ uint64_t v, uint64_t ndx) -+{ -+ /* -+ * If we're accessing one of the uncached arguments, we'll turn this -+ * into a reference in the args array. 
-+ */ -+ if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) { -+ ndx = v - DIF_VAR_ARG0; -+ v = DIF_VAR_ARGS; -+ } -+ -+ switch (v) { -+ case DIF_VAR_ARGS: -+ ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); -+ -+ if (ndx >= DTRACE_MSTATE_ARGS_MAX) { -+ int aframes = -+ mstate->dtms_probe->dtpr_aframes + 1; -+ struct dtrace_provider *pv; -+ uint64_t val; -+ -+ pv = mstate->dtms_probe->dtpr_provider; -+ if (pv->dtpv_pops.dtps_getargval != NULL) -+ val = pv->dtpv_pops.dtps_getargval( -+ pv->dtpv_arg, -+ mstate->dtms_probe->dtpr_id, -+ mstate->dtms_probe->dtpr_arg, -+ ndx, aframes); -+ else -+ val = dtrace_getarg(ndx, aframes); -+ -+ /* -+ * This is regrettably required to keep the compiler -+ * from tail-optimizing the call to dtrace_getarg(). -+ * The condition always evaluates to true, but the -+ * compiler has no way of figuring that out a priori. -+ * (None of this would be necessary if the compiler -+ * could be relied upon to _always_ tail-optimize -+ * the call to dtrace_getarg() -- but it can't.) -+ */ -+ if (mstate->dtms_probe != NULL) -+ return val; -+ -+ ASSERT(0); -+ } -+ -+ return mstate->dtms_arg[ndx]; -+ -+ case DIF_VAR_UREGS: { -+ if (!dtrace_priv_proc(state)) -+ return 0; -+ -+ return dtrace_getreg(current, ndx); -+ } -+ -+ case DIF_VAR_CURTHREAD: -+ if (!dtrace_priv_kernel(state)) -+ return 0; -+ -+ return (uint64_t)(uintptr_t)current; -+ -+ case DIF_VAR_TIMESTAMP: -+ if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { -+ mstate->dtms_timestamp = dtrace_gethrtime(); -+ mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP; -+ } -+ -+ return ktime_to_ns(mstate->dtms_timestamp); -+ -+ case DIF_VAR_WALLTIMESTAMP: -+ return ktime_to_ns(dtrace_get_walltime()); -+ -+ case DIF_VAR_VTIMESTAMP: -+ ASSERT(dtrace_vtime_references != 0); -+ -+ if (current->dt_task != NULL) -+ return ktime_to_ns(current->dt_task->dt_vtime); -+ -+ /* -+ * This is not ideal but without any data available -+ * there is no reasonable default value for vtimestamp -+ * variable. 
-+ */ -+ return ktime_to_ns(0); -+ -+ case DIF_VAR_IPL: -+ if (!dtrace_priv_kernel(state)) -+ return 0; -+ -+ if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) { -+ mstate->dtms_ipl = dtrace_getipl(); -+ mstate->dtms_present |= DTRACE_MSTATE_IPL; -+ } -+ -+ return mstate->dtms_ipl; -+ -+ case DIF_VAR_EPID: -+ ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID); -+ ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID); -+ -+ return mstate->dtms_epid; -+ -+ case DIF_VAR_ID: -+ ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); -+ return mstate->dtms_probe->dtpr_id; -+ -+ case DIF_VAR_STACKDEPTH: -+ if (!dtrace_priv_kernel(state)) -+ return 0; -+ if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { -+ int aframes = mstate->dtms_probe->dtpr_aframes + 2; -+ -+ mstate->dtms_stackdepth = dtrace_getstackdepth( -+ mstate, aframes); -+ mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; -+ } -+ -+ return mstate->dtms_stackdepth; -+ -+ case DIF_VAR_USTACKDEPTH: -+ if (!dtrace_priv_proc(state)) -+ return 0; -+ -+ if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { -+ /* -+ * See comment in DIF_VAR_PID. -+ */ -+ if (DTRACE_ANCHORED(mstate->dtms_probe) && -+ in_interrupt()) -+ mstate->dtms_ustackdepth = 0; -+ else -+ mstate->dtms_ustackdepth = -+ dtrace_getustackdepth(); -+ -+ mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH; -+ } -+ -+ return mstate->dtms_ustackdepth; -+ -+ case DIF_VAR_CALLER: -+ if (!dtrace_priv_kernel(state)) -+ return 0; -+ -+ if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { -+ int aframes = mstate->dtms_probe->dtpr_aframes + 1; -+ -+ if (!DTRACE_ANCHORED(mstate->dtms_probe)) { -+ /* -+ * If this is an unanchored probe, we are -+ * required to go through the slow path: -+ * dtrace_caller() only guarantees correct -+ * results for anchored probes. 
-+ */ -+ uint64_t caller[2]; -+ -+ dtrace_getpcstack(caller, 2, aframes, -+ (uint32_t *)(uintptr_t) -+ mstate->dtms_arg[0]); -+ mstate->dtms_caller = caller[1]; -+ } else if ((mstate->dtms_caller = -+ dtrace_caller(aframes, 0)) == -1) { -+ /* -+ * We have failed to do this the quick way; -+ * we must resort to the slower approach of -+ * calling dtrace_getpcstack(). -+ */ -+ uint64_t caller; -+ -+ dtrace_getpcstack(&caller, 1, aframes, NULL); -+ mstate->dtms_caller = caller; -+ } -+ -+ mstate->dtms_present |= DTRACE_MSTATE_CALLER; -+ } -+ -+ return mstate->dtms_caller; -+ -+ case DIF_VAR_UCALLER: -+ if (!dtrace_priv_proc(state)) -+ return 0; -+ -+ if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { -+ uint64_t ustack[4]; -+ -+ /* -+ * dtrace_getupcstack() fills in the first uint64_t with -+ * the current PID, and the second uint64_t with the -+ * current TGID. The third uint64_t will be the -+ * program counter at user-level. The fourth uint64_t -+ * will contain the caller, which is what we're after. 
-+ */ -+ ustack[3] = 0; -+ dtrace_getupcstack(ustack, 4); -+ -+ mstate->dtms_ucaller = ustack[3]; -+ mstate->dtms_present |= DTRACE_MSTATE_UCALLER; -+ } -+ -+ return mstate->dtms_ucaller; -+ -+ case DIF_VAR_PROBEPROV: -+ ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); -+ -+ return dtrace_dif_varstr( -+ (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name, -+ state, mstate); -+ -+ case DIF_VAR_PROBEMOD: -+ ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); -+ return dtrace_dif_varstr( -+ (uintptr_t)mstate->dtms_probe->dtpr_mod, state, -+ mstate); -+ -+ case DIF_VAR_PROBEFUNC: -+ ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); -+ -+ return dtrace_dif_varstr( -+ (uintptr_t)mstate->dtms_probe->dtpr_func, state, -+ mstate); -+ -+ case DIF_VAR_PROBENAME: -+ ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); -+ -+ return dtrace_dif_varstr( -+ (uintptr_t)mstate->dtms_probe->dtpr_name, state, -+ mstate); -+ -+ case DIF_VAR_PID: -+ if (!dtrace_priv_proc(state)) -+ return 0; -+ -+ /* -+ * It is always safe to dereference current, it always points -+ * to a valid task_struct. -+ */ -+ return (uint64_t)current->tgid; -+ -+ case DIF_VAR_PPID: -+ if (!dtrace_priv_proc(state)) -+ return 0; -+ -+ /* -+ * It is always safe to dereference current, it always points -+ * to a valid task_struct. -+ * -+ * Additionally, it is safe to dereference one's parent, since -+ * it is never NULL after process birth. -+ */ -+ return (uint64_t)current->real_parent->tgid; -+ -+ case DIF_VAR_TID: -+ return (uint64_t)current->pid; -+ -+ case DIF_VAR_EXECNAME: -+ if (!dtrace_priv_proc(state)) -+ return 0; -+ -+ /* -+ * It is always safe to dereference current, it always points -+ * to a valid task_struct. 
-+ */ -+ return dtrace_dif_varstr((uintptr_t)current->comm, state, -+ mstate); -+ -+ case DIF_VAR_ZONENAME: -+ return 0; -+ -+ case DIF_VAR_UID: -+ if (!dtrace_priv_proc(state)) -+ return 0; -+ -+ /* -+ * It is always safe to dereference current, it always points -+ * to a valid task_struct. -+ * -+ * Additionally, it is safe to dereference one's own process -+ * credential, since this is never NULL after process birth. -+ */ -+ return (uint64_t)from_kuid(current_user_ns(), -+ current_real_cred()->uid); -+ -+ case DIF_VAR_GID: -+ if (!dtrace_priv_proc(state)) -+ return 0; -+ -+ /* -+ * It is always safe to dereference current, it always points -+ * to a valid task_struct. -+ * -+ * Additionally, it is safe to dereference one's own process -+ * credential, since this is never NULL after process birth. -+ */ -+ return (uint64_t)from_kgid(current_user_ns(), -+ current_real_cred()->gid); -+ -+ case DIF_VAR_ERRNO: { -+ int64_t arg0; -+ -+ ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); -+ -+ if (!dtrace_priv_proc(state)) -+ return 0; -+ -+ /* -+ * We need to do some magic here to get the correct semantics -+ * for the 'errno' variable. It can only have a non-zero value -+ * when executing a system call, and for Linux, only after the -+ * actual system call implementation has completed, indicating -+ * in its return value either an error code (-2048 < errno < 0) -+ * or a valid result. So, the only time we can expect a valid -+ * value in errno is during the processing of any return probe -+ * in the syscall provider. In all other cases, it should have -+ * the value 0. -+ * -+ * So, we only look at probes that match: syscall:::return -+ */ -+ if (strncmp(mstate->dtms_probe->dtpr_provider->dtpv_name, -+ "syscall", 7) != 0) -+ return 0; -+ if (strncmp(mstate->dtms_probe->dtpr_name, "return", 6) != 0) -+ return 0; -+ -+ /* -+ * Error number is present if arg0 lies between 0 and -2048, -+ * exclusive. 
-+ */ -+ arg0 = (int64_t)mstate->dtms_arg[ndx]; -+ if (arg0 < 0 && arg0 > -2048) -+ return (uint64_t)-arg0; -+ -+ return 0; -+ } -+ -+ case DIF_VAR_CURCPU: -+ return (uint64_t)(uintptr_t)this_cpu_info; -+ -+ default: -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); -+ return 0; -+ } -+} -+ -+#define DTRACE_V4MAPPED_OFFSET (sizeof(uint32_t) * 3) -+ -+/* -+ * Emulate the execution of DTrace ID subroutines invoked by the call opcode. -+ * Notice that we don't bother validating the proper number of arguments or -+ * their types in the tuple stack. This isn't needed because all argument -+ * interpretation is safe because of our load safety -- the worst that can -+ * happen is that a bogus program can obtain bogus results. -+ */ -+static void dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, -+ struct dtrace_key *tupregs, int nargs, -+ struct dtrace_mstate *mstate, -+ struct dtrace_state *state) -+{ -+ volatile uint16_t *flags = &this_cpu_core->cpuc_dtrace_flags; -+ volatile uintptr_t *illval = &this_cpu_core->cpuc_dtrace_illval; -+ struct dtrace_vstate *vstate = &state->dts_vstate; -+ struct mutex mtx; -+ -+ union { -+ rwlock_t ri; -+ uintptr_t rw; -+ } r; -+ -+ dt_dbg_dif(" Subroutine %d\n", subr); -+ -+ switch (subr) { -+ case DIF_SUBR_RAND: -+ regs[rd] = ktime_to_ns(dtrace_gethrtime()) * 2416 + 374441; -+ regs[rd] = do_div(regs[rd], 1771875); -+ break; -+ -+ case DIF_SUBR_MUTEX_OWNED: -+ if (!dtrace_canload(tupregs[0].dttk_value, -+ sizeof(struct mutex), mstate, vstate)) -+ break; -+ -+ dtrace_bcopy((const void *)(uintptr_t)tupregs[0].dttk_value, -+ &mtx, sizeof(struct mutex)); -+ if (*flags & CPU_DTRACE_FAULT) -+ break; -+ -+ regs[rd] = mutex_owned(&mtx); -+ break; -+ -+ case DIF_SUBR_MUTEX_OWNER: -+ regs[rd] = 0; -+ if (!dtrace_canload(tupregs[0].dttk_value, -+ sizeof(struct mutex), mstate, vstate)) -+ break; -+ -+ dtrace_bcopy((const void *)(uintptr_t)tupregs[0].dttk_value, -+ &mtx, sizeof(struct mutex)); -+ if (*flags & CPU_DTRACE_FAULT) -+ break; -+ -+#ifdef 
CONFIG_SMP -+ regs[rd] = (uintptr_t)__mutex_owner(&mtx); -+#else -+ regs[rd] = 0; -+#endif -+ break; -+ -+ case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: -+ if (!dtrace_canload(tupregs[0].dttk_value, -+ sizeof(struct mutex), mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ /* -+ * On Linux, all mutexes are adaptive. -+ */ -+ regs[rd] = 1; -+ break; -+ -+ case DIF_SUBR_MUTEX_TYPE_SPIN: -+ if (!dtrace_canload(tupregs[0].dttk_value, -+ sizeof(struct mutex), mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ /* -+ * On Linux, all mutexes are adaptive. -+ */ -+ regs[rd] = 0; -+ break; -+ -+ case DIF_SUBR_RW_READ_HELD: { -+ if (!dtrace_canload(tupregs[0].dttk_value, sizeof(rwlock_t), -+ mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ r.rw = dtrace_loadptr(tupregs[0].dttk_value); -+ regs[rd] = !peek_write_can_lock(&r.ri) && -+ peek_read_can_lock(&r.ri); -+ break; -+ } -+ -+ case DIF_SUBR_RW_WRITE_HELD: -+ if (!dtrace_canload(tupregs[0].dttk_value, sizeof(rwlock_t), -+ mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ r.rw = dtrace_loadptr(tupregs[0].dttk_value); -+ regs[rd] = !peek_write_can_lock(&r.ri); -+ break; -+ -+ case DIF_SUBR_RW_ISWRITER: -+ if (!dtrace_canload(tupregs[0].dttk_value, sizeof(rwlock_t), -+ mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ r.rw = dtrace_loadptr(tupregs[0].dttk_value); -+ /* -+ * On Linux there is no way to determine whether someone is -+ * trying to acquire a write lock. -+ */ -+ regs[rd] = !peek_write_can_lock(&r.ri); -+ break; -+ -+ case DIF_SUBR_BCOPY: { -+ /* -+ * We need to be sure that the destination is in the scratch -+ * region -- no other region is allowed. 
-+ */ -+ uintptr_t src = tupregs[0].dttk_value; -+ uintptr_t dest = tupregs[1].dttk_value; -+ size_t size = tupregs[2].dttk_value; -+ -+ if (!dtrace_inscratch(dest, size, mstate)) { -+ *flags |= CPU_DTRACE_BADADDR; -+ *illval = regs[rd]; -+ break; -+ } -+ -+ if (!dtrace_canload(src, size, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ dtrace_bcopy((void *)src, (void *)dest, size); -+ break; -+ } -+ -+ case DIF_SUBR_ALLOCA: -+ case DIF_SUBR_COPYIN: { -+ uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); -+ uint64_t size; -+ size_t scratch_size; -+ -+ size = tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; -+ scratch_size = (dest - mstate->dtms_scratch_ptr) + size; -+ -+ /* -+ * This action doesn't require any credential checks since -+ * probes will not activate in user contexts to which the -+ * enabling user does not have permissions. -+ */ -+ -+ /* -+ * Rounding up the user allocation size could have overflowed -+ * a large, bogus allocation (like -1ULL) to 0. -+ */ -+ if (scratch_size < size || -+ !DTRACE_INSCRATCH(mstate, scratch_size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (subr == DIF_SUBR_COPYIN) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ } -+ -+ mstate->dtms_scratch_ptr += scratch_size; -+ regs[rd] = dest; -+ break; -+ } -+ -+ case DIF_SUBR_COPYINTO: { -+ uint64_t size = tupregs[1].dttk_value; -+ uintptr_t dest = tupregs[2].dttk_value; -+ -+ /* -+ * This action doesn't require any credential checks since -+ * probes will not activate in user contexts to which the -+ * enabling user does not have permissions. 
-+ */ -+ if (!dtrace_inscratch(dest, size, mstate)) { -+ *flags |= CPU_DTRACE_BADADDR; -+ *illval = regs[rd]; -+ break; -+ } -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ break; -+ } -+ -+ case DIF_SUBR_COPYINSTR: { -+ uintptr_t dest = mstate->dtms_scratch_ptr; -+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; -+ -+ if (nargs > 1 && tupregs[1].dttk_value < size) -+ size = tupregs[1].dttk_value + 1; -+ -+ /* -+ * This action doesn't require any credential checks since -+ * probes will not activate in user contexts to which the -+ * enabling user does not have permissions. -+ */ -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ -+ ((char *)dest)[size - 1] = '\0'; -+ mstate->dtms_scratch_ptr += size; -+ regs[rd] = dest; -+ break; -+ } -+ -+#if 0 /* FIXME */ -+ case DIF_SUBR_MSGSIZE: -+ case DIF_SUBR_MSGDSIZE: { -+ uintptr_t baddr = tupregs[0].dttk_value, daddr; -+ uintptr_t wptr, rptr; -+ size_t count = 0; -+ int cont = 0; -+ -+ while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { -+ -+ if (!dtrace_canload(baddr, sizeof(mblk_t), mstate, -+ vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ wptr = dtrace_loadptr(baddr + -+ offsetof(mblk_t, b_wptr)); -+ -+ rptr = dtrace_loadptr(baddr + -+ offsetof(mblk_t, b_rptr)); -+ -+ if (wptr < rptr) { -+ *flags |= CPU_DTRACE_BADADDR; -+ *illval = tupregs[0].dttk_value; -+ break; -+ } -+ -+ daddr = dtrace_loadptr(baddr + -+ offsetof(mblk_t, b_datap)); -+ -+ baddr = dtrace_loadptr(baddr + -+ offsetof(mblk_t, b_cont)); -+ -+ /* -+ * We want to prevent against denial-of-service here, -+ * so we're only going to search the list for -+ * dtrace_msgdsize_max mblks. 
-+ */ -+ if (cont++ > dtrace_msgdsize_max) { -+ *flags |= CPU_DTRACE_ILLOP; -+ break; -+ } -+ -+ if (subr == DIF_SUBR_MSGDSIZE) { -+ if (dtrace_load8(daddr + -+ offsetof(dblk_t, db_type)) != M_DATA) -+ continue; -+ } -+ -+ count += wptr - rptr; -+ } -+ -+ if (!(*flags & CPU_DTRACE_FAULT)) -+ regs[rd] = count; -+ -+ break; -+ } -+#endif -+ -+ case DIF_SUBR_PROGENYOF: { -+ pid_t pid = tupregs[0].dttk_value; -+ struct task_struct *p; -+ int rval = 0; -+ -+ for (p = current; p != NULL; p = p->real_parent) { -+ if (p->pid == pid) { -+ rval = 1; -+ break; -+ } -+ -+ if (p == p->real_parent) -+ break; -+ } -+ -+ regs[rd] = rval; -+ break; -+ } -+ -+ case DIF_SUBR_SPECULATION: -+ regs[rd] = dtrace_speculation(state); -+ break; -+ -+ case DIF_SUBR_COPYOUT: { -+ uintptr_t kaddr = tupregs[0].dttk_value; -+ uintptr_t uaddr = tupregs[1].dttk_value; -+ uint64_t size = tupregs[2].dttk_value; -+ -+ if (!dtrace_destructive_disallow && -+ dtrace_priv_proc_control(state) && -+ !dtrace_istoxic(kaddr, size) && -+ dtrace_canload(kaddr, size, mstate, vstate)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ dtrace_copyout(kaddr, uaddr, size, flags); -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ } -+ break; -+ } -+ -+ case DIF_SUBR_COPYOUTSTR: { -+ uintptr_t kaddr = tupregs[0].dttk_value; -+ uintptr_t uaddr = tupregs[1].dttk_value; -+ uint64_t size = tupregs[2].dttk_value; -+ -+ if (!dtrace_destructive_disallow && -+ dtrace_priv_proc_control(state) && -+ !dtrace_istoxic(kaddr, size) && -+ dtrace_strcanload(kaddr, size, mstate, vstate)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ dtrace_copyoutstr(kaddr, uaddr, size, flags); -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ } -+ break; -+ } -+ -+ case DIF_SUBR_STRLEN: { -+ size_t sz; -+ uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; -+ -+ sz = dtrace_strlen((char *)addr, -+ state->dts_options[DTRACEOPT_STRSIZE]); -+ -+ if (!dtrace_canload(addr, sz + 1, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ regs[rd] = sz; -+ -+ 
break; -+ } -+ -+ case DIF_SUBR_STRCHR: -+ case DIF_SUBR_STRRCHR: { -+ /* -+ * We're going to iterate over the string looking for the -+ * specified character. We will iterate until we have reached -+ * the string length or we have found the character. If this -+ * is DIF_SUBR_STRRCHR, we will look for the last occurrence -+ * of the specified character instead of the first. -+ */ -+ uintptr_t saddr = tupregs[0].dttk_value; -+ uintptr_t addr = tupregs[0].dttk_value; -+ uintptr_t limit = addr + -+ state->dts_options[DTRACEOPT_STRSIZE]; -+ char c, target = (char)tupregs[1].dttk_value; -+ -+ for (regs[rd] = 0; addr < limit; addr++) { -+ c = dtrace_load8(addr); -+ if (c == target) { -+ regs[rd] = addr; -+ -+ if (subr == DIF_SUBR_STRCHR) -+ break; -+ } -+ -+ if (c == '\0') -+ break; -+ } -+ -+ if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ break; -+ } -+ -+ case DIF_SUBR_STRSTR: -+ case DIF_SUBR_INDEX: -+ case DIF_SUBR_RINDEX: { -+ /* -+ * We're going to iterate over the string looking for the -+ * specified string. We will iterate until we have reached -+ * the string length or we have found the string. (Yes, this -+ * is done in the most naive way possible -- but considering -+ * that the string we're searching for is likely to be -+ * relatively short, the complexity of Rabin-Karp or similar -+ * hardly seems merited.) -+ */ -+ char *addr = (char *)(uintptr_t) -+ tupregs[0].dttk_value; -+ char *substr = (char *)(uintptr_t) -+ tupregs[1].dttk_value; -+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; -+ size_t len = dtrace_strlen(addr, size); -+ size_t sublen = dtrace_strlen(substr, size); -+ char *limit = addr + len, *orig = addr; -+ int notfound = subr == DIF_SUBR_STRSTR ? 
0 : -1; -+ int inc = 1; -+ -+ regs[rd] = notfound; -+ -+ if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, -+ vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ /* -+ * strstr() and index()/rindex() have similar semantics if -+ * both strings are the empty string: strstr() returns a -+ * pointer to the (empty) string, and index() and rindex() -+ * both return index 0 (regardless of any position argument). -+ */ -+ if (sublen == 0 && len == 0) { -+ if (subr == DIF_SUBR_STRSTR) -+ regs[rd] = (uintptr_t)addr; -+ else -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (subr != DIF_SUBR_STRSTR) { -+ if (subr == DIF_SUBR_RINDEX) { -+ limit = orig - 1; -+ addr += len; -+ inc = -1; -+ } -+ -+ /* -+ * Both index() and rindex() take an optional position -+ * argument that denotes the starting position. -+ */ -+ if (nargs == 3) { -+ int64_t pos = (int64_t)tupregs[2].dttk_value; -+ -+ /* -+ * If the position argument to index() is -+ * negative, Perl implicitly clamps it at -+ * zero. This semantic is a little surprising -+ * given the special meaning of negative -+ * positions to similar Perl functions like -+ * substr(), but it appears to reflect a -+ * notion that index() can start from a -+ * negative index and increment its way up to -+ * the string. Given this notion, Perl's -+ * rindex() is at least self-consistent in -+ * that it implicitly clamps positions greater -+ * than the string length to be the string -+ * length. Where Perl completely loses -+ * coherence, however, is when the specified -+ * substring is the empty string (""). In -+ * this case, even if the position is -+ * negative, rindex() returns 0 -- and even if -+ * the position is greater than the length, -+ * index() returns the string length. 
These -+ * semantics violate the notion that index() -+ * should never return a value less than the -+ * specified position and that rindex() should -+ * never return a value greater than the -+ * specified position. (One assumes that -+ * these semantics are artifacts of Perl's -+ * implementation and not the results of -+ * deliberate design -- it beggars belief that -+ * even Larry Wall could desire such oddness.) -+ * While in the abstract one would wish for -+ * consistent position semantics across -+ * substr(), index() and rindex() -- or at the -+ * very least self-consistent position -+ * semantics for index() and rindex() -- we -+ * instead opt to keep with the extant Perl -+ * semantics, in all their broken glory. (Do -+ * we have more desire to maintain Perl's -+ * semantics than Perl does? Probably.) -+ */ -+ if (subr == DIF_SUBR_RINDEX) { -+ if (pos < 0) { -+ if (sublen == 0) -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (pos > len) -+ pos = len; -+ } else { -+ if (pos < 0) -+ pos = 0; -+ -+ if (pos >= len) { -+ if (sublen == 0) -+ regs[rd] = len; -+ break; -+ } -+ } -+ -+ addr = orig + pos; -+ } -+ } -+ -+ for (regs[rd] = notfound; addr != limit; addr += inc) { -+ if (dtrace_strncmp(addr, substr, sublen) == 0) { -+ if (subr != DIF_SUBR_STRSTR) { -+ /* -+ * As D index() and rindex() are -+ * modeled on Perl (and not on awk), -+ * we return a zero-based (and not a -+ * one-based) index. (For you Perl -+ * weenies: no, we're not going to add -+ * $[ -- and shouldn't you be at a con -+ * or something?) 
-+ */ -+ regs[rd] = (uintptr_t)(addr - orig); -+ break; -+ } -+ -+ ASSERT(subr == DIF_SUBR_STRSTR); -+ regs[rd] = (uintptr_t)addr; -+ break; -+ } -+ } -+ -+ break; -+ } -+ -+ case DIF_SUBR_STRTOK: { -+ uintptr_t addr = tupregs[0].dttk_value; -+ uintptr_t tokaddr = tupregs[1].dttk_value; -+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; -+ uintptr_t limit, toklimit = tokaddr + size; -+ uint8_t c = 0, tokmap[32]; /* 256 / 8 */ -+ char *dest = (char *)mstate->dtms_scratch_ptr; -+ int i; -+ -+ /* -+ * Check both the token buffer and (later) the input buffer, -+ * since both could be non-scratch addresses. -+ */ -+ if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (addr == (uintptr_t)NULL) { -+ /* -+ * If the address specified is NULL, we use our saved -+ * strtok pointer from the mstate. Note that this -+ * means that the saved strtok pointer is _only_ -+ * valid within multiple enablings of the same probe -- -+ * it behaves like an implicit clause-local variable. -+ */ -+ addr = mstate->dtms_strtok; -+ } else { -+ /* -+ * If the user-specified address is non-NULL we must -+ * access check it. This is the only time we have -+ * a chance to do so, since this address may reside -+ * in the string table of this clause-- future calls -+ * (when we fetch addr from mstate->dtms_strtok) -+ * would fail this access check. -+ */ -+ if (!dtrace_strcanload(addr, size, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ } -+ -+ /* -+ * First, zero the token map, and then process the token -+ * string -- setting a bit in the map for every character -+ * found in the token string. 
-+ */ -+ for (i = 0; i < sizeof(tokmap); i++) -+ tokmap[i] = 0; -+ -+ for (; tokaddr < toklimit; tokaddr++) { -+ c = dtrace_load8(tokaddr); -+ if (c == '\0') -+ break; -+ -+ ASSERT((c >> 3) < sizeof(tokmap)); -+ tokmap[c >> 3] |= (1 << (c & 0x7)); -+ } -+ -+ for (limit = addr + size; addr < limit; addr++) { -+ /* -+ * We're looking for a character that is _not_ contained -+ * in the token string. -+ */ -+ c = dtrace_load8(addr); -+ if (c == '\0') -+ break; -+ -+ if (!(tokmap[c >> 3] & (1 << (c & 0x7)))) -+ break; -+ } -+ -+ if (c == '\0') { -+ /* -+ * We reached the end of the string without finding -+ * any character that was not in the token string. -+ * We return NULL in this case, and we set the saved -+ * address to NULL as well. -+ */ -+ regs[rd] = 0; -+ mstate->dtms_strtok = (uintptr_t)NULL; -+ break; -+ } -+ -+ /* -+ * From here on, we're copying into the destination string. -+ */ -+ for (i = 0; addr < limit && i < size - 1; addr++) { -+ c = dtrace_load8(addr); -+ if (c == '\0') -+ break; -+ -+ if (tokmap[c >> 3] & (1 << (c & 0x7))) -+ break; -+ -+ ASSERT(i < size); -+ dest[i++] = c; -+ } -+ -+ ASSERT(i < size); -+ dest[i] = '\0'; -+ regs[rd] = (uintptr_t)dest; -+ mstate->dtms_scratch_ptr += size; -+ mstate->dtms_strtok = addr; -+ break; -+ } -+ -+ case DIF_SUBR_SUBSTR: { -+ uintptr_t s = tupregs[0].dttk_value; -+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; -+ char *d = (char *)mstate->dtms_scratch_ptr; -+ int64_t index = (int64_t)tupregs[1].dttk_value; -+ int64_t remaining = (int64_t)tupregs[2].dttk_value; -+ size_t len = dtrace_strlen((char *)s, size); -+ int64_t i = 0; -+ -+ if (!dtrace_canload(s, len + 1, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (nargs <= 2) -+ remaining = (int64_t)size; -+ -+ if (index < 0) { -+ index += len; -+ -+ if (index < 0 && index + remaining > 0) { -+ remaining += index; -+ 
index = 0; -+ } -+ } -+ -+ if (index >= len || index < 0) -+ remaining = 0; -+ else if (remaining < 0) -+ remaining += len - index; -+ else if (index + remaining > size) -+ remaining = size - index; -+ -+ for (i = 0; i < remaining; i++) { -+ d[i] = dtrace_load8(s + index + i); -+ if (d[i] == '\0') -+ break; -+ } -+ -+ d[i] = '\0'; -+ -+ mstate->dtms_scratch_ptr += size; -+ regs[rd] = (uintptr_t)d; -+ break; -+ } -+ -+ case DIF_SUBR_GETMAJOR: -+ regs[rd] = MAJOR(tupregs[0].dttk_value); -+ break; -+ -+ case DIF_SUBR_GETMINOR: -+ regs[rd] = MINOR(tupregs[0].dttk_value); -+ break; -+ -+#if 0 /* FIXME */ -+ case DIF_SUBR_DDI_PATHNAME: { -+ /* -+ * This one is a galactic mess. We are going to roughly -+ * emulate ddi_pathname(), but it's made more complicated -+ * by the fact that we (a) want to include the minor name and -+ * (b) must proceed iteratively instead of recursively. -+ */ -+ uintptr_t dest = mstate->dtms_scratch_ptr; -+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; -+ char *start = (char *)dest, *end = start + size - 1; -+ uintptr_t daddr = tupregs[0].dttk_value; -+ int64_t minor = (int64_t)tupregs[1].dttk_value; -+ char *s; -+ int i, len, depth = 0; -+ -+ /* -+ * Due to all the pointer jumping we do and context we must -+ * rely upon, we just mandate that the user must have kernel -+ * read privileges to use this routine. -+ */ -+ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) { -+ *flags |= CPU_DTRACE_KPRIV; -+ *illval = daddr; -+ regs[rd] = 0; -+ } -+ -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ *end = '\0'; -+ -+ /* -+ * We want to have a name for the minor. In order to do this, -+ * we need to walk the minor list from the devinfo. 
We want -+ * to be sure that we don't infinitely walk a circular list, -+ * so we check for circularity by sending a scout pointer -+ * ahead two elements for every element that we iterate over; -+ * if the list is circular, these will ultimately point to the -+ * same element. You may recognize this little trick as the -+ * answer to a stupid interview question -- one that always -+ * seems to be asked by those who had to have it laboriously -+ * explained to them, and who can't even concisely describe -+ * the conditions under which one would be forced to resort to -+ * this technique. Needless to say, those conditions are -+ * found here -- and probably only here. Is this the only use -+ * of this infamous trick in shipping, production code? If it -+ * isn't, it probably should be... -+ */ -+ if (minor != -1) { -+ uintptr_t maddr = dtrace_loadptr(daddr + -+ offsetof(struct dev_info, devi_minor)); -+ -+ uintptr_t next = offsetof(struct ddi_minor_data, next); -+ uintptr_t name = offsetof(struct ddi_minor_data, -+ d_minor) + offsetof(struct ddi_minor, name); -+ uintptr_t dev = offsetof(struct ddi_minor_data, -+ d_minor) + offsetof(struct ddi_minor, dev); -+ uintptr_t scout; -+ -+ if (maddr != NULL) -+ scout = dtrace_loadptr(maddr + next); -+ -+ while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { -+ uint64_t m; -+#ifdef _LP64 -+ m = dtrace_load64(maddr + dev) & MAXMIN64; -+#else -+ m = dtrace_load32(maddr + dev) & MAXMIN; -+#endif -+ if (m != minor) { -+ maddr = dtrace_loadptr(maddr + next); -+ -+ if (scout == NULL) -+ continue; -+ -+ scout = dtrace_loadptr(scout + next); -+ -+ if (scout == NULL) -+ continue; -+ -+ scout = dtrace_loadptr(scout + next); -+ -+ if (scout == NULL) -+ continue; -+ -+ if (scout == maddr) { -+ *flags |= CPU_DTRACE_ILLOP; -+ break; -+ } -+ -+ continue; -+ } -+ -+ /* -+ * We have the minor data. Now we need to -+ * copy the minor's name into the end of the -+ * pathname. 
-+ */ -+ s = (char *)dtrace_loadptr(maddr + name); -+ len = dtrace_strlen(s, size); -+ -+ if (*flags & CPU_DTRACE_FAULT) -+ break; -+ -+ if (len != 0) { -+ end -= len + 1; -+ if (end < start) -+ break; -+ -+ *end = ':'; -+ } -+ -+ for (i = 1; i <= len; i++) -+ end[i] = dtrace_load8((uintptr_t)s++); -+ break; -+ } -+ } -+ -+ while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { -+ ddi_node_state_t devi_state; -+ -+ devi_state = dtrace_load32(daddr + -+ offsetof(struct dev_info, devi_node_state)); -+ -+ if (*flags & CPU_DTRACE_FAULT) -+ break; -+ -+ if (devi_state >= DS_INITIALIZED) { -+ s = (char *)dtrace_loadptr(daddr + -+ offsetof(struct dev_info, devi_addr)); -+ len = dtrace_strlen(s, size); -+ -+ if (*flags & CPU_DTRACE_FAULT) -+ break; -+ -+ if (len != 0) { -+ end -= len + 1; -+ if (end < start) -+ break; -+ -+ *end = '@'; -+ } -+ -+ for (i = 1; i <= len; i++) -+ end[i] = dtrace_load8((uintptr_t)s++); -+ } -+ -+ /* -+ * Now for the node name... -+ */ -+ s = (char *)dtrace_loadptr(daddr + -+ offsetof(struct dev_info, devi_node_name)); -+ -+ daddr = dtrace_loadptr(daddr + -+ offsetof(struct dev_info, devi_parent)); -+ -+ /* -+ * If our parent is NULL (that is, if we're the root -+ * node), we're going to use the special path -+ * "devices". 
-+ */ -+ if (daddr == NULL) -+ s = "devices"; -+ -+ len = dtrace_strlen(s, size); -+ if (*flags & CPU_DTRACE_FAULT) -+ break; -+ -+ end -= len + 1; -+ if (end < start) -+ break; -+ -+ for (i = 1; i <= len; i++) -+ end[i] = dtrace_load8((uintptr_t)s++); -+ *end = '/'; -+ -+ if (depth++ > dtrace_devdepth_max) { -+ *flags |= CPU_DTRACE_ILLOP; -+ break; -+ } -+ } -+ -+ if (end < start) -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ -+ if (daddr == NULL) { -+ regs[rd] = (uintptr_t)end; -+ mstate->dtms_scratch_ptr += size; -+ } -+ -+ break; -+ } -+#endif -+ -+ case DIF_SUBR_STRJOIN: { -+ char *d = (char *)mstate->dtms_scratch_ptr; -+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; -+ uintptr_t s1 = tupregs[0].dttk_value; -+ uintptr_t s2 = tupregs[1].dttk_value; -+ int i = 0; -+ -+ if (!dtrace_strcanload(s1, size, mstate, vstate) || -+ !dtrace_strcanload(s2, size, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ for (;;) { -+ if (i >= size) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ d[i] = dtrace_load8(s1++); -+ if ((d[i++]) == '\0') { -+ i--; -+ break; -+ } -+ } -+ -+ for (;;) { -+ if (i >= size) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ d[i] = dtrace_load8(s2++); -+ if ((d[i++]) == '\0') -+ break; -+ } -+ -+ if (i < size) { -+ mstate->dtms_scratch_ptr += i; -+ regs[rd] = (uintptr_t)d; -+ } -+ -+ break; -+ } -+ -+ case DIF_SUBR_LLTOSTR: { -+ int64_t i = (int64_t)tupregs[0].dttk_value; -+ int64_t val = i < 0 ? 
i * -1 : i; -+ uint64_t size = 22; /* room for 2^64 in dec */ -+ char *end = (char *)mstate->dtms_scratch_ptr + size -+ - 1; -+ -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ /* -+ * GCC on Linux introduces calls to functions that are not -+ * linked into the kernel image, so we need to use the do_div() -+ * function instead. It modifies the first argument in place -+ * (replaces it with the quotient), and returns the remainder. -+ * -+ * Was: -+ * for (*end-- = '\0'; val; val /= 10) -+ * *end-- = '0' + (val % 10); -+ */ -+ for (*end-- = '\0'; val; ) -+ *end-- = '0' + do_div(val, 10); -+ -+ if (i == 0) -+ *end-- = '0'; -+ -+ if (i < 0) -+ *end-- = '-'; -+ -+ regs[rd] = (uintptr_t)end + 1; -+ mstate->dtms_scratch_ptr += size; -+ break; -+ } -+ -+ case DIF_SUBR_HTONS: -+ case DIF_SUBR_NTOHS: -+#ifdef __BIG_ENDIAN -+ regs[rd] = (uint16_t)tupregs[0].dttk_value; -+#else -+ regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value); -+#endif -+ break; -+ -+ -+ case DIF_SUBR_HTONL: -+ case DIF_SUBR_NTOHL: -+#ifdef __BIG_ENDIAN -+ regs[rd] = (uint32_t)tupregs[0].dttk_value; -+#else -+ regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value); -+#endif -+ break; -+ -+ -+ case DIF_SUBR_HTONLL: -+ case DIF_SUBR_NTOHLL: -+#ifdef __BIG_ENDIAN -+ regs[rd] = (uint64_t)tupregs[0].dttk_value; -+#else -+ regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value); -+#endif -+ break; -+ -+ -+ case DIF_SUBR_DIRNAME: -+ case DIF_SUBR_BASENAME: { -+ char *dest = (char *)mstate->dtms_scratch_ptr; -+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; -+ uintptr_t src = tupregs[0].dttk_value; -+ int i, j, len = dtrace_strlen((char *)src, size); -+ int lastbase = -1, firstbase = -1, lastdir = -1; -+ int start, end; -+ -+ if (!dtrace_canload(src, len + 1, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; 
-+ } -+ -+ /* -+ * The basename and dirname for a zero-length string is -+ * defined to be "." -+ */ -+ if (len == 0) { -+ len = 1; -+ src = (uintptr_t)"."; -+ } -+ -+ /* -+ * Start from the back of the string, moving back toward the -+ * front until we see a character that isn't a slash. That -+ * character is the last character in the basename. -+ */ -+ for (i = len - 1; i >= 0; i--) { -+ if (dtrace_load8(src + i) != '/') -+ break; -+ } -+ -+ if (i >= 0) -+ lastbase = i; -+ -+ /* -+ * Starting from the last character in the basename, move -+ * towards the front until we find a slash. The character -+ * that we processed immediately before that is the first -+ * character in the basename. -+ */ -+ for (; i >= 0; i--) { -+ if (dtrace_load8(src + i) == '/') -+ break; -+ } -+ -+ if (i >= 0) -+ firstbase = i + 1; -+ -+ /* -+ * Now keep going until we find a non-slash character. That -+ * character is the last character in the dirname. -+ */ -+ for (; i >= 0; i--) { -+ if (dtrace_load8(src + i) != '/') -+ break; -+ } -+ -+ if (i >= 0) -+ lastdir = i; -+ -+ ASSERT(!(lastbase == -1 && firstbase != -1)); -+ ASSERT(!(firstbase == -1 && lastdir != -1)); -+ -+ if (lastbase == -1) { -+ /* -+ * We didn't find a non-slash character. We know that -+ * the length is non-zero, so the whole string must be -+ * slashes. In either the dirname or the basename -+ * case, we return '/'. -+ */ -+ ASSERT(firstbase == -1); -+ firstbase = lastbase = lastdir = 0; -+ } -+ -+ if (firstbase == -1) { -+ /* -+ * The entire string consists only of a basename -+ * component. If we're looking for dirname, we need -+ * to change our string to be just "."; if we're -+ * looking for a basename, we'll just set the first -+ * character of the basename to be 0. 
-+ */ -+ if (subr == DIF_SUBR_DIRNAME) { -+ ASSERT(lastdir == -1); -+ src = (uintptr_t)"."; -+ lastdir = 0; -+ } else { -+ firstbase = 0; -+ } -+ } -+ -+ if (subr == DIF_SUBR_DIRNAME) { -+ if (lastdir == -1) { -+ /* -+ * We know that we have a slash in the name -- -+ * or lastdir would be set to 0, above. And -+ * because lastdir is -1, we know that this -+ * slash must be the first character. (That -+ * is, the full string must be of the form -+ * "/basename".) In this case, the last -+ * character of the directory name is 0. -+ */ -+ lastdir = 0; -+ } -+ -+ start = 0; -+ end = lastdir; -+ } else { -+ ASSERT(subr == DIF_SUBR_BASENAME); -+ ASSERT(firstbase != -1 && lastbase != -1); -+ start = firstbase; -+ end = lastbase; -+ } -+ -+ for (i = start, j = 0; i <= end && j < size - 1; i++, j++) -+ dest[j] = dtrace_load8(src + i); -+ -+ dest[j] = '\0'; -+ regs[rd] = (uintptr_t)dest; -+ mstate->dtms_scratch_ptr += size; -+ break; -+ } -+ -+ case DIF_SUBR_CLEANPATH: { -+ char *dest = (char *)mstate->dtms_scratch_ptr, c; -+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; -+ uintptr_t src = tupregs[0].dttk_value; -+ int i = 0, j = 0; -+ -+ if (!dtrace_strcanload(src, size, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ /* -+ * Move forward, loading each character. -+ */ -+ do { -+ c = dtrace_load8(src + i++); -+next: -+ if (j + 5 >= size) /* 5 = strlen("/..c\0") */ -+ break; -+ -+ if (c != '/') { -+ dest[j++] = c; -+ continue; -+ } -+ -+ c = dtrace_load8(src + i++); -+ -+ if (c == '/') { -+ /* -+ * We have two slashes -- we can just advance -+ * to the next character. -+ */ -+ goto next; -+ } -+ -+ if (c != '.') { -+ /* -+ * This is not "." and it's not ".." -- we can -+ * just store the "/" and this character and -+ * drive on. 
-+ */ -+ dest[j++] = '/'; -+ dest[j++] = c; -+ continue; -+ } -+ -+ c = dtrace_load8(src + i++); -+ -+ if (c == '/') { -+ /* -+ * This is a "/./" component. We're not going -+ * to store anything in the destination buffer; -+ * we're just going to go to the next component. -+ */ -+ goto next; -+ } -+ -+ if (c != '.') { -+ /* -+ * This is not ".." -- we can just store the -+ * "/." and this character and continue -+ * processing. -+ */ -+ dest[j++] = '/'; -+ dest[j++] = '.'; -+ dest[j++] = c; -+ continue; -+ } -+ -+ c = dtrace_load8(src + i++); -+ -+ if (c != '/' && c != '\0') { -+ /* -+ * This is not ".." -- it's "..[mumble]". -+ * We'll store the "/.." and this character -+ * and continue processing. -+ */ -+ dest[j++] = '/'; -+ dest[j++] = '.'; -+ dest[j++] = '.'; -+ dest[j++] = c; -+ continue; -+ } -+ -+ /* -+ * This is "/../" or "/..\0". We need to back up -+ * our destination pointer until we find a "/". -+ */ -+ i--; -+ while (j != 0 && dest[--j] != '/') -+ continue; -+ -+ if (c == '\0') -+ dest[++j] = '/'; -+ } while (c != '\0'); -+ -+ dest[j] = '\0'; -+ regs[rd] = (uintptr_t)dest; -+ mstate->dtms_scratch_ptr += size; -+ break; -+ } -+ -+ case DIF_SUBR_LINK_NTOP: { -+ struct dtrace_hwtype_alen { -+ int dhwa_hwtype; -+ size_t dhwa_hwalen; -+ } hwinfo[] = { -+ { ARPHRD_ETHER, ETH_ALEN }, -+ { ARPHRD_INFINIBAND, INFINIBAND_ALEN }, -+ { -1, 0 } -+ }; -+/* -+ * Captures the maximum hardware address length among all the supported -+ * hardware types. Please update this macro when adding a new hardware type. -+ */ -+#define DTRACE_MAX_HWTYPE_ALEN (ETH_ALEN > INFINIBAND_ALEN ? 
\ -+ ETH_ALEN : INFINIBAND_ALEN) -+ uintptr_t src = tupregs[1].dttk_value; -+ int hwtype = tupregs[0].dttk_value; -+ uint8_t hwaddr[DTRACE_MAX_HWTYPE_ALEN]; -+ char *base; -+ size_t size, len; -+ int i; -+ -+ for (i = 0; hwinfo[i].dhwa_hwtype != -1; i++) { -+ if (hwtype == hwinfo[i].dhwa_hwtype) -+ break; -+ } -+ if (hwinfo[i].dhwa_hwtype == -1) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); -+ regs[rd] = 0; -+ break; -+ } -+ len = hwinfo[i].dhwa_hwalen; -+ -+ /* -+ * Safely load the hardware address. -+ */ -+ if (!dtrace_canload(src, len, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ dtrace_bcopy((void *)src, hwaddr, len); -+ -+ /* -+ * Check if a hardware address string will fit in scratch. -+ * For every byte we need 3 characters (including ':'). -+ */ -+ size = len * 3; -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ base = (char *)mstate->dtms_scratch_ptr; -+ -+ /* -+ * Build the Hardware address string by working through the -+ * address from the beginning. Given a hardware address -+ * {0xa0, 0xaa, 0xff, 0xc, 0, 1, 2} of length 6, it will build -+ * a0:aa:ff:0c:00:01:02. -+ */ -+ for (i = 0; i < len; i++) { -+ if (hwaddr[i] < 16) { -+ *base++ = '0'; -+ *base++ = hexdigits[hwaddr[i]]; -+ } else { -+ *base++ = hexdigits[hwaddr[i] / 16]; -+ *base++ = hexdigits[hwaddr[i] % 16]; -+ } -+ -+ if (i < len - 1) -+ *base++ = ':'; -+ } -+ *base++ = '\0'; -+ regs[rd] = mstate->dtms_scratch_ptr; -+ mstate->dtms_scratch_ptr += size; -+#undef DTRACE_MAX_HWTYPE_ALEN -+ break; -+ } -+ -+ case DIF_SUBR_INET_NTOA: -+ case DIF_SUBR_INET_NTOA6: -+ case DIF_SUBR_INET_NTOP: { -+ uintptr_t src; -+ size_t size; -+ int af, argi, i; -+ char *base, *end; -+ -+ if (subr == DIF_SUBR_INET_NTOP) { -+ af = (int)tupregs[0].dttk_value; -+ argi = 1; -+ } else { -+ af = subr == DIF_SUBR_INET_NTOA ? 
AF_INET : AF_INET6; -+ argi = 0; -+ } -+ -+ src = tupregs[argi].dttk_value; -+ if (af == AF_INET) { -+ ipaddr_t ip4; -+ ipaddr_t_p ptr4; -+ uint8_t *ptr8, val; -+ -+ /* -+ * Safely load the IPv4 address. -+ */ -+ if (!dtrace_canload(src, 4, mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ ip4 = dtrace_load32(src); -+ -+ /* -+ * Check an IPv4 string will fit in scratch. -+ */ -+ size = INET_ADDRSTRLEN; -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ base = (char *)mstate->dtms_scratch_ptr; -+ end = (char *)mstate->dtms_scratch_ptr + size - 1; -+ -+ /* -+ * Stringify as a dotted decimal quad. -+ */ -+ *end-- = '\0'; -+ ptr4 = &ip4; -+ ptr8 = (uint8_t *)ptr4; -+ for (i = 3; i >= 0; i--) { -+ val = ptr8[i]; -+ -+ if (val == 0) { -+ *end-- = '0'; -+ } else { -+ for (; val; val /= 10) -+ *end-- = '0' + (val % 10); -+ } -+ -+ if (i > 0) -+ *end-- = '.'; -+ } -+ ASSERT(end + 1 >= base); -+#if IS_ENABLED(CONFIG_IPV6) -+ } else if (af == AF_INET6) { -+ in6_addr_t ip6; -+ int firstzero, tryzero, numzero, v6end; -+ uint16_t val; -+ -+ /* -+ * Stringify using RFC 1884 convention 2 - 16 bit -+ * hexadecimal values with a zero-run compression. -+ * Lower case hexadecimal digits are used. -+ * eg, fe80::214:4fff:fe0b:76c8. -+ * The IPv4 embedded form is returned for inet_ntop, -+ * just the IPv4 string is returned for inet_ntoa6. -+ */ -+ -+ /* -+ * Safely load the IPv6 address. -+ */ -+ if (!dtrace_canload(src, sizeof(in6_addr_t), mstate, -+ vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ dtrace_bcopy((void *)src, (void *)(uintptr_t)&ip6, -+ sizeof(in6_addr_t)); -+ -+ /* -+ * Check an IPv6 string will fit in scratch. 
-+ */ -+ size = INET6_ADDRSTRLEN; -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ base = (char *)mstate->dtms_scratch_ptr; -+ end = (char *)mstate->dtms_scratch_ptr + size - 1; -+ *end-- = '\0'; -+ -+ /* -+ * Find the longest run of 16 bit zero values -+ * for the single allowed zero compression - "::". -+ */ -+ firstzero = -1; -+ tryzero = -1; -+ numzero = 1; -+ for (i = 0; i < sizeof(in6_addr_t); i++) { -+ if (ip6.s6_addr[i] == 0 && -+ tryzero == -1 && i % 2 == 0) { -+ tryzero = i; -+ continue; -+ } -+ -+ if (tryzero != -1 && -+ (ip6.s6_addr[i] != 0 || -+ i == sizeof(in6_addr_t) - 1)) { -+ -+ if (i - tryzero <= numzero) { -+ tryzero = -1; -+ continue; -+ } -+ -+ firstzero = tryzero; -+ numzero = i - i % 2 - tryzero; -+ tryzero = -1; -+ -+ if (ip6.s6_addr[i] == 0 && -+ i == sizeof(in6_addr_t) - 1) -+ numzero += 2; -+ } -+ } -+ ASSERT(firstzero + numzero <= sizeof(in6_addr_t)); -+ -+ /* -+ * Check for an IPv4 embedded address. -+ */ -+ v6end = sizeof(in6_addr_t) - 2; -+ if (ipv6_addr_type(&ip6) & -+ (IPV6_ADDR_COMPATv4 | IPV6_ADDR_MAPPED)) { -+ for (i = sizeof(in6_addr_t) - 1; -+ i >= DTRACE_V4MAPPED_OFFSET; i--) { -+ ASSERT(end >= base); -+ -+ val = ip6.s6_addr[i]; -+ -+ if (val == 0) { -+ *end-- = '0'; -+ } else { -+ for (; val; val /= 10) -+ *end-- = '0' + val % 10; -+ } -+ -+ if (i > DTRACE_V4MAPPED_OFFSET) -+ *end-- = '.'; -+ } -+ -+ if (subr == DIF_SUBR_INET_NTOA6) -+ goto inetout; -+ -+ /* -+ * Set v6end to skip the IPv4 address that -+ * we have already stringified. -+ */ -+ v6end = 10; -+ } -+ -+ /* -+ * Build the IPv6 string by working through the -+ * address in reverse. 
-+ */ -+ for (i = v6end; i >= 0; i -= 2) { -+ ASSERT(end >= base); -+ -+ if (i == firstzero + numzero - 2) { -+ *end-- = ':'; -+ *end-- = ':'; -+ i -= numzero - 2; -+ continue; -+ } -+ -+ if (i < 14 && i != firstzero - 2) -+ *end-- = ':'; -+ -+ val = (ip6.s6_addr[i] << 8) + -+ ip6.s6_addr[i + 1]; -+ -+ if (val == 0) { -+ *end-- = '0'; -+ } else { -+ for (; val; val /= 16) -+ *end-- = hexdigits[val % 16]; -+ } -+ } -+ ASSERT(end + 1 >= base); -+#endif -+ } else { -+ /* -+ * The user didn't use AH_INET or AH_INET6. -+ */ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); -+ regs[rd] = 0; -+ break; -+ } -+ -+#if IS_ENABLED(CONFIG_IPV6) -+inetout: -+#endif -+ regs[rd] = (uintptr_t)end + 1; -+ mstate->dtms_scratch_ptr += size; -+ break; -+ } -+ -+ case DIF_SUBR_D_PATH: { -+ struct path *path = (struct path *)tupregs[0].dttk_value; -+ char *dest = (char *)mstate->dtms_scratch_ptr; -+ char *ptr; -+ uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; -+ unsigned int fd; -+ struct files_struct -+ *files = current->files; -+ struct fdtable *fdt; -+ -+ if (!dtrace_canload((uintptr_t)path, sizeof(struct path), -+ mstate, vstate)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (spin_is_locked(&files->file_lock) || -+ !spin_trylock(&files->file_lock)) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ fdt = files->fdt; -+ -+ /* -+ * We (currently) limit the d_path() subroutine to paths that -+ * relate to open files in the current task. 
-+ */ -+ for (fd = 0; fd < fdt->max_fds; fd++) { -+ if (fdt->fd[fd] && &fdt->fd[fd]->f_path == path) -+ break; -+ } -+ -+ spin_unlock(&files->file_lock); -+ -+ if (fd >= fdt->max_fds) { -+ *flags |= CPU_DTRACE_BADADDR; -+ *illval = (uintptr_t)path; -+ regs[rd] = 0; -+ break; -+ } -+ -+ ptr = d_path(path, dest, size); -+ if (ptr < 0) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ regs[rd] = (uintptr_t)ptr; -+ mstate->dtms_scratch_ptr += size; -+ break; -+ } -+ -+ } -+} -+ -+/* -+ * Emulate the execution of DTrace IR instructions specified by the given DIF -+ * object. This function is deliberately void fo assertions as all of the -+ * necessary checks are handled by a call to dtrace_difo_validate(). -+ */ -+uint64_t dtrace_dif_emulate(struct dtrace_difo *difo, -+ struct dtrace_mstate *mstate, -+ struct dtrace_vstate *vstate, -+ struct dtrace_state *state) -+{ -+ const dif_instr_t *text = difo->dtdo_buf; -+ const uint_t textlen = difo->dtdo_len; -+ const char *strtab = difo->dtdo_strtab; -+ const uint64_t *inttab = difo->dtdo_inttab; -+ -+ uint64_t rval = 0; -+ struct dtrace_statvar *svar; -+ struct dtrace_dstate *dstate = &vstate->dtvs_dynvars; -+ struct dtrace_difv *v; -+ volatile uint16_t *flags = &this_cpu_core->cpuc_dtrace_flags; -+ volatile uintptr_t *illval = &this_cpu_core->cpuc_dtrace_illval; -+ -+ struct dtrace_key tupregs[DIF_DTR_NREGS + 2]; -+ /* +2 for thread and id */ -+ uint64_t regs[DIF_DIR_NREGS]; -+ uint64_t *tmp; -+ -+ uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0; -+ int64_t cc_r; -+ uint_t pc = 0, id, opc = 0; -+ uint8_t ttop = 0; -+ dif_instr_t instr; -+ uint_t r1, r2, rd; -+ -+ dt_dbg_dif(" DIF %p emulation (text %p, %d instructions)...\n", -+ difo, text, textlen); -+ -+ /* -+ * We stash the current DIF object into the machine state: we need it -+ * for subsequent access checking. 
-+ */ -+ mstate->dtms_difo = difo; -+ -+ regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */ -+ -+ while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) { -+ opc = pc; -+ -+ instr = text[pc++]; -+ r1 = DIF_INSTR_R1(instr); -+ r2 = DIF_INSTR_R2(instr); -+ rd = DIF_INSTR_RD(instr); -+ -+ dt_dbg_dif(" Executing opcode %02x (%02x, %02x, %02x)\n", -+ DIF_INSTR_OP(instr), r1, r2, rd); -+ -+ switch (DIF_INSTR_OP(instr)) { -+ case DIF_OP_OR: -+ regs[rd] = regs[r1] | regs[r2]; -+ break; -+ case DIF_OP_XOR: -+ regs[rd] = regs[r1] ^ regs[r2]; -+ break; -+ case DIF_OP_AND: -+ regs[rd] = regs[r1] & regs[r2]; -+ break; -+ case DIF_OP_SLL: -+ regs[rd] = regs[r1] << regs[r2]; -+ break; -+ case DIF_OP_SRL: -+ regs[rd] = regs[r1] >> regs[r2]; -+ break; -+ case DIF_OP_SUB: -+ regs[rd] = regs[r1] - regs[r2]; -+ break; -+ case DIF_OP_ADD: -+ regs[rd] = regs[r1] + regs[r2]; -+ break; -+ case DIF_OP_MUL: -+ regs[rd] = regs[r1] * regs[r2]; -+ break; -+ case DIF_OP_SDIV: -+ if (regs[r2] == 0) { -+ regs[rd] = 0; -+ *flags |= CPU_DTRACE_DIVZERO; -+ } else { -+ int neg = 0; -+ -+ /* -+ * We cannot simply do a 64-bit division, since -+ * gcc translates it into a call to a function -+ * that is not linked into the kernel. -+ * -+ * regs[rd] = (int64_t)regs[r1] / -+ * (int64_t)regs[r2]; -+ */ -+ if ((int64_t)regs[r1] < 0) { -+ neg = !neg; -+ regs[r1] = -(int64_t)regs[r1]; -+ } -+ if ((int64_t)regs[r2] < 0) { -+ neg = !neg; -+ regs[r2] = -(int64_t)regs[r2]; -+ } -+ regs[rd] = regs[r1]; -+ do_div(regs[rd], regs[r2]); -+ -+ if (neg) -+ regs[rd] = -(int64_t)regs[rd]; -+ } -+ break; -+ -+ case DIF_OP_UDIV: -+ if (regs[r2] == 0) { -+ regs[rd] = 0; -+ *flags |= CPU_DTRACE_DIVZERO; -+ } else { -+ /* -+ * We cannot simply do a 64-bit division, since -+ * gcc translates it into a call to a function -+ * that is not linked into the kernel. 
-+ * -+ * regs[rd] = regs[r1] / regs[r2]; -+ */ -+ regs[rd] = regs[r1]; -+ do_div(regs[rd], regs[r2]); -+ } -+ break; -+ -+ case DIF_OP_SREM: -+ if (regs[r2] == 0) { -+ regs[rd] = 0; -+ *flags |= CPU_DTRACE_DIVZERO; -+ } else { -+ int neg = 0; -+ -+ /* -+ * We cannot simply do a 64-bit division, since -+ * gcc translates it into a call to a function -+ * that is not linked into the kernel. -+ * -+ * regs[rd] = (int64_t)regs[r1] % -+ * (int64_t)regs[r2]; -+ */ -+ if ((int64_t)regs[r1] < 0) { -+ neg = !neg; -+ regs[r1] = -(int64_t)regs[r1]; -+ } -+ if ((int64_t)regs[r2] < 0) { -+ neg = !neg; -+ regs[r2] = -(int64_t)regs[r2]; -+ } -+ regs[rd] = regs[r1]; -+ regs[rd] = do_div(regs[rd], regs[r2]); -+ -+ if (neg) -+ regs[rd] = -(int64_t)regs[rd]; -+ } -+ break; -+ -+ case DIF_OP_UREM: -+ if (regs[r2] == 0) { -+ regs[rd] = 0; -+ *flags |= CPU_DTRACE_DIVZERO; -+ } else { -+ /* -+ * We cannot simply do a 64-bit division, since -+ * gcc translates it into a call to a function -+ * that is not linked into the kernel. 
-+ * -+ * regs[rd] = regs[r1] % regs[r2]; -+ */ -+ regs[rd] = regs[r1]; -+ regs[rd] = do_div(regs[rd], regs[r2]); -+ } -+ break; -+ -+ case DIF_OP_NOT: -+ regs[rd] = ~regs[r1]; -+ break; -+ case DIF_OP_MOV: -+ regs[rd] = regs[r1]; -+ break; -+ case DIF_OP_CMP: -+ cc_r = regs[r1] - regs[r2]; -+ cc_n = cc_r < 0; -+ cc_z = cc_r == 0; -+ cc_v = 0; -+ cc_c = regs[r1] < regs[r2]; -+ break; -+ case DIF_OP_TST: -+ cc_n = cc_v = cc_c = 0; -+ cc_z = regs[r1] == 0; -+ break; -+ case DIF_OP_BA: -+ pc = DIF_INSTR_LABEL(instr); -+ break; -+ case DIF_OP_BE: -+ if (cc_z) -+ pc = DIF_INSTR_LABEL(instr); -+ break; -+ case DIF_OP_BNE: -+ if (cc_z == 0) -+ pc = DIF_INSTR_LABEL(instr); -+ break; -+ case DIF_OP_BG: -+ if ((cc_z | (cc_n ^ cc_v)) == 0) -+ pc = DIF_INSTR_LABEL(instr); -+ break; -+ case DIF_OP_BGU: -+ if ((cc_c | cc_z) == 0) -+ pc = DIF_INSTR_LABEL(instr); -+ break; -+ case DIF_OP_BGE: -+ if ((cc_n ^ cc_v) == 0) -+ pc = DIF_INSTR_LABEL(instr); -+ break; -+ case DIF_OP_BGEU: -+ if (cc_c == 0) -+ pc = DIF_INSTR_LABEL(instr); -+ break; -+ case DIF_OP_BL: -+ if (cc_n ^ cc_v) -+ pc = DIF_INSTR_LABEL(instr); -+ break; -+ case DIF_OP_BLU: -+ if (cc_c) -+ pc = DIF_INSTR_LABEL(instr); -+ break; -+ case DIF_OP_BLE: -+ if (cc_z | (cc_n ^ cc_v)) -+ pc = DIF_INSTR_LABEL(instr); -+ break; -+ case DIF_OP_BLEU: -+ if (cc_c | cc_z) -+ pc = DIF_INSTR_LABEL(instr); -+ break; -+ case DIF_OP_RLDSB: -+#ifdef FIXME_OPENSOLARIS_BUG -+ if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) { -+#else -+ if (!dtrace_canload(regs[r1], 1, mstate, vstate)) { -+#endif -+ *flags |= CPU_DTRACE_KPRIV; -+ *illval = regs[r1]; -+ break; -+ } -+ /*FALLTHROUGH*/ -+ case DIF_OP_LDSB: -+ regs[rd] = (int8_t)dtrace_load8(regs[r1]); -+ break; -+ case DIF_OP_RLDSH: -+ if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) { -+ *flags |= CPU_DTRACE_KPRIV; -+ *illval = regs[r1]; -+ break; -+ } -+ /*FALLTHROUGH*/ -+ case DIF_OP_LDSH: -+ regs[rd] = (int16_t)dtrace_load16(regs[r1]); -+ break; -+ case DIF_OP_RLDSW: -+ if 
(!dtrace_canstore(regs[r1], 4, mstate, vstate)) { -+ *flags |= CPU_DTRACE_KPRIV; -+ *illval = regs[r1]; -+ break; -+ } -+ /*FALLTHROUGH*/ -+ case DIF_OP_LDSW: -+ regs[rd] = (int32_t)dtrace_load32(regs[r1]); -+ break; -+ case DIF_OP_RLDUB: -+ if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) { -+ *flags |= CPU_DTRACE_KPRIV; -+ *illval = regs[r1]; -+ break; -+ } -+ /*FALLTHROUGH*/ -+ case DIF_OP_LDUB: -+ regs[rd] = dtrace_load8(regs[r1]); -+ break; -+ case DIF_OP_RLDUH: -+ if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) { -+ *flags |= CPU_DTRACE_KPRIV; -+ *illval = regs[r1]; -+ break; -+ } -+ /*FALLTHROUGH*/ -+ case DIF_OP_LDUH: -+ regs[rd] = dtrace_load16(regs[r1]); -+ break; -+ case DIF_OP_RLDUW: -+ if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) { -+ *flags |= CPU_DTRACE_KPRIV; -+ *illval = regs[r1]; -+ break; -+ } -+ /*FALLTHROUGH*/ -+ case DIF_OP_LDUW: -+ regs[rd] = dtrace_load32(regs[r1]); -+ break; -+ case DIF_OP_RLDX: -+ if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) { -+ *flags |= CPU_DTRACE_KPRIV; -+ *illval = regs[r1]; -+ break; -+ } -+ /*FALLTHROUGH*/ -+ case DIF_OP_LDX: -+ regs[rd] = dtrace_load64(regs[r1]); -+ break; -+ case DIF_OP_ULDSB: -+ regs[rd] = (int8_t)dtrace_fuword8( -+ (void *)(uintptr_t)regs[r1]); -+ break; -+ case DIF_OP_ULDSH: -+ regs[rd] = (int16_t)dtrace_fuword16( -+ (void *)(uintptr_t)regs[r1]); -+ break; -+ case DIF_OP_ULDSW: -+ regs[rd] = (int32_t)dtrace_fuword32( -+ (void *)(uintptr_t)regs[r1]); -+ break; -+ case DIF_OP_ULDUB: -+ regs[rd] = dtrace_fuword8((void *)(uintptr_t)regs[r1]); -+ break; -+ case DIF_OP_ULDUH: -+ regs[rd] = dtrace_fuword16( -+ (void *)(uintptr_t)regs[r1]); -+ break; -+ case DIF_OP_ULDUW: -+ regs[rd] = dtrace_fuword32( -+ (void *)(uintptr_t)regs[r1]); -+ break; -+ case DIF_OP_ULDX: -+ regs[rd] = dtrace_fuword64( -+ (void *)(uintptr_t)regs[r1]); -+ break; -+ case DIF_OP_RET: -+ rval = regs[rd]; -+ pc = textlen; -+ break; -+ case DIF_OP_NOP: -+ break; -+ case DIF_OP_SETX: -+ regs[rd] = 
inttab[DIF_INSTR_INTEGER(instr)]; -+ break; -+ case DIF_OP_SETS: -+ regs[rd] = (uint64_t)(uintptr_t) -+ (strtab + DIF_INSTR_STRING(instr)); -+ break; -+ case DIF_OP_SCMP: { -+ size_t sz = state->dts_options[ -+ DTRACEOPT_STRSIZE]; -+ uintptr_t s1 = regs[r1]; -+ uintptr_t s2 = regs[r2]; -+ -+ if (s1 != (uintptr_t)NULL && -+ !dtrace_strcanload(s1, sz, mstate, vstate)) -+ break; -+ if (s2 != (uintptr_t)NULL && -+ !dtrace_strcanload(s2, sz, mstate, vstate)) -+ break; -+ -+ cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz); -+ -+ cc_n = cc_r < 0; -+ cc_z = cc_r == 0; -+ cc_v = cc_c = 0; -+ break; -+ } -+ case DIF_OP_LDGA: -+ regs[rd] = dtrace_dif_variable(mstate, state, r1, -+ regs[r2]); -+ break; -+ case DIF_OP_LDGS: -+ id = DIF_INSTR_VAR(instr); -+ -+ if (id >= DIF_VAR_OTHER_UBASE) { -+ uintptr_t a; -+ -+ id -= DIF_VAR_OTHER_UBASE; -+ svar = vstate->dtvs_globals[id]; -+ ASSERT(svar != NULL); -+ v = &svar->dtsv_var; -+ -+ if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) { -+ regs[rd] = svar->dtsv_data; -+ break; -+ } -+ -+ a = (uintptr_t)svar->dtsv_data; -+ -+ /* -+ * If the 0th byte is set to UINT8_MAX then -+ * this is to be treated as a reference to a -+ * NULL variable. 
-+ */ -+ if (*(uint8_t *)a == UINT8_MAX) -+ regs[rd] = 0; -+ else -+ regs[rd] = a + sizeof(uint64_t); -+ -+ break; -+ } -+ -+ regs[rd] = dtrace_dif_variable(mstate, state, id, 0); -+ break; -+ -+ case DIF_OP_STGS: -+ id = DIF_INSTR_VAR(instr); -+ -+ ASSERT(id >= DIF_VAR_OTHER_UBASE); -+ id -= DIF_VAR_OTHER_UBASE; -+ -+ svar = vstate->dtvs_globals[id]; -+ ASSERT(svar != NULL); -+ v = &svar->dtsv_var; -+ -+ if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { -+ uintptr_t a = (uintptr_t)svar->dtsv_data; -+ -+ ASSERT(a != 0); -+ ASSERT(svar->dtsv_size != 0); -+ -+ if (regs[rd] == 0) { -+ *(uint8_t *)a = UINT8_MAX; -+ break; -+ } else { -+ *(uint8_t *)a = 0; -+ a += sizeof(uint64_t); -+ } -+ -+ if (!dtrace_vcanload( -+ (void *)(uintptr_t)regs[rd], -+ &v->dtdv_type, mstate, vstate)) -+ break; -+ -+ dtrace_vcopy((void *)(uintptr_t)regs[rd], -+ (void *)a, &v->dtdv_type); -+ break; -+ } -+ -+ svar->dtsv_data = regs[rd]; -+ break; -+ -+ case DIF_OP_LDTA: -+ /* -+ * There are no DTrace built-in thread-local arrays at -+ * present. This opcode is saved for future work. -+ */ -+ *flags |= CPU_DTRACE_ILLOP; -+ regs[rd] = 0; -+ break; -+ -+ case DIF_OP_LDLS: -+ id = DIF_INSTR_VAR(instr); -+ -+ if (id < DIF_VAR_OTHER_UBASE) { -+ /* -+ * For now, this has no meaning. -+ */ -+ regs[rd] = 0; -+ break; -+ } -+ -+ id -= DIF_VAR_OTHER_UBASE; -+ -+ ASSERT(id < vstate->dtvs_nlocals); -+ ASSERT(vstate->dtvs_locals != NULL); -+ -+ svar = vstate->dtvs_locals[id]; -+ ASSERT(svar != NULL); -+ v = &svar->dtsv_var; -+ -+ if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { -+ uintptr_t a = (uintptr_t)svar->dtsv_data; -+ size_t sz = v->dtdv_type.dtdt_size; -+ -+ sz += sizeof(uint64_t); -+ ASSERT(svar->dtsv_size == NR_CPUS * sz); -+ a += smp_processor_id() * sz; -+ -+ if (*(uint8_t *)a == UINT8_MAX) { -+ /* -+ * If the 0th byte is set to UINT8_MAX -+ * then this is to be treated as a -+ * reference to a NULL variable. 
-+ */ -+ regs[rd] = 0; -+ } else -+ regs[rd] = a + sizeof(uint64_t); -+ -+ break; -+ } -+ -+ ASSERT(svar->dtsv_size == NR_CPUS * sizeof(uint64_t)); -+ tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; -+ regs[rd] = tmp[smp_processor_id()]; -+ break; -+ -+ case DIF_OP_STLS: -+ id = DIF_INSTR_VAR(instr); -+ -+ ASSERT(id >= DIF_VAR_OTHER_UBASE); -+ id -= DIF_VAR_OTHER_UBASE; -+ ASSERT(id < vstate->dtvs_nlocals); -+ -+ ASSERT(vstate->dtvs_locals != NULL); -+ svar = vstate->dtvs_locals[id]; -+ ASSERT(svar != NULL); -+ v = &svar->dtsv_var; -+ -+ if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { -+ uintptr_t a = (uintptr_t)svar->dtsv_data; -+ size_t sz = v->dtdv_type.dtdt_size; -+ -+ sz += sizeof(uint64_t); -+ ASSERT(svar->dtsv_size == NR_CPUS * sz); -+ a += smp_processor_id() * sz; -+ -+ if (regs[rd] == 0) { -+ *(uint8_t *)a = UINT8_MAX; -+ break; -+ } else { -+ *(uint8_t *)a = 0; -+ a += sizeof(uint64_t); -+ } -+ -+ if (!dtrace_vcanload( -+ (void *)(uintptr_t)regs[rd], -+ &v->dtdv_type, mstate, vstate)) -+ break; -+ -+ dtrace_vcopy((void *)(uintptr_t)regs[rd], -+ (void *)a, &v->dtdv_type); -+ break; -+ } -+ -+ ASSERT(svar->dtsv_size == NR_CPUS * sizeof(uint64_t)); -+ tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; -+ tmp[smp_processor_id()] = regs[rd]; -+ break; -+ -+ case DIF_OP_LDTS: { -+ struct dtrace_dynvar *dvar; -+ struct dtrace_key *key; -+ -+ id = DIF_INSTR_VAR(instr); -+ ASSERT(id >= DIF_VAR_OTHER_UBASE); -+ id -= DIF_VAR_OTHER_UBASE; -+ v = &vstate->dtvs_tlocals[id]; -+ -+ key = &tupregs[DIF_DTR_NREGS]; -+ key[0].dttk_value = (uint64_t)id; -+ key[0].dttk_size = 0; -+ DTRACE_TLS_THRKEY(key[1].dttk_value); -+ key[1].dttk_size = 0; -+ -+ dvar = dtrace_dynvar(dstate, 2, key, sizeof(uint64_t), -+ DTRACE_DYNVAR_NOALLOC, mstate, -+ vstate); -+ -+ if (dvar == NULL) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) -+ regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; -+ else -+ regs[rd] = *((uint64_t *)dvar->dtdv_data); -+ -+ break; -+ } -+ -+ 
case DIF_OP_STTS: { -+ struct dtrace_dynvar *dvar; -+ struct dtrace_key *key; -+ -+ id = DIF_INSTR_VAR(instr); -+ ASSERT(id >= DIF_VAR_OTHER_UBASE); -+ id -= DIF_VAR_OTHER_UBASE; -+ -+ key = &tupregs[DIF_DTR_NREGS]; -+ key[0].dttk_value = (uint64_t)id; -+ key[0].dttk_size = 0; -+ DTRACE_TLS_THRKEY(key[1].dttk_value); -+ key[1].dttk_size = 0; -+ v = &vstate->dtvs_tlocals[id]; -+ -+ dvar = dtrace_dynvar(dstate, 2, key, -+ v->dtdv_type.dtdt_size > sizeof(uint64_t) -+ ? v->dtdv_type.dtdt_size -+ : sizeof(uint64_t), -+ regs[rd] -+ ? DTRACE_DYNVAR_ALLOC -+ : DTRACE_DYNVAR_DEALLOC, -+ mstate, vstate); -+ -+ /* -+ * Given that we're storing to thread-local data, -+ * we need to flush our predicate cache. -+ */ -+ if (current->dt_task != NULL) -+ current->dt_task->dt_predcache = 0; -+ -+ if (dvar == NULL) -+ break; -+ -+ if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { -+ if (!dtrace_vcanload( -+ (void *)(uintptr_t)regs[rd], -+ &v->dtdv_type, mstate, vstate)) -+ break; -+ -+ dtrace_vcopy((void *)(uintptr_t)regs[rd], -+ dvar->dtdv_data, &v->dtdv_type); -+ } else -+ *((uint64_t *)dvar->dtdv_data) = regs[rd]; -+ -+ break; -+ } -+ -+ case DIF_OP_SRA: -+ regs[rd] = (int64_t)regs[r1] >> regs[r2]; -+ break; -+ -+ case DIF_OP_CALL: -+ dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd, regs, -+ tupregs, ttop, mstate, state); -+ break; -+ -+ case DIF_OP_PUSHTR: -+ if (ttop == DIF_DTR_NREGS) { -+ *flags |= CPU_DTRACE_TUPOFLOW; -+ break; -+ } -+ -+ if (r1 == DIF_TYPE_STRING) -+ /* -+ * If this is a string type and the size is 0, -+ * we'll use the system-wide default string -+ * size. Note that we are _not_ looking at -+ * the value of the DTRACEOPT_STRSIZE option; -+ * had this been set, we would expect to have -+ * a non-zero size value in the "pushtr". -+ */ -+ tupregs[ttop].dttk_size = -+ dtrace_strlen( -+ (char *)(uintptr_t)regs[rd], -+ regs[r2] -+ ? 
regs[r2] -+ : dtrace_strsize_default -+ ) + 1; -+ else -+ tupregs[ttop].dttk_size = regs[r2]; -+ -+ tupregs[ttop++].dttk_value = regs[rd]; -+ break; -+ -+ case DIF_OP_PUSHTV: -+ if (ttop == DIF_DTR_NREGS) { -+ *flags |= CPU_DTRACE_TUPOFLOW; -+ break; -+ } -+ -+ tupregs[ttop].dttk_value = regs[rd]; -+ tupregs[ttop++].dttk_size = 0; -+ break; -+ -+ case DIF_OP_POPTS: -+ if (ttop != 0) -+ ttop--; -+ break; -+ -+ case DIF_OP_FLUSHTS: -+ ttop = 0; -+ break; -+ -+ case DIF_OP_LDGAA: -+ case DIF_OP_LDTAA: { -+ struct dtrace_dynvar *dvar; -+ struct dtrace_key *key = tupregs; -+ uint_t nkeys = ttop; -+ -+ id = DIF_INSTR_VAR(instr); -+ ASSERT(id >= DIF_VAR_OTHER_UBASE); -+ id -= DIF_VAR_OTHER_UBASE; -+ -+ key[nkeys].dttk_value = (uint64_t)id; -+ key[nkeys++].dttk_size = 0; -+ -+ if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { -+ DTRACE_TLS_THRKEY(key[nkeys].dttk_value); -+ key[nkeys++].dttk_size = 0; -+ v = &vstate->dtvs_tlocals[id]; -+ } else -+ v = &vstate->dtvs_globals[id]->dtsv_var; -+ -+ dvar = dtrace_dynvar(dstate, nkeys, key, -+ v->dtdv_type.dtdt_size > sizeof(uint64_t) ? 
-+ v->dtdv_type.dtdt_size : sizeof(uint64_t), -+ DTRACE_DYNVAR_NOALLOC, mstate, vstate); -+ -+ if (dvar == NULL) { -+ regs[rd] = 0; -+ break; -+ } -+ -+ if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) -+ regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; -+ else -+ regs[rd] = *((uint64_t *)dvar->dtdv_data); -+ -+ break; -+ } -+ -+ case DIF_OP_STGAA: -+ case DIF_OP_STTAA: { -+ struct dtrace_dynvar *dvar; -+ struct dtrace_key *key = tupregs; -+ uint_t nkeys = ttop; -+ -+ id = DIF_INSTR_VAR(instr); -+ ASSERT(id >= DIF_VAR_OTHER_UBASE); -+ id -= DIF_VAR_OTHER_UBASE; -+ -+ key[nkeys].dttk_value = (uint64_t)id; -+ key[nkeys++].dttk_size = 0; -+ -+ if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { -+ DTRACE_TLS_THRKEY(key[nkeys].dttk_value); -+ key[nkeys++].dttk_size = 0; -+ v = &vstate->dtvs_tlocals[id]; -+ } else -+ v = &vstate->dtvs_globals[id]->dtsv_var; -+ -+ dvar = dtrace_dynvar(dstate, nkeys, key, -+ v->dtdv_type.dtdt_size > sizeof(uint64_t) -+ ? v->dtdv_type.dtdt_size -+ : sizeof(uint64_t), -+ regs[rd] ? DTRACE_DYNVAR_ALLOC -+ : DTRACE_DYNVAR_DEALLOC, -+ mstate, vstate); -+ -+ if (dvar == NULL) -+ break; -+ -+ if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { -+ if (!dtrace_vcanload( -+ (void *)(uintptr_t)regs[rd], -+ &v->dtdv_type, mstate, vstate)) -+ break; -+ -+ dtrace_vcopy((void *)(uintptr_t)regs[rd], -+ dvar->dtdv_data, &v->dtdv_type); -+ } else -+ *((uint64_t *)dvar->dtdv_data) = regs[rd]; -+ -+ break; -+ } -+ -+ case DIF_OP_ALLOCS: { -+ uintptr_t ptr = -+ P2ROUNDUP(mstate->dtms_scratch_ptr, 8); -+ size_t size = ptr - mstate->dtms_scratch_ptr + -+ regs[r1]; -+ -+ /* -+ * Rounding up the user allocation size could have -+ * overflowed large, bogus allocations (like -1ULL) to -+ * 0. 
-+ */ -+ if (size < regs[r1] || -+ !DTRACE_INSCRATCH(mstate, size)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ regs[rd] = 0; -+ break; -+ } -+ -+ dtrace_bzero((void *) mstate->dtms_scratch_ptr, size); -+ mstate->dtms_scratch_ptr += size; -+ regs[rd] = ptr; -+ break; -+ } -+ -+ case DIF_OP_COPYS: -+ if (!dtrace_canstore(regs[rd], regs[r2], mstate, -+ vstate)) { -+ *flags |= CPU_DTRACE_BADADDR; -+ *illval = regs[rd]; -+ break; -+ } -+ -+ if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate)) -+ break; -+ -+ dtrace_bcopy((void *)(uintptr_t)regs[r1], -+ (void *)(uintptr_t)regs[rd], -+ (size_t)regs[r2]); -+ break; -+ -+ case DIF_OP_STB: -+ if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) { -+ *flags |= CPU_DTRACE_BADADDR; -+ *illval = regs[rd]; -+ break; -+ } -+ -+ *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1]; -+ break; -+ -+ case DIF_OP_STH: -+ if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) { -+ *flags |= CPU_DTRACE_BADADDR; -+ *illval = regs[rd]; -+ break; -+ } -+ -+ if (regs[rd] & 1) { -+ *flags |= CPU_DTRACE_BADALIGN; -+ *illval = regs[rd]; -+ break; -+ } -+ -+ *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1]; -+ break; -+ -+ case DIF_OP_STW: -+ if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) { -+ *flags |= CPU_DTRACE_BADADDR; -+ *illval = regs[rd]; -+ break; -+ } -+ -+ if (regs[rd] & 3) { -+ *flags |= CPU_DTRACE_BADALIGN; -+ *illval = regs[rd]; -+ break; -+ } -+ -+ *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1]; -+ break; -+ -+ case DIF_OP_STX: -+ if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) { -+ *flags |= CPU_DTRACE_BADADDR; -+ *illval = regs[rd]; -+ break; -+ } -+ -+ if (regs[rd] & 7) { -+ *flags |= CPU_DTRACE_BADALIGN; -+ *illval = regs[rd]; -+ break; -+ } -+ -+ *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1]; -+ break; -+ } -+ } -+ -+ -+ if (!(*flags & CPU_DTRACE_FAULT)) { -+ dt_dbg_dif(" DIF %p completed, rval = %llx (flags %x)\n", -+ difo, rval, *flags); -+ return rval; -+ } -+ -+ dt_dbg_dif(" DIF %p emulation 
failed (flags %x)\n", difo, *flags); -+ -+ mstate->dtms_fltoffs = opc * sizeof(dif_instr_t); -+ mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS; -+ -+ return 0; -+} -diff --git a/dtrace/dtrace_dof.c b/dtrace/dtrace_dof.c -new file mode 100644 -index 000000000000..85ff9b21a205 ---- /dev/null -+++ b/dtrace/dtrace_dof.c -@@ -0,0 +1,2504 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_dof.c -+ * DESCRIPTION: DTrace - DOF implementation -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/dtrace_task_impl.h> -+#include <linux/slab.h> -+#include <linux/types.h> -+#include <linux/vmalloc.h> -+#include <linux/uaccess.h> -+ -+#include "dtrace.h" -+ -+size_t dtrace_difo_maxsize = 256 * 1024; -+dtrace_optval_t dtrace_dof_maxsize = 256 * 1024; -+size_t dtrace_actions_max = 16 * 1024; -+dtrace_optval_t dtrace_helper_actions_max = 32; -+dtrace_optval_t dtrace_helper_providers_max = 32; -+ -+static int dtrace_helpers; -+ -+static uint32_t dtrace_helptrace_next; -+static uint32_t dtrace_helptrace_nlocals; -+ -+#ifdef CONFIG_DT_DEBUG -+int dtrace_helptrace_enabled = 1; -+#else -+int dtrace_helptrace_enabled = 0; -+#endif -+int dtrace_helptrace_bufsize = 512 * 1024; -+char *dtrace_helptrace_buffer; -+ -+void dtrace_dof_error(struct dof_hdr *dof, const char *str) -+{ -+ if (dtrace_err_verbose) -+ pr_warn("failed to process DOF: %s", str); -+ else -+ dt_dbg_dof("Failed to process DOF: %s\n", str); -+ -+#ifdef DTRACE_ERRDEBUG -+ dtrace_errdebug(str); -+#endif -+} -+ -+/* -+ * Create DOF out of a currently enabled state. Right now, we only create -+ * DOF containing the run-time options -- but this could be expanded to create -+ * complete DOF representing the enabled state. 
-+ */ -+struct dof_hdr *dtrace_dof_create(struct dtrace_state *state) -+{ -+ struct dof_hdr *dof; -+ struct dof_sec *sec; -+ struct dof_optdesc *opt; -+ -+ int i, len = sizeof(struct dof_hdr) + -+ roundup(sizeof(struct dof_sec), -+ sizeof(uint64_t)) + -+ sizeof(struct dof_optdesc) * DTRACEOPT_MAX; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ dof = vmalloc(len); -+ if (dof == NULL) -+ return NULL; -+ -+ dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; -+ dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; -+ dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; -+ dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3; -+ -+ dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE; -+ dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE; -+ dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION; -+ dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION; -+ dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS; -+ dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS; -+ -+ dof->dofh_flags = 0; -+ dof->dofh_hdrsize = sizeof(struct dof_hdr); -+ dof->dofh_secsize = sizeof(struct dof_sec); -+ dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */ -+ dof->dofh_secoff = sizeof(struct dof_hdr); -+ dof->dofh_loadsz = len; -+ dof->dofh_filesz = len; -+ dof->dofh_pad = 0; -+ -+ /* -+ * Fill in the option section header... 
-+ */ -+ sec = (struct dof_sec *)((uintptr_t)dof + sizeof(struct dof_hdr)); -+ sec->dofs_type = DOF_SECT_OPTDESC; -+ sec->dofs_align = sizeof(uint64_t); -+ sec->dofs_flags = DOF_SECF_LOAD; -+ sec->dofs_entsize = sizeof(struct dof_optdesc); -+ -+ opt = (struct dof_optdesc *)((uintptr_t)sec + -+ roundup(sizeof(struct dof_sec), -+ sizeof(uint64_t))); -+ -+ sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof; -+ sec->dofs_size = sizeof(struct dof_optdesc) * DTRACEOPT_MAX; -+ -+ for (i = 0; i < DTRACEOPT_MAX; i++) { -+ opt[i].dofo_option = i; -+ opt[i].dofo_strtab = DOF_SECIDX_NONE; -+ opt[i].dofo_value = state->dts_options[i]; -+ } -+ -+ return dof; -+} -+ -+struct dof_hdr *dtrace_dof_copyin(void __user *argp, int *errp) -+{ -+ struct dof_hdr hdr, *dof; -+ -+ ASSERT(!MUTEX_HELD(&dtrace_lock)); -+ -+ /* -+ * First, we're going to copyin() the sizeof(dof_hdr_t). -+ */ -+ if (copy_from_user(&hdr, argp, sizeof(hdr)) != 0) { -+ dtrace_dof_error(NULL, "failed to copyin DOF header"); -+ *errp = -EFAULT; -+ return NULL; -+ } -+ -+ /* -+ * Now we'll allocate the entire DOF and copy it in -- provided -+ * that the length isn't outrageous. -+ */ -+ if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { -+ dtrace_dof_error(&hdr, "load size exceeds maximum"); -+ *errp = -E2BIG; -+ return NULL; -+ } -+ -+ if (hdr.dofh_loadsz < sizeof(hdr)) { -+ dtrace_dof_error(&hdr, "invalid load size"); -+ *errp = -EINVAL; -+ return NULL; -+ } -+ -+ dof = vmalloc(hdr.dofh_loadsz); -+ if (dof == NULL) { -+ *errp = -ENOMEM; -+ return NULL; -+ } -+ -+ if (copy_from_user(dof, argp, hdr.dofh_loadsz) != 0 || -+ dof->dofh_loadsz != hdr.dofh_loadsz) { -+ vfree(dof); -+ *errp = -EFAULT; -+ return NULL; -+ } -+ -+ return dof; -+} -+ -+struct dof_hdr *dtrace_dof_property(const char *name) -+{ -+ uchar_t *buf; -+ uint64_t loadsz; -+ unsigned int len, i; -+ struct dof_hdr *dof; -+ -+ /* -+ * Unfortunately, array of values in .conf files are always (and -+ * only) interpreted to be integer arrays. 
We must read our DOF -+ * as an integer array, and then squeeze it into a byte array. -+ */ -+#ifdef FIXME -+ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, -+ (char *)name, (int **)&buf, &len) != -+ DDI_PROP_SUCCESS) -+ return NULL; -+#else -+ return NULL; -+#endif -+ -+ for (i = 0; i < len; i++) -+ buf[i] = (uchar_t)(((int *)buf)[i]); -+ -+ if (len < sizeof(struct dof_hdr)) { -+#ifdef FIXME -+ ddi_prop_free(buf); -+#endif -+ dtrace_dof_error(NULL, "truncated header"); -+ return NULL; -+ } -+ -+ loadsz = ((struct dof_hdr *)buf)->dofh_loadsz; -+ if (len < loadsz) { -+#ifdef FIXME -+ ddi_prop_free(buf); -+#endif -+ dtrace_dof_error(NULL, "truncated DOF"); -+ return NULL; -+ } -+ -+ if (loadsz >= dtrace_dof_maxsize) { -+#ifdef FIXME -+ ddi_prop_free(buf); -+#endif -+ dtrace_dof_error(NULL, "oversized DOF"); -+ return NULL; -+ } -+ -+ dof = vmalloc(loadsz); -+ if (dof == NULL) { -+ dtrace_dof_error(NULL, "out-of-memory"); -+ return NULL; -+ } -+ memcpy(dof, buf, loadsz); -+#ifdef FIXME -+ ddi_prop_free(buf); -+#endif -+ -+ return dof; -+} -+ -+void dtrace_dof_destroy(struct dof_hdr *dof) -+{ -+ vfree(dof); -+} -+ -+/* -+ * Return the dof_sec_t pointer corresponding to a given section index. If the -+ * index is not valid, dtrace_dof_error() is called and NULL is returned. If -+ * a type other than DOF_SECT_NONE is specified, the header is checked against -+ * this type and NULL is returned if the types do not match. 
-+ */ -+static struct dof_sec *dtrace_dof_sect(struct dof_hdr *dof, uint32_t doftype, -+ dof_secidx_t i) -+{ -+ struct dof_sec *sec; -+ -+ sec = (struct dof_sec *)(uintptr_t) ((uintptr_t)dof + -+ dof->dofh_secoff + -+ i * dof->dofh_secsize); -+ -+ if (i >= dof->dofh_secnum) { -+ dtrace_dof_error(dof, "referenced section index is invalid"); -+ return NULL; -+ } -+ -+ if (!(sec->dofs_flags & DOF_SECF_LOAD)) { -+ dtrace_dof_error(dof, "referenced section is not loadable"); -+ return NULL; -+ } -+ -+ if (doftype != DOF_SECT_NONE && doftype != sec->dofs_type) { -+ dtrace_dof_error(dof, "referenced section is the wrong type"); -+ return NULL; -+ } -+ -+ return sec; -+} -+ -+static struct dtrace_probedesc *dtrace_dof_probedesc(struct dof_hdr *dof, -+ struct dof_sec *sec, -+ struct dtrace_probedesc *desc) -+{ -+ struct dof_probedesc *probe; -+ struct dof_sec *strtab; -+ uintptr_t daddr = (uintptr_t)dof; -+ uintptr_t str; -+ size_t size; -+ -+ if (sec->dofs_type != DOF_SECT_PROBEDESC) { -+ dtrace_dof_error(dof, "invalid probe section"); -+ return NULL; -+ } -+ -+ if (sec->dofs_align != sizeof(dof_secidx_t)) { -+ dtrace_dof_error(dof, "bad alignment in probe description"); -+ return NULL; -+ } -+ -+ if (sec->dofs_offset + sizeof(struct dof_probedesc) > -+ dof->dofh_loadsz) { -+ dtrace_dof_error(dof, "truncated probe description"); -+ return NULL; -+ } -+ -+ probe = (struct dof_probedesc *)(uintptr_t)(daddr + sec->dofs_offset); -+ strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab); -+ -+ if (strtab == NULL) -+ return NULL; -+ -+ str = daddr + strtab->dofs_offset; -+ size = strtab->dofs_size; -+ -+ if (probe->dofp_provider >= strtab->dofs_size) { -+ dtrace_dof_error(dof, "corrupt probe provider"); -+ return NULL; -+ } -+ -+ strncpy(desc->dtpd_provider, (char *)(str + probe->dofp_provider), -+ min((size_t)DTRACE_PROVNAMELEN - 1, -+ size - probe->dofp_provider)); -+ -+ if (probe->dofp_mod >= strtab->dofs_size) { -+ dtrace_dof_error(dof, "corrupt probe module"); 
-+ return NULL; -+ } -+ -+ strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), -+ min((size_t)DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); -+ -+ if (probe->dofp_func >= strtab->dofs_size) { -+ dtrace_dof_error(dof, "corrupt probe function"); -+ return NULL; -+ } -+ -+ strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), -+ min((size_t)DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); -+ -+ if (probe->dofp_name >= strtab->dofs_size) { -+ dtrace_dof_error(dof, "corrupt probe name"); -+ return NULL; -+ } -+ -+ strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), -+ min((size_t)DTRACE_NAMELEN - 1, size - probe->dofp_name)); -+ -+ dt_dbg_dof(" ECB Probe %s:%s:%s:%s\n", -+ desc->dtpd_provider, desc->dtpd_mod, desc->dtpd_func, -+ desc->dtpd_name); -+ -+ return desc; -+} -+ -+static struct dtrace_difo *dtrace_dof_difo(struct dof_hdr *dof, -+ struct dof_sec *sec, -+ struct dtrace_vstate *vstate, -+ const struct cred *cr) -+{ -+ struct dtrace_difo *dp; -+ size_t ttl = 0; -+ struct dof_difohdr *dofd; -+ uintptr_t daddr = (uintptr_t)dof; -+ size_t max = dtrace_difo_maxsize; -+ int i, l, n; -+ -+ static const struct { -+ int section; -+ int bufoffs; -+ int lenoffs; -+ int entsize; -+ int align; -+ const char *msg; -+ } difo[] = { -+ { -+ DOF_SECT_DIF, -+ offsetof(struct dtrace_difo, dtdo_buf), -+ offsetof(struct dtrace_difo, dtdo_len), -+ sizeof(dif_instr_t), -+ sizeof(dif_instr_t), -+ "multiple DIF sections" -+ }, -+ { -+ DOF_SECT_INTTAB, -+ offsetof(struct dtrace_difo, dtdo_inttab), -+ offsetof(struct dtrace_difo, dtdo_intlen), -+ sizeof(uint64_t), -+ sizeof(uint64_t), -+ "multiple integer tables" -+ }, -+ { -+ DOF_SECT_STRTAB, -+ offsetof(struct dtrace_difo, dtdo_strtab), -+ offsetof(struct dtrace_difo, dtdo_strlen), -+ 0, -+ sizeof(char), -+ "multiple string tables" -+ }, -+ { -+ DOF_SECT_VARTAB, -+ offsetof(struct dtrace_difo, dtdo_vartab), -+ offsetof(struct dtrace_difo, dtdo_varlen), -+ sizeof(struct dtrace_difv), -+ sizeof(uint_t), -+ "multiple 
variable tables" -+ }, -+ { -+ DOF_SECT_NONE, -+ 0, -+ 0, -+ 0, -+ 0, -+ NULL -+ } -+ }; -+ -+ if (sec->dofs_type != DOF_SECT_DIFOHDR) { -+ dtrace_dof_error(dof, "invalid DIFO header section"); -+ return NULL; -+ } -+ -+ if (sec->dofs_align != sizeof(dof_secidx_t)) { -+ dtrace_dof_error(dof, "bad alignment in DIFO header"); -+ return NULL; -+ } -+ -+ if (sec->dofs_size < sizeof(struct dof_difohdr) || -+ sec->dofs_size % sizeof(dof_secidx_t)) { -+ dtrace_dof_error(dof, "bad size in DIFO header"); -+ return NULL; -+ } -+ -+ dofd = (struct dof_difohdr *)(uintptr_t)(daddr + sec->dofs_offset); -+ n = (sec->dofs_size - sizeof(*dofd)) / sizeof(dof_secidx_t) + 1; -+ -+ dp = kzalloc(sizeof(struct dtrace_difo), GFP_KERNEL); -+ if (dp == NULL) { -+ dtrace_dof_error(dof, "out-of-memory"); -+ return NULL; -+ } -+ dp->dtdo_rtype = dofd->dofd_rtype; -+ -+ for (l = 0; l < n; l++) { -+ struct dof_sec *subsec; -+ void **bufp; -+ uint32_t *lenp; -+ -+ subsec = dtrace_dof_sect(dof, DOF_SECT_NONE, -+ dofd->dofd_links[l]); -+ if (subsec == NULL) -+ goto err; /* invalid section link */ -+ -+ if (ttl + subsec->dofs_size > max) { -+ dtrace_dof_error(dof, "exceeds maximum size"); -+ goto err; -+ } -+ -+ ttl += subsec->dofs_size; -+ -+ for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { -+ if (subsec->dofs_type != difo[i].section) -+ continue; -+ -+ if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { -+ dtrace_dof_error(dof, "section not loaded"); -+ goto err; -+ } -+ -+ if (subsec->dofs_align != difo[i].align) { -+ dtrace_dof_error(dof, "bad alignment"); -+ goto err; -+ } -+ -+ bufp = (void **)((uintptr_t)dp + difo[i].bufoffs); -+ lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs); -+ -+ if (*bufp != NULL) { -+ dtrace_dof_error(dof, difo[i].msg); -+ goto err; -+ } -+ -+ if (difo[i].entsize != subsec->dofs_entsize) { -+ dtrace_dof_error(dof, "entry size mismatch"); -+ goto err; -+ } -+ -+ if (subsec->dofs_entsize != 0) { -+ uint64_t n = subsec->dofs_size; -+ -+ if (do_div(n, 
subsec->dofs_entsize) != 0) { -+ dtrace_dof_error(dof, -+ "corrupt entry size"); -+ goto err; -+ } -+ } -+ -+ *lenp = subsec->dofs_size; -+ *bufp = vmalloc(subsec->dofs_size); -+ if (*bufp == NULL) { -+ dtrace_dof_error(dof, "out-of-memory"); -+ goto err; -+ } -+ memcpy(*bufp, -+ (char *)(uintptr_t)(daddr + subsec->dofs_offset), -+ subsec->dofs_size); -+ -+ if (subsec->dofs_entsize != 0) -+ *lenp /= subsec->dofs_entsize; -+ -+ break; -+ } -+ -+ /* -+ * If we encounter a loadable DIFO sub-section that is not -+ * known to us, assume this is a broken program and fail. -+ */ -+ if (difo[i].section == DOF_SECT_NONE && -+ (subsec->dofs_flags & DOF_SECF_LOAD)) { -+ dtrace_dof_error(dof, "unrecognized DIFO subsection"); -+ goto err; -+ } -+ } -+ -+ if (dp->dtdo_buf == NULL) { -+ /* -+ * We can't have a DIF object without DIF text. -+ */ -+ dtrace_dof_error(dof, "missing DIF text"); -+ goto err; -+ } -+ -+ /* -+ * Before we validate the DIF object, run through the variable table -+ * looking for the strings -- if any of their size are under, we'll set -+ * their size to be the system-wide default string size. Note that -+ * this should _not_ happen if the "strsize" option has been set -- -+ * in this case, the compiler should have set the size to reflect the -+ * setting of the option. 
-+ */ -+ for (i = 0; i < dp->dtdo_varlen; i++) { -+ struct dtrace_difv *v = &dp->dtdo_vartab[i]; -+ struct dtrace_diftype *t = &v->dtdv_type; -+ -+ if (v->dtdv_id < DIF_VAR_OTHER_UBASE) -+ continue; -+ -+ if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0) -+ t->dtdt_size = dtrace_strsize_default; -+ } -+ -+ if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0) -+ goto err; -+ -+ dtrace_difo_init(dp, vstate); -+ return dp; -+ -+err: -+ if (dp->dtdo_buf != NULL) -+ vfree(dp->dtdo_buf); -+ if (dp->dtdo_inttab != NULL) -+ vfree(dp->dtdo_inttab); -+ if (dp->dtdo_strtab != NULL) -+ vfree(dp->dtdo_strtab); -+ if (dp->dtdo_vartab != NULL) -+ vfree(dp->dtdo_vartab); -+ -+ kfree(dp); -+ -+ return NULL; -+} -+ -+static struct dtrace_predicate *dtrace_dof_predicate(struct dof_hdr *dof, -+ struct dof_sec *sec, -+ struct dtrace_vstate *vstate, -+ const struct cred *cr) -+{ -+ struct dtrace_difo *dp; -+ -+ if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL) -+ return NULL; -+ -+ return dtrace_predicate_create(dp); -+} -+ -+static struct dtrace_actdesc *dtrace_dof_actdesc(struct dof_hdr *dof, -+ struct dof_sec *sec, -+ struct dtrace_vstate *vstate, -+ const struct cred *cr) -+{ -+ struct dtrace_actdesc *act, *first = NULL, *last = NULL, *next; -+ struct dof_actdesc *desc; -+ struct dof_sec *difosec; -+ size_t offs; -+ uintptr_t daddr = (uintptr_t)dof; -+ uint64_t arg; -+ dtrace_actkind_t kind; -+ -+ if (sec->dofs_type != DOF_SECT_ACTDESC) { -+ dtrace_dof_error(dof, "invalid action section"); -+ return NULL; -+ } -+ -+ if (sec->dofs_offset + sizeof(struct dof_actdesc) > dof->dofh_loadsz) { -+ dtrace_dof_error(dof, "truncated action description"); -+ return NULL; -+ } -+ -+ if (sec->dofs_align != sizeof(uint64_t)) { -+ dtrace_dof_error(dof, "bad alignment in action description"); -+ return NULL; -+ } -+ -+ if (sec->dofs_size < sec->dofs_entsize) { -+ dtrace_dof_error(dof, "section entry size exceeds total size"); -+ return NULL; -+ } -+ -+ if (sec->dofs_entsize 
!= sizeof(struct dof_actdesc)) { -+ dtrace_dof_error(dof, "bad entry size in action description"); -+ return NULL; -+ } -+ -+ /* -+ * Was: sec->dofs_size / sec->dofs_entsize > dtrace_actions_max -+ * but it is safer to simply avoid the division (it requires use of -+ * a macro in Linux to cover 64-bit division in a 32-bit kernel. -+ */ -+ if (sec->dofs_size > sec->dofs_entsize * dtrace_actions_max) { -+ dtrace_dof_error(dof, "actions exceed dtrace_actions_max"); -+ return NULL; -+ } -+ -+ for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) { -+ desc = (struct dof_actdesc *)(daddr + -+ (uintptr_t)sec->dofs_offset + offs); -+ kind = (dtrace_actkind_t)desc->dofa_kind; -+ -+ if (DTRACEACT_ISPRINTFLIKE(kind) && -+ (kind != DTRACEACT_PRINTA || -+ desc->dofa_strtab != DOF_SECIDX_NONE)) { -+ struct dof_sec *strtab; -+ char *str, *fmt; -+ uint64_t i; -+ -+ /* -+ * The printf()-like actions must have a format string. -+ */ -+ strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, -+ desc->dofa_strtab); -+ if (strtab == NULL) -+ goto err; -+ -+ str = (char *)((uintptr_t)dof + -+ (uintptr_t)strtab->dofs_offset); -+ -+ for (i = desc->dofa_arg; i < strtab->dofs_size; i++) { -+ if (str[i] == '\0') -+ break; -+ } -+ -+ if (i >= strtab->dofs_size) { -+ dtrace_dof_error(dof, "bogus format string"); -+ goto err; -+ } -+ -+ if (i == desc->dofa_arg) { -+ dtrace_dof_error(dof, "empty format string"); -+ goto err; -+ } -+ -+ i -= desc->dofa_arg; -+ fmt = vmalloc(i + 1); -+ if (fmt == NULL) { -+ dtrace_dof_error(dof, "out-of-memory"); -+ goto err; -+ } -+ memcpy(fmt, &str[desc->dofa_arg], i + 1); -+ arg = (uint64_t)(uintptr_t)fmt; -+ } else { -+ if (kind == DTRACEACT_PRINTA) { -+ ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE); -+ arg = 0; -+ } else -+ arg = desc->dofa_arg; -+ } -+ -+ act = dtrace_actdesc_create(kind, desc->dofa_ntuple, -+ desc->dofa_uarg, arg); -+ if (act == NULL) -+ goto err; -+ -+ if (last != NULL) -+ last->dtad_next = act; -+ else -+ first = act; -+ -+ last = act; 
-+ -+ if (desc->dofa_difo == DOF_SECIDX_NONE) -+ continue; -+ -+ difosec = dtrace_dof_sect(dof, DOF_SECT_DIFOHDR, -+ desc->dofa_difo); -+ if (difosec == NULL) -+ goto err; -+ -+ act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr); -+ -+ if (act->dtad_difo == NULL) -+ goto err; -+ } -+ -+ ASSERT(first != NULL); -+ return first; -+ -+err: -+ for (act = first; act != NULL; act = next) { -+ next = act->dtad_next; -+ dtrace_actdesc_release(act, vstate); -+ } -+ -+ return NULL; -+} -+ -+static struct dtrace_ecbdesc *dtrace_dof_ecbdesc(struct dof_hdr *dof, -+ struct dof_sec *sec, -+ struct dtrace_vstate *vstate, -+ const struct cred *cr) -+{ -+ struct dtrace_ecbdesc *ep; -+ struct dof_ecbdesc *ecb; -+ struct dtrace_probedesc *desc; -+ struct dtrace_predicate *pred = NULL; -+ -+ if (sec->dofs_size < sizeof(struct dof_ecbdesc)) { -+ dtrace_dof_error(dof, "truncated ECB description"); -+ return NULL; -+ } -+ -+ if (sec->dofs_align != sizeof(uint64_t)) { -+ dtrace_dof_error(dof, "bad alignment in ECB description"); -+ return NULL; -+ } -+ -+ ecb = (struct dof_ecbdesc *) -+ ((uintptr_t)dof + (uintptr_t)sec->dofs_offset); -+ sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes); -+ -+ if (sec == NULL) -+ return NULL; -+ -+ ep = kzalloc(sizeof(struct dtrace_ecbdesc), GFP_KERNEL); -+ if (ep == NULL) -+ return NULL; -+ ep->dted_uarg = ecb->dofe_uarg; -+ desc = &ep->dted_probe; -+ -+ if (dtrace_dof_probedesc(dof, sec, desc) == NULL) -+ goto err; -+ -+ if (ecb->dofe_pred != DOF_SECIDX_NONE) { -+ sec = dtrace_dof_sect(dof, DOF_SECT_DIFOHDR, ecb->dofe_pred); -+ if (sec == NULL) -+ goto err; -+ -+ pred = dtrace_dof_predicate(dof, sec, vstate, cr); -+ if (pred == NULL) -+ goto err; -+ -+ ep->dted_pred.dtpdd_predicate = pred; -+ } -+ -+ if (ecb->dofe_actions != DOF_SECIDX_NONE) { -+ sec = dtrace_dof_sect(dof, DOF_SECT_ACTDESC, ecb->dofe_actions); -+ if (sec == NULL) -+ goto err; -+ -+ ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr); -+ -+ if (ep->dted_action 
== NULL) -+ goto err; -+ } -+ -+ return ep; -+ -+err: -+ if (pred != NULL) -+ dtrace_predicate_release(pred, vstate); -+ kfree(ep); -+ return NULL; -+} -+ -+/* -+ * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the -+ * specified DOF. At present, this amounts to simply adding 'ubase' to the -+ * site of any user SETX relocations to account for load object base address. -+ * In the future, if we need other relocations, this function can be extended. -+ */ -+static int dtrace_dof_relocate(struct dof_hdr *dof, struct dof_sec *sec, -+ uint64_t ubase) -+{ -+ uintptr_t daddr = (uintptr_t)dof; -+ struct dof_relohdr *dofr; -+ struct dof_sec *ss, *rs, *ts; -+ struct dof_relodesc *r; -+ uint_t i, n; -+ -+ dofr = (struct dof_relohdr *)(uintptr_t) (daddr + sec->dofs_offset); -+ -+ if (sec->dofs_size < sizeof(struct dof_relohdr) || -+ sec->dofs_align != sizeof(dof_secidx_t)) { -+ dtrace_dof_error(dof, "invalid relocation header"); -+ return -1; -+ } -+ -+ ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab); -+ rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec); -+ ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec); -+ -+ if (ss == NULL || rs == NULL || ts == NULL) -+ return -1; /* dtrace_dof_error() has been called already */ -+ -+ if (rs->dofs_entsize < sizeof(struct dof_relodesc) || -+ rs->dofs_align != sizeof(uint64_t)) { -+ dtrace_dof_error(dof, "invalid relocation section"); -+ return -1; -+ } -+ -+ r = (struct dof_relodesc *)(uintptr_t)(daddr + rs->dofs_offset); -+ /* -+ * Was: n = rs->dofs_size / rs->dofs_entsize; -+ * but on Linux we need to use a macro for the division to handle the -+ * possible case of 64-bit division on a 32-bit kernel. 
-+ */ -+ n = rs->dofs_size; -+ do_div(n, rs->dofs_entsize); -+ -+ for (i = 0; i < n; i++) { -+ uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset; -+ -+ switch (r->dofr_type) { -+ case DOF_RELO_NONE: -+ break; -+ case DOF_RELO_SETX: -+ if (r->dofr_offset >= ts->dofs_size || -+ r->dofr_offset + sizeof(uint64_t) > -+ ts->dofs_size) { -+ dtrace_dof_error(dof, "bad relocation offset"); -+ return -1; -+ } -+ -+ if (!IS_ALIGNED(taddr, sizeof(uint64_t))) { -+ dtrace_dof_error(dof, "misaligned setx relo"); -+ return -1; -+ } -+ -+ /* -+ * This is a bit ugly but it is necessary for arm64, -+ * where the linking of shared libraries retains the -+ * relocation records for the .SUNW_dof section. In -+ * that case, the runtime loader already performed the -+ * relocation, so we do not have to do anything here. -+ * -+ * We check for this situation by comparing the target -+ * address against the base address (ubase). If it is -+ * larger, we assume the relocation already took place. -+ */ -+ if (*(uint64_t *)taddr > ubase) -+ dt_dbg_dof(" Relocation by runtime " \ -+ "loader: 0x%llx (base 0x%llx)\n", -+ *(uint64_t *)taddr, ubase); -+ else { -+ dt_dbg_dof(" Relocate 0x%llx + 0x%llx " \ -+ "= 0x%llx\n", -+ *(uint64_t *)taddr, ubase, -+ *(uint64_t *)taddr + ubase); -+ -+ *(uint64_t *)taddr += ubase; -+ } -+ -+ break; -+ default: -+ dtrace_dof_error(dof, "invalid relocation type"); -+ return -1; -+ } -+ -+ r = (struct dof_relodesc *)((uintptr_t)r + rs->dofs_entsize); -+ } -+ -+ return 0; -+} -+ -+/* -+ * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated -+ * header: it should be at the front of a memory region that is at least -+ * sizeof(dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in -+ * size. It need not be validated in any other way. 
-+ */ -+int dtrace_dof_slurp(struct dof_hdr *dof, struct dtrace_vstate *vstate, -+ const struct cred *cr, struct dtrace_enabling **enabp, -+ uint64_t ubase, int noprobes) -+{ -+ uint64_t len = dof->dofh_loadsz, seclen; -+ uintptr_t daddr = (uintptr_t)dof; -+ struct dtrace_ecbdesc *ep; -+ struct dtrace_enabling *enab; -+ uint_t i; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(dof->dofh_loadsz >= sizeof(struct dof_hdr)); -+ -+ dt_dbg_dof(" DOF 0x%p Slurping...\n", dof); -+ -+ dt_dbg_dof(" DOF 0x%p Validating...\n", dof); -+ -+ /* -+ * Check the DOF header identification bytes. In addition to checking -+ * valid settings, we also verify that unused bits/bytes are zeroed so -+ * we can use them later without fear of regressing existing binaries. -+ */ -+ if (memcmp(&dof->dofh_ident[DOF_ID_MAG0], DOF_MAG_STRING, -+ DOF_MAG_STRLEN) != 0) { -+ dtrace_dof_error(dof, "DOF magic string mismatch"); -+ return -1; -+ } -+ -+ if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 && -+ dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) { -+ dtrace_dof_error(dof, "DOF has invalid data model"); -+ return -1; -+ } -+ -+ if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) { -+ dtrace_dof_error(dof, "DOF encoding mismatch"); -+ return -1; -+ } -+ -+ if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && -+ dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) { -+ dtrace_dof_error(dof, "DOF version mismatch"); -+ return -1; -+ } -+ -+ if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) { -+ dtrace_dof_error(dof, "DOF uses unsupported instruction set"); -+ return -1; -+ } -+ -+ if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) { -+ dtrace_dof_error(dof, "DOF uses too many integer registers"); -+ return -1; -+ } -+ -+ if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) { -+ dtrace_dof_error(dof, "DOF uses too many tuple registers"); -+ return -1; -+ } -+ -+ for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) { -+ if (dof->dofh_ident[i] != 0) { -+ dtrace_dof_error(dof, "DOF has invalid 
ident byte set"); -+ return -1; -+ } -+ } -+ -+ if (dof->dofh_flags & ~DOF_FL_VALID) { -+ dtrace_dof_error(dof, "DOF has invalid flag bits set"); -+ return -1; -+ } -+ -+ if (dof->dofh_secsize == 0) { -+ dtrace_dof_error(dof, "zero section header size"); -+ return -1; -+ } -+ -+ /* -+ * Check that the section headers don't exceed the amount of DOF -+ * data. Note that we cast the section size and number of sections -+ * to uint64_t's to prevent possible overflow in the multiplication. -+ */ -+ seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize; -+ -+ if (dof->dofh_secoff > len || seclen > len || -+ dof->dofh_secoff + seclen > len) { -+ dtrace_dof_error(dof, "truncated section headers"); -+ return -1; -+ } -+ -+ if (!IS_ALIGNED(dof->dofh_secoff, sizeof(uint64_t))) { -+ dtrace_dof_error(dof, "misaligned section headers"); -+ return -1; -+ } -+ -+ if (!IS_ALIGNED(dof->dofh_secsize, sizeof(uint64_t))) { -+ dtrace_dof_error(dof, "misaligned section size"); -+ return -1; -+ } -+ -+ /* -+ * Take an initial pass through the section headers to be sure that -+ * the headers don't have stray offsets. If the 'noprobes' flag is -+ * set, do not permit sections relating to providers, probes, or args. -+ */ -+ dt_dbg_dof(" DOF 0x%p Checking section offsets...\n", dof); -+ -+ for (i = 0; i < dof->dofh_secnum; i++) { -+ struct dof_sec *sec; -+ -+ sec = (struct dof_sec *)(daddr + (uintptr_t)dof->dofh_secoff + -+ i * dof->dofh_secsize); -+ -+ if (noprobes) { -+ switch (sec->dofs_type) { -+ case DOF_SECT_PROVIDER: -+ case DOF_SECT_PROBES: -+ case DOF_SECT_PRARGS: -+ case DOF_SECT_PROFFS: -+ dtrace_dof_error( -+ dof, "illegal sections for enabling"); -+ return -1; -+ } -+ } -+ -+ if (DOF_SEC_ISLOADABLE(sec->dofs_type) && -+ !(sec->dofs_flags & DOF_SECF_LOAD)) { -+ dtrace_dof_error( -+ dof, "loadable section with load flag unset"); -+ return -1; -+ } -+ -+ /* -+ * Just ignore non-loadable sections. 
-+ */ -+ if (!(sec->dofs_flags & DOF_SECF_LOAD)) -+ continue; -+ -+ if (sec->dofs_align & (sec->dofs_align - 1)) { -+ dtrace_dof_error(dof, "bad section alignment"); -+ return -1; -+ } -+ -+ if (sec->dofs_offset & (sec->dofs_align - 1)) { -+ dtrace_dof_error(dof, "misaligned section"); -+ return -1; -+ } -+ -+ if (sec->dofs_offset > len || sec->dofs_size > len || -+ sec->dofs_offset + sec->dofs_size > len) { -+ dtrace_dof_error(dof, "corrupt section header"); -+ return -1; -+ } -+ -+ if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr + -+ sec->dofs_offset + sec->dofs_size - 1) != '\0') { -+ dtrace_dof_error(dof, "non-terminating string table"); -+ return -1; -+ } -+ } -+ -+ /* -+ * Take a second pass through the sections and locate and perform any -+ * relocations that are present. We do this after the first pass to -+ * be sure that all sections have had their headers validated. -+ */ -+ dt_dbg_dof(" DOF 0x%p Performing relocations...\n", dof); -+ -+ for (i = 0; i < dof->dofh_secnum; i++) { -+ struct dof_sec *sec; -+ -+ sec = (struct dof_sec *)(daddr + (uintptr_t)dof->dofh_secoff + -+ i * dof->dofh_secsize); -+ -+ /* -+ * Skip sections that are not loadable. 
-+ */ -+ if (!(sec->dofs_flags & DOF_SECF_LOAD)) -+ continue; -+ -+ switch (sec->dofs_type) { -+ case DOF_SECT_URELHDR: -+ if (dtrace_dof_relocate(dof, sec, ubase) != 0) -+ return -1; -+ break; -+ } -+ } -+ -+ dt_dbg_dof(" DOF 0x%p Processing enablings...\n", dof); -+ -+ enab = *enabp; -+ if (enab == NULL) -+ enab = *enabp = dtrace_enabling_create(vstate); -+ -+ if (enab == NULL) { -+ dt_dbg_dof(" DOF 0x%p Done slurping - no enablings\n", dof); -+ return -1; -+ } -+ -+ for (i = 0; i < dof->dofh_secnum; i++) { -+ struct dof_sec *sec; -+ -+ sec = (struct dof_sec *)(daddr + (uintptr_t)dof->dofh_secoff + -+ i * dof->dofh_secsize); -+ -+ if (sec->dofs_type != DOF_SECT_ECBDESC) -+ continue; -+ -+ ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr); -+ if (ep == NULL) { -+ dt_dbg_dof(" DOF 0x%p Done slurping - ECB problem\n", -+ dof); -+ dtrace_enabling_destroy(enab); -+ *enabp = NULL; -+ return -1; -+ } -+ -+ dtrace_enabling_add(enab, ep); -+ } -+ -+ dt_dbg_dof(" DOF 0x%p Enablings processed\n", dof); -+ dt_dbg_dof(" DOF 0x%p Done slurping\n", dof); -+ -+ return 0; -+} -+ -+/* -+ * Process DOF for any options. This should be called after the DOF has been -+ * processed by dtrace_dof_slurp(). 
-+ */ -+int dtrace_dof_options(struct dof_hdr *dof, struct dtrace_state *state) -+{ -+ int i, rval; -+ uint32_t entsize; -+ size_t offs; -+ struct dof_optdesc *desc; -+ -+ for (i = 0; i < dof->dofh_secnum; i++) { -+ struct dof_sec *sec; -+ -+ sec = (struct dof_sec *)((uintptr_t)dof + -+ (uintptr_t)dof->dofh_secoff + -+ i * dof->dofh_secsize); -+ -+ if (sec->dofs_type != DOF_SECT_OPTDESC) -+ continue; -+ -+ if (sec->dofs_align != sizeof(uint64_t)) { -+ dtrace_dof_error( -+ dof, "bad alignment in option description"); -+ return -EINVAL; -+ } -+ -+ entsize = sec->dofs_entsize; -+ if (entsize == 0) { -+ dtrace_dof_error(dof, "zeroed option entry size"); -+ return -EINVAL; -+ } -+ -+ if (entsize < sizeof(struct dof_optdesc)) { -+ dtrace_dof_error(dof, "bad option entry size"); -+ return -EINVAL; -+ } -+ -+ for (offs = 0; offs < sec->dofs_size; offs += entsize) { -+ desc = (struct dof_optdesc *)((uintptr_t)dof + -+ (uintptr_t)sec->dofs_offset + -+ offs); -+ -+ if (desc->dofo_strtab != DOF_SECIDX_NONE) { -+ dtrace_dof_error( -+ dof, "non-zero option string"); -+ return -EINVAL; -+ } -+ -+ if (desc->dofo_value == DTRACEOPT_UNSET) { -+ dtrace_dof_error(dof, "unset option"); -+ return -EINVAL; -+ } -+ -+ rval = dtrace_state_option(state, desc->dofo_option, -+ desc->dofo_value); -+ if (rval != 0) { -+ dtrace_dof_error(dof, "rejected option"); -+ return rval; -+ } -+ } -+ } -+ -+ return 0; -+} -+ -+static struct dtrace_helpers *dtrace_helpers_create(struct task_struct *curr) -+{ -+ struct dtrace_helpers *dth; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ if (curr->dt_task == NULL) -+ return NULL; -+ -+ ASSERT(curr->dt_task->dt_helpers == NULL); -+ -+ dth = kzalloc(sizeof(struct dtrace_helpers), GFP_KERNEL); -+ if (dth == NULL) -+ return NULL; -+ -+ dth->dthps_actions = vzalloc(sizeof(struct dtrace_helper_action *) * -+ DTRACE_NHELPER_ACTIONS); -+ if (dth->dthps_actions == NULL) { -+ kfree(dth); -+ return NULL; -+ } -+ -+ curr->dt_task->dt_helpers = dth; -+ dtrace_helpers++; -+ 
-+ dt_dbg_dof(" Helpers allocated for task 0x%p (%d system-wide)\n", -+ curr, dtrace_helpers); -+ -+ return dth; -+} -+ -+static int dtrace_helper_validate(struct dtrace_helper_action *helper) -+{ -+ int err = 0, i; -+ struct dtrace_difo *dp; -+ -+ dp = helper->dtha_predicate; -+ if (dp != NULL) -+ err += dtrace_difo_validate_helper(dp); -+ -+ for (i = 0; i < helper->dtha_nactions; i++) -+ err += dtrace_difo_validate_helper(helper->dtha_actions[i]); -+ -+ return (err == 0); -+} -+ -+static int dtrace_helper_provider_validate(struct dof_hdr *dof, -+ struct dof_sec *sec) -+{ -+ uintptr_t daddr = (uintptr_t)dof; -+ struct dof_sec *str_sec, *prb_sec, *arg_sec, *off_sec, -+ *enoff_sec; -+ struct dof_provider *prov; -+ struct dof_probe *prb; -+ uint8_t *arg; -+ char *strtab, *typestr; -+ dof_stridx_t typeidx; -+ size_t typesz; -+ uint_t nprobes, j, k; -+ -+ ASSERT(sec->dofs_type == DOF_SECT_PROVIDER); -+ -+ if (sec->dofs_offset & (sizeof(uint_t) - 1)) { -+ dtrace_dof_error(dof, "misaligned section offset"); -+ return -1; -+ } -+ -+ /* -+ * The section needs to be large enough to contain the DOF provider -+ * structure appropriate for the given version. -+ */ -+ if (sec->dofs_size < -+ ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) -+ ? 
offsetof(struct dof_provider, dofpv_prenoffs) -+ : sizeof(struct dof_provider))) { -+ dtrace_dof_error(dof, "provider section too small"); -+ return -1; -+ } -+ -+ prov = (struct dof_provider *)(uintptr_t)(daddr + sec->dofs_offset); -+ str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, prov->dofpv_strtab); -+ prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, prov->dofpv_probes); -+ arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, prov->dofpv_prargs); -+ off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, prov->dofpv_proffs); -+ -+ if (str_sec == NULL || prb_sec == NULL || -+ arg_sec == NULL || off_sec == NULL) -+ return -1; -+ -+ enoff_sec = NULL; -+ -+ if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && -+ prov->dofpv_prenoffs != DOF_SECT_NONE) { -+ enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS, -+ prov->dofpv_prenoffs); -+ -+ if (enoff_sec == NULL) -+ return -1; -+ } -+ -+ strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); -+ -+ if (prov->dofpv_name >= str_sec->dofs_size || -+ strlen(strtab + prov->dofpv_name) >= DTRACE_PROVNAMELEN) { -+ dtrace_dof_error(dof, "invalid provider name"); -+ return -1; -+ } -+ -+ if (prb_sec->dofs_entsize == 0 || -+ prb_sec->dofs_entsize > prb_sec->dofs_size) { -+ dtrace_dof_error(dof, "invalid entry size"); -+ return -1; -+ } -+ -+ if (prb_sec->dofs_entsize & (sizeof(uintptr_t) - 1)) { -+ dtrace_dof_error(dof, "misaligned entry size"); -+ return -1; -+ } -+ -+ if (off_sec->dofs_entsize != sizeof(uint32_t)) { -+ dtrace_dof_error(dof, "invalid entry size"); -+ return -1; -+ } -+ -+ if (off_sec->dofs_offset & (sizeof(uint32_t) - 1)) { -+ dtrace_dof_error(dof, "misaligned section offset"); -+ return -1; -+ } -+ -+ if (arg_sec->dofs_entsize != sizeof(uint8_t)) { -+ dtrace_dof_error(dof, "invalid entry size"); -+ return -1; -+ } -+ -+ arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); -+ nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; -+ -+ dt_dbg_dof(" DOF 0x%p %s::: with %d probes\n", -+ dof, strtab + 
prov->dofpv_name, nprobes); -+ -+ /* -+ * Take a pass through the probes to check for errors. -+ */ -+ for (j = 0; j < nprobes; j++) { -+ prb = (struct dof_probe *)(uintptr_t) -+ (daddr + prb_sec->dofs_offset + -+ j * prb_sec->dofs_entsize); -+ -+ if (prb->dofpr_func >= str_sec->dofs_size) { -+ dtrace_dof_error(dof, "invalid function name"); -+ return -1; -+ } -+ -+ if (strlen(strtab + prb->dofpr_func) >= DTRACE_FUNCNAMELEN) { -+ dtrace_dof_error(dof, "function name too long"); -+ return -1; -+ } -+ -+ if (prb->dofpr_name >= str_sec->dofs_size || -+ strlen(strtab + prb->dofpr_name) >= DTRACE_NAMELEN) { -+ dtrace_dof_error(dof, "invalid probe name"); -+ return -1; -+ } -+ -+ /* -+ * The offset count must not wrap the index, and the offsets -+ * must also not overflow the section's data. -+ */ -+ if (prb->dofpr_offidx + prb->dofpr_noffs < prb->dofpr_offidx || -+ (prb->dofpr_offidx + prb->dofpr_noffs) * -+ off_sec->dofs_entsize > off_sec->dofs_size) { -+ dtrace_dof_error(dof, "invalid probe offset"); -+ return -1; -+ } -+ -+ if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) { -+ /* -+ * If there's no is-enabled offset section, make sure -+ * there aren't any is-enabled offsets. Otherwise -+ * perform the same checks as for probe offsets -+ * (immediately above). 
-+ */ -+ if (enoff_sec == NULL) { -+ if (prb->dofpr_enoffidx != 0 || -+ prb->dofpr_nenoffs != 0) { -+ dtrace_dof_error(dof, -+ "is-enabled offsets " -+ "with null section"); -+ return -1; -+ } -+ } else if (prb->dofpr_enoffidx + prb->dofpr_nenoffs < -+ prb->dofpr_enoffidx || -+ (prb->dofpr_enoffidx + prb->dofpr_nenoffs) * -+ enoff_sec->dofs_entsize > -+ enoff_sec->dofs_size) { -+ dtrace_dof_error(dof, "invalid is-enabled " -+ "offset"); -+ return -1; -+ } -+ -+ if (prb->dofpr_noffs + prb->dofpr_nenoffs == 0) { -+ dtrace_dof_error(dof, "zero probe and " -+ "is-enabled offsets"); -+ return -1; -+ } -+ } else if (prb->dofpr_noffs == 0) { -+ dtrace_dof_error(dof, "zero probe offsets"); -+ return -1; -+ } -+ -+ if (prb->dofpr_argidx + prb->dofpr_xargc < prb->dofpr_argidx || -+ (prb->dofpr_argidx + prb->dofpr_xargc) * -+ arg_sec->dofs_entsize > arg_sec->dofs_size) { -+ dtrace_dof_error(dof, "invalid args"); -+ return -1; -+ } -+ -+ typeidx = prb->dofpr_nargv; -+ typestr = strtab + prb->dofpr_nargv; -+ for (k = 0; k < prb->dofpr_nargc; k++) { -+ if (typeidx >= str_sec->dofs_size) { -+ dtrace_dof_error(dof, "bad native argument " -+ "type"); -+ return -1; -+ } -+ -+ typesz = strlen(typestr) + 1; -+ if (typesz > DTRACE_ARGTYPELEN) { -+ dtrace_dof_error(dof, "native argument type " -+ "too long"); -+ return -1; -+ } -+ -+ typeidx += typesz; -+ typestr += typesz; -+ } -+ -+ typeidx = prb->dofpr_xargv; -+ typestr = strtab + prb->dofpr_xargv; -+ for (k = 0; k < prb->dofpr_xargc; k++) { -+ if (arg[prb->dofpr_argidx + k] > prb->dofpr_nargc) { -+ dtrace_dof_error(dof, "bad native argument " -+ "index"); -+ return -1; -+ } -+ -+ if (typeidx >= str_sec->dofs_size) { -+ dtrace_dof_error(dof, "bad translated " -+ "argument type"); -+ return -1; -+ } -+ -+ typesz = strlen(typestr) + 1; -+ if (typesz > DTRACE_ARGTYPELEN) { -+ dtrace_dof_error(dof, "translated argument " -+ "type too long"); -+ return -1; -+ } -+ -+ typeidx += typesz; -+ typestr += typesz; -+ } -+ -+ dt_dbg_dof(" Probe 
%d %s:%s:%s:%s with %d offsets, " -+ "%d is-enabled offsets\n", j, -+ strtab + prov->dofpv_name, "", -+ strtab + prb->dofpr_func, strtab + prb->dofpr_name, -+ prb->dofpr_noffs, prb->dofpr_nenoffs); -+ } -+ -+ return 0; -+} -+ -+static void dtrace_helper_action_destroy(struct dtrace_helper_action *helper, -+ struct dtrace_vstate *vstate) -+{ -+ int i; -+ -+ if (helper->dtha_predicate != NULL) -+ dtrace_difo_release(helper->dtha_predicate, vstate); -+ -+ for (i = 0; i < helper->dtha_nactions; i++) { -+ ASSERT(helper->dtha_actions[i] != NULL); -+ dtrace_difo_release(helper->dtha_actions[i], vstate); -+ } -+ -+ vfree(helper->dtha_actions); -+ kfree(helper); -+} -+ -+static int dtrace_helper_action_add(int which, struct dtrace_ecbdesc *ep) -+{ -+ struct dtrace_helpers *dth; -+ struct dtrace_helper_action *helper, *last; -+ struct dtrace_actdesc *act; -+ struct dtrace_vstate *vstate; -+ struct dtrace_predicate *pred; -+ int count = 0, nactions = 0, i; -+ -+ if (which < 0 || which >= DTRACE_NHELPER_ACTIONS) -+ return -EINVAL; -+ -+ if (current->dt_task == NULL) -+ return -ENOMEM; -+ -+ dth = current->dt_task->dt_helpers; -+ last = dth->dthps_actions[which]; -+ vstate = &dth->dthps_vstate; -+ -+ for (count = 0; last != NULL; last = last->dtha_next) { -+ count++; -+ if (last->dtha_next == NULL) -+ break; -+ } -+ -+ /* -+ * If we already have dtrace_helper_actions_max helper actions for this -+ * helper action type, we'll refuse to add a new one. 
-+ */ -+ if (count >= dtrace_helper_actions_max) -+ return -ENOSPC; -+ -+ helper = kzalloc(sizeof(struct dtrace_helper_action), GFP_KERNEL); -+ if (helper == NULL) -+ return -ENOMEM; -+ -+ helper->dtha_generation = dth->dthps_generation; -+ -+ pred = ep->dted_pred.dtpdd_predicate; -+ if (pred != NULL) { -+ ASSERT(pred->dtp_difo != NULL); -+ dtrace_difo_hold(pred->dtp_difo); -+ helper->dtha_predicate = pred->dtp_difo; -+ } -+ -+ for (act = ep->dted_action; act != NULL; act = act->dtad_next) { -+ if (act->dtad_kind != DTRACEACT_DIFEXPR) -+ goto err; -+ -+ if (act->dtad_difo == NULL) -+ goto err; -+ -+ nactions++; -+ } -+ -+ helper->dtha_actions = vzalloc(sizeof(struct dtrace_difo *) * -+ (helper->dtha_nactions = nactions)); -+ if (helper->dtha_actions == NULL) -+ goto err; -+ -+ for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) { -+ dtrace_difo_hold(act->dtad_difo); -+ helper->dtha_actions[i++] = act->dtad_difo; -+ } -+ -+ if (!dtrace_helper_validate(helper)) -+ goto err; -+ -+ if (last == NULL) -+ dth->dthps_actions[which] = helper; -+ else -+ last->dtha_next = helper; -+ -+ if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { -+ dtrace_helptrace_nlocals = vstate->dtvs_nlocals; -+ dtrace_helptrace_next = 0; -+ } -+ -+ return 0; -+ -+err: -+ dtrace_helper_action_destroy(helper, vstate); -+ if (helper->dtha_actions != NULL) -+ vfree(helper->dtha_actions); -+ else -+ return -ENOMEM; -+ -+ return -EINVAL; -+} -+ -+static int dtrace_helper_provider_add(struct dof_helper *dofhp, int gen) -+{ -+ struct dtrace_helpers *dth; -+ struct dtrace_helper_provider *hprov, **tmp_provs; -+ uint_t tmp_maxprovs, i; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ if (current->dt_task == NULL) -+ return -ENOMEM; -+ -+ dth = current->dt_task->dt_helpers; -+ ASSERT(dth != NULL); -+ -+ /* -+ * If we already have dtrace_helper_providers_max helper providers, -+ * we're refuse to add a new one. 
-+ */ -+ if (dth->dthps_nprovs >= dtrace_helper_providers_max) -+ return -ENOSPC; -+ -+ /* -+ * Check to make sure this isn't a duplicate. -+ */ -+ for (i = 0; i < dth->dthps_nprovs; i++) { -+ if (dofhp->dofhp_addr == -+ dth->dthps_provs[i]->dthp_prov.dofhp_addr) -+ return -EALREADY; -+ } -+ -+ hprov = kzalloc(sizeof(struct dtrace_helper_provider), GFP_KERNEL); -+ if (hprov == NULL) -+ return -ENOMEM; -+ hprov->dthp_prov = *dofhp; -+ hprov->dthp_ref = 1; -+ hprov->dthp_generation = gen; -+ -+ /* -+ * Allocate a bigger table for helper providers if it's already full. -+ */ -+ if (dth->dthps_maxprovs == dth->dthps_nprovs) { -+ tmp_maxprovs = dth->dthps_maxprovs; -+ tmp_provs = dth->dthps_provs; -+ -+ if (dth->dthps_maxprovs == 0) -+ dth->dthps_maxprovs = 2; -+ else -+ dth->dthps_maxprovs *= 2; -+ -+ if (dth->dthps_maxprovs > dtrace_helper_providers_max) -+ dth->dthps_maxprovs = dtrace_helper_providers_max; -+ -+ ASSERT(tmp_maxprovs < dth->dthps_maxprovs); -+ -+ dth->dthps_provs = -+ vzalloc(dth->dthps_maxprovs * -+ sizeof(struct dtrace_helper_provider *)); -+ -+ if (dth->dthps_provs == NULL) { -+ kfree(hprov); -+ return -ENOMEM; -+ } -+ -+ if (tmp_provs != NULL) { -+ memcpy(dth->dthps_provs, tmp_provs, -+ tmp_maxprovs * -+ sizeof(struct dtrace_helper_provider *)); -+ vfree(tmp_provs); -+ } -+ } -+ -+ dth->dthps_provs[dth->dthps_nprovs] = hprov; -+ dth->dthps_nprovs++; -+ -+ return 0; -+} -+ -+static void dtrace_helper_provider_destroy(struct dtrace_helper_provider *hprov) -+{ -+ mutex_lock(&dtrace_lock); -+ -+ if (--hprov->dthp_ref == 0) { -+ struct dof_hdr *dof; -+ -+ mutex_unlock(&dtrace_lock); -+ -+ dof = (struct dof_hdr *)(uintptr_t)hprov->dthp_prov.dofhp_dof; -+ dtrace_dof_destroy(dof); -+ kfree(hprov); -+ } else -+ mutex_unlock(&dtrace_lock); -+} -+ -+static void dtrace_dofattr2attr(struct dtrace_attribute *attr, -+ const dof_attr_t dofattr) -+{ -+ attr->dtat_name = DOF_ATTR_NAME(dofattr); -+ attr->dtat_data = DOF_ATTR_DATA(dofattr); -+ attr->dtat_class = 
DOF_ATTR_CLASS(dofattr); -+} -+ -+static void dtrace_dofprov2hprov(struct dtrace_helper_provdesc *hprov, -+ const struct dof_provider *dofprov, -+ char *strtab) -+{ -+ hprov->dthpv_provname = strtab + dofprov->dofpv_name; -+ dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider, -+ dofprov->dofpv_provattr); -+ dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod, -+ dofprov->dofpv_modattr); -+ dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func, -+ dofprov->dofpv_funcattr); -+ dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name, -+ dofprov->dofpv_nameattr); -+ dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args, -+ dofprov->dofpv_argsattr); -+} -+ -+static void dtrace_helper_provider_remove_one(struct dof_helper *dhp, -+ struct dof_sec *sec, pid_t pid) -+{ -+ uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; -+ struct dof_hdr *dof = (struct dof_hdr *)daddr; -+ struct dof_sec *str_sec; -+ struct dof_provider *prov; -+ char *strtab; -+ struct dtrace_helper_provdesc dhpv; -+ struct dtrace_meta *meta = dtrace_meta_pid; -+ struct dtrace_mops *mops = &meta->dtm_mops; -+ -+ prov = (struct dof_provider *)(uintptr_t)(daddr + sec->dofs_offset); -+ str_sec = (struct dof_sec *)(uintptr_t)(daddr + dof->dofh_secoff + -+ prov->dofpv_strtab * -+ dof->dofh_secsize); -+ -+ strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); -+ -+ /* -+ * Create the provider. 
-+ */ -+ dtrace_dofprov2hprov(&dhpv, prov, strtab); -+ -+ dt_dbg_dof(" Removing provider %s for PID %d\n", -+ dhpv.dthpv_provname, pid); -+ -+ mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid); -+ -+ meta->dtm_count--; -+} -+ -+static void dtrace_helper_provider_remove(struct dof_helper *dhp, pid_t pid) -+{ -+ uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; -+ struct dof_hdr *dof = (struct dof_hdr *)daddr; -+ int i; -+ -+ ASSERT(MUTEX_HELD(&dtrace_meta_lock)); -+ -+ for (i = 0; i < dof->dofh_secnum; i++) { -+ struct dof_sec *sec; -+ -+ sec = (struct dof_sec *)(uintptr_t) (daddr + dof->dofh_secoff + -+ i * dof->dofh_secsize); -+ -+ if (sec->dofs_type != DOF_SECT_PROVIDER) -+ continue; -+ -+ dtrace_helper_provider_remove_one(dhp, sec, pid); -+ } -+} -+ -+static void dtrace_helper_provide_one(struct dof_helper *dhp, -+ struct dof_sec *sec, -+ pid_t pid) -+{ -+ uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; -+ uint32_t *off, *enoff; -+ uint8_t *arg; -+ char *strtab; -+ uint_t i, nprobes; -+ void *parg; -+ -+ struct dof_hdr *dof = (struct dof_hdr *)daddr; -+ struct dof_sec *str_sec, *prb_sec, *arg_sec, *off_sec, -+ *enoff_sec; -+ struct dof_provider *prov; -+ struct dof_probe *probe; -+ struct dtrace_helper_provdesc dhpv; -+ struct dtrace_helper_probedesc dhpb; -+ struct dtrace_meta *meta = dtrace_meta_pid; -+ struct dtrace_mops *mops = &meta->dtm_mops; -+ -+ prov = (struct dof_provider *)(uintptr_t)(daddr + sec->dofs_offset); -+ str_sec = (struct dof_sec *)(uintptr_t)(daddr + dof->dofh_secoff + -+ prov->dofpv_strtab * -+ dof->dofh_secsize); -+ prb_sec = (struct dof_sec *)(uintptr_t)(daddr + dof->dofh_secoff + -+ prov->dofpv_probes * -+ dof->dofh_secsize); -+ arg_sec = (struct dof_sec *)(uintptr_t)(daddr + dof->dofh_secoff + -+ prov->dofpv_prargs * -+ dof->dofh_secsize); -+ off_sec = (struct dof_sec *)(uintptr_t)(daddr + dof->dofh_secoff + -+ prov->dofpv_proffs * -+ dof->dofh_secsize); -+ -+ strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); -+ off = (uint32_t 
*)(uintptr_t)(daddr + off_sec->dofs_offset); -+ arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); -+ enoff = NULL; -+ -+ /* -+ * See dtrace_helper_provider_validate(). -+ */ -+ if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && -+ prov->dofpv_prenoffs != DOF_SECT_NONE) { -+ enoff_sec = (struct dof_sec *)(uintptr_t) -+ (daddr + dof->dofh_secoff + -+ prov->dofpv_prenoffs * dof->dofh_secsize); -+ enoff = (uint32_t *)(uintptr_t) -+ (daddr + enoff_sec->dofs_offset); -+ } -+ -+ nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; -+ -+ /* -+ * Create the provider. -+ */ -+ dtrace_dofprov2hprov(&dhpv, prov, strtab); -+ -+ dt_dbg_dof(" Creating provider %s for PID %d\n", -+ strtab + prov->dofpv_name, pid); -+ -+ /* -+ * This used to just 'return;' when parg is NULL, but that causes the -+ * cleanup code (dtrace_helper_provider_remove[_one]) to make a call -+ * to dtms_remove_pid() for a provider that never got created. -+ * -+ * If we fail to provide this provider, mark it as something to ignore, -+ * so we don't try to process it during cleanup. -+ */ -+ parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid); -+ if (parg == NULL) { -+ sec->dofs_type = DOF_SECT_NONE; -+ return; -+ } -+ -+ meta->dtm_count++; -+ -+ /* -+ * Create the probes. 
-+ */ -+ for (i = 0; i < nprobes; i++) { -+ probe = (struct dof_probe *)(uintptr_t)(daddr + -+ prb_sec->dofs_offset + -+ i * prb_sec->dofs_entsize); -+ -+ dhpb.dthpb_mod = dhp->dofhp_mod; -+ dhpb.dthpb_func = strtab + probe->dofpr_func; -+ dhpb.dthpb_name = strtab + probe->dofpr_name; -+ dhpb.dthpb_base = probe->dofpr_addr; -+ dhpb.dthpb_offs = off + probe->dofpr_offidx; -+ dhpb.dthpb_noffs = probe->dofpr_noffs; -+ -+ if (enoff != NULL) { -+ dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx; -+ dhpb.dthpb_nenoffs = probe->dofpr_nenoffs; -+ } else { -+ dhpb.dthpb_enoffs = NULL; -+ dhpb.dthpb_nenoffs = 0; -+ } -+ -+ dhpb.dthpb_args = arg + probe->dofpr_argidx; -+ dhpb.dthpb_nargc = probe->dofpr_nargc; -+ dhpb.dthpb_xargc = probe->dofpr_xargc; -+ dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv; -+ dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv; -+ -+ dt_dbg_dof(" Creating probe %s:%s:%s:%s\n", -+ strtab + prov->dofpv_name, "", dhpb.dthpb_func, -+ dhpb.dthpb_name); -+ -+ mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb); -+ } -+} -+ -+void dtrace_helper_provide(struct dof_helper *dhp, pid_t pid) -+{ -+ uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; -+ struct dof_hdr *dof = (struct dof_hdr *)daddr; -+ int i; -+ -+ ASSERT(MUTEX_HELD(&dtrace_meta_lock)); -+ -+ for (i = 0; i < dof->dofh_secnum; i++) { -+ struct dof_sec *sec; -+ -+ sec = (struct dof_sec *)(uintptr_t) (daddr + dof->dofh_secoff + -+ i * dof->dofh_secsize); -+ -+ if (sec->dofs_type != DOF_SECT_PROVIDER) -+ continue; -+ -+ dtrace_helper_provide_one(dhp, sec, pid); -+ } -+ -+ /* -+ * We may have just created probes, so we must now rematch against any -+ * retained enablings. Note that this call will acquire both cpu_lock -+ * and dtrace_lock; the fact that we are holding dtrace_meta_lock now -+ * is what defines the ordering with respect to these three locks. 
-+ */ -+ dt_dbg_dof(" Re-matching against any retained enablings\n"); -+ dtrace_enabling_matchall(); -+} -+ -+static void dtrace_helper_provider_register(struct task_struct *tsk, -+ struct dtrace_helpers *dth, -+ struct dof_helper *dofhp) -+{ -+ ASSERT(!MUTEX_HELD(&dtrace_lock)); -+ -+ mutex_lock(&dtrace_meta_lock); -+ mutex_lock(&dtrace_lock); -+ -+ if (!dtrace_attached() || dtrace_meta_pid == NULL) { -+ dt_dbg_dof(" No meta provider registered -- deferred\n"); -+ -+ /* -+ * If the dtrace module is loaded but not attached, or if there -+ * isn't a meta provider registered to deal with these provider -+ * descriptions, we need to postpone creating the actual -+ * providers until later. -+ */ -+ if (dth->dthps_next == NULL && dth->dthps_prev == NULL && -+ dtrace_deferred_pid != dth) { -+ dth->dthps_deferred = 1; -+ dth->dthps_pid = tsk->pid; -+ dth->dthps_next = dtrace_deferred_pid; -+ dth->dthps_prev = NULL; -+ if (dtrace_deferred_pid != NULL) -+ dtrace_deferred_pid->dthps_prev = dth; -+ dtrace_deferred_pid = dth; -+ } -+ -+ mutex_unlock(&dtrace_lock); -+ } else if (dofhp != NULL) { -+ /* -+ * If the dtrace module is loaded and we have a particular -+ * helper provider description, pass that off to the meta -+ * provider. -+ */ -+ mutex_unlock(&dtrace_lock); -+ -+ dtrace_helper_provide(dofhp, tsk->pid); -+ } else { -+ /* -+ * Otherwise, just pass all the helper provider descriptions -+ * off to the meta provider. 
-+ */ -+ int i; -+ -+ mutex_unlock(&dtrace_lock); -+ -+ for (i = 0; i < dth->dthps_nprovs; i++) { -+ dtrace_helper_provide(&dth->dthps_provs[i]->dthp_prov, -+ tsk->pid); -+ } -+ } -+ -+ mutex_unlock(&dtrace_meta_lock); -+} -+ -+int dtrace_helper_slurp(struct dof_hdr *dof, struct dof_helper *dhp) -+{ -+ struct dtrace_helpers *dth; -+ struct dtrace_vstate *vstate; -+ struct dtrace_enabling *enab = NULL; -+ int i, gen, rv; -+ int nhelpers = 0, nprovs = 0, destroy = 1; -+ uintptr_t daddr = (uintptr_t)dof; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ if (current->dt_task == NULL) -+ return -1; -+ -+ dth = current->dt_task->dt_helpers; -+ if (dth == NULL) -+ dth = dtrace_helpers_create(current); -+ -+ if (dth == NULL) { -+ dtrace_dof_destroy(dof); -+ return -1; -+ } -+ -+ dt_dbg_dof("DOF 0x%p from helper {'%s', %p, %p}...\n", -+ dof, dhp ? dhp->dofhp_mod : "<none>", -+ dhp ? (void *)(dhp->dofhp_addr) : NULL, -+ dhp ? (void *)(dhp->dofhp_dof) : NULL); -+ -+ vstate = &dth->dthps_vstate; -+ -+ rv = dtrace_dof_slurp(dof, vstate, NULL, &enab, -+ dhp != NULL ? dhp->dofhp_addr : 0, FALSE); -+ if (rv != 0) { -+ dtrace_dof_destroy(dof); -+ return rv; -+ } -+ -+ /* -+ * Look for helper providers and validate their descriptions. -+ */ -+ if (dhp != NULL) { -+ dt_dbg_dof(" DOF 0x%p Validating providers...\n", dof); -+ -+ for (i = 0; i < dof->dofh_secnum; i++) { -+ struct dof_sec *sec; -+ -+ sec = (struct dof_sec *)(uintptr_t) -+ (daddr + dof->dofh_secoff + -+ i * dof->dofh_secsize); -+ -+ if (sec->dofs_type != DOF_SECT_PROVIDER) -+ continue; -+ -+ if (dtrace_helper_provider_validate(dof, sec) != 0) { -+ dtrace_enabling_destroy(enab); -+ dtrace_dof_destroy(dof); -+ return -1; -+ } -+ -+ nprovs++; -+ } -+ } -+ -+ /* -+ * Now we need to walk through the ECB descriptions in the enabling. 
-+ */ -+ for (i = 0; i < enab->dten_ndesc; i++) { -+ struct dtrace_ecbdesc *ep = enab->dten_desc[i]; -+ struct dtrace_probedesc *desc = &ep->dted_probe; -+ -+ dt_dbg_dof(" ECB Desc %s:%s:%s:%s\n", -+ desc->dtpd_provider, desc->dtpd_mod, -+ desc->dtpd_func, desc->dtpd_name); -+ if (strcmp(desc->dtpd_provider, "dtrace") != 0) -+ continue; -+ -+ if (strcmp(desc->dtpd_mod, "helper") != 0) -+ continue; -+ -+ if (strcmp(desc->dtpd_func, "ustack") != 0) -+ continue; -+ -+ rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, ep); -+ if (rv != 0) { -+ /* -+ * Adding this helper action failed -- we are now going -+ * to rip out the entire generation and return failure. -+ */ -+ dtrace_helper_destroygen(dth->dthps_generation); -+ dtrace_enabling_destroy(enab); -+ dtrace_dof_destroy(dof); -+ return -1; -+ } -+ -+ nhelpers++; -+ } -+ -+ if (nhelpers < enab->dten_ndesc) -+ dtrace_dof_error(dof, "unmatched helpers"); -+ -+ gen = dth->dthps_generation++; -+ dtrace_enabling_destroy(enab); -+ -+ if (dhp != NULL && nprovs > 0) { -+ dt_dbg_dof(" DOF 0x%p Adding and registering providers\n", -+ dof); -+ -+ dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; -+ if (dtrace_helper_provider_add(dhp, gen) == 0) { -+ mutex_unlock(&dtrace_lock); -+ dtrace_helper_provider_register(current, dth, dhp); -+ mutex_lock(&dtrace_lock); -+ -+ destroy = 0; -+ } -+ } -+ -+ if (destroy) -+ dtrace_dof_destroy(dof); -+ -+ return gen; -+} -+ -+void dtrace_helpers_destroy(struct task_struct *tsk) -+{ -+ struct dtrace_helpers *help; -+ struct dtrace_vstate *vstate; -+ int i; -+ -+ if (tsk->dt_task == NULL) -+ return; -+ -+ mutex_lock(&dtrace_lock); -+ -+ ASSERT(tsk->dt_task->dt_helpers != NULL); -+ ASSERT(dtrace_helpers > 0); -+ -+ dt_dbg_dof("Helper cleanup: PID %d\n", tsk->pid); -+ -+ help = tsk->dt_task->dt_helpers; -+ vstate = &help->dthps_vstate; -+ -+ /* -+ * We're now going to lose the help from this process. 
-+ */ -+ tsk->dt_task->dt_helpers = NULL; -+ dtrace_sync(); -+ -+ /* -+ * Destroy the helper actions. -+ */ -+ for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { -+ struct dtrace_helper_action *h, *next; -+ -+ for (h = help->dthps_actions[i]; h != NULL; h = next) { -+ next = h->dtha_next; -+ dtrace_helper_action_destroy(h, vstate); -+ h = next; -+ } -+ } -+ -+ mutex_unlock(&dtrace_lock); -+ -+ /* -+ * Destroy the helper providers. -+ */ -+ if (help->dthps_maxprovs > 0) { -+ mutex_lock(&dtrace_meta_lock); -+ if (dtrace_meta_pid != NULL) { -+ ASSERT(dtrace_deferred_pid == NULL); -+ -+ for (i = 0; i < help->dthps_nprovs; i++) { -+ dtrace_helper_provider_remove( -+ &help->dthps_provs[i]->dthp_prov, -+ tsk->pid); -+ } -+ } else { -+ mutex_lock(&dtrace_lock); -+ ASSERT(help->dthps_deferred == 0 || -+ help->dthps_next != NULL || -+ help->dthps_prev != NULL || -+ help == dtrace_deferred_pid); -+ -+ /* -+ * Remove the helper from the deferred list. -+ */ -+ if (help->dthps_next != NULL) -+ help->dthps_next->dthps_prev = help->dthps_prev; -+ if (help->dthps_prev != NULL) -+ help->dthps_prev->dthps_next = help->dthps_next; -+ if (dtrace_deferred_pid == help) { -+ dtrace_deferred_pid = help->dthps_next; -+ ASSERT(help->dthps_prev == NULL); -+ } -+ -+ mutex_unlock(&dtrace_lock); -+ } -+ -+ mutex_unlock(&dtrace_meta_lock); -+ -+ for (i = 0; i < help->dthps_nprovs; i++) -+ dtrace_helper_provider_destroy(help->dthps_provs[i]); -+ -+ vfree(help->dthps_provs); -+ } -+ -+ mutex_lock(&dtrace_lock); -+ -+ dtrace_vstate_fini(&help->dthps_vstate); -+ vfree(help->dthps_actions); -+ kfree(help); -+ -+ --dtrace_helpers; -+ mutex_unlock(&dtrace_lock); -+} -+ -+void dtrace_helpers_duplicate(struct task_struct *from, struct task_struct *to) -+{ -+ struct dtrace_task *dfrom = from->dt_task; -+ struct dtrace_task *dto = to->dt_task; -+ struct dtrace_helpers *help, *newhelp; -+ struct dtrace_helper_action *helper, *new, *last; -+ struct dtrace_difo *dp; -+ struct dtrace_vstate *vstate; -+ -+ int i, 
j, sz, hasprovs = 0; -+ -+ if (dfrom == NULL || dto == NULL) -+ return; -+ -+ mutex_lock(&dtrace_lock); -+ -+ ASSERT(dfrom->dt_helpers != NULL); -+ ASSERT(dtrace_helpers > 0); -+ -+ help = dfrom->dt_helpers; -+ newhelp = dtrace_helpers_create(to); -+ -+ ASSERT(dto->dt_helpers != NULL); -+ -+ newhelp->dthps_generation = help->dthps_generation; -+ vstate = &newhelp->dthps_vstate; -+ -+ /* -+ * Duplicate the helper actions. -+ */ -+ for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { -+ helper = help->dthps_actions[i]; -+ if (helper == NULL) -+ continue; -+ -+ for (last = NULL; helper != NULL; helper = helper->dtha_next) { -+ new = kzalloc(sizeof(struct dtrace_helper_action), -+ GFP_KERNEL); -+ new->dtha_generation = helper->dtha_generation; -+ -+ dp = helper->dtha_predicate; -+ if (dp != NULL) { -+ dp = dtrace_difo_duplicate(dp, vstate); -+ new->dtha_predicate = dp; -+ } -+ -+ new->dtha_nactions = helper->dtha_nactions; -+ sz = sizeof(struct dtrace_difo *) * new->dtha_nactions; -+ new->dtha_actions = vmalloc(sz); -+ -+ for (j = 0; j < new->dtha_nactions; j++) { -+ struct dtrace_difo *dp; -+ -+ dp = helper->dtha_actions[j]; -+ ASSERT(dp != NULL); -+ -+ dp = dtrace_difo_duplicate(dp, vstate); -+ new->dtha_actions[j] = dp; -+ } -+ -+ if (last != NULL) -+ last->dtha_next = new; -+ else -+ newhelp->dthps_actions[i] = new; -+ -+ last = new; -+ } -+ } -+ -+ /* -+ * Duplicate the helper providers and register them with the -+ * DTrace framework. 
-+ */ -+ if (help->dthps_nprovs > 0) { -+ newhelp->dthps_nprovs = help->dthps_nprovs; -+ newhelp->dthps_maxprovs = help->dthps_nprovs; -+ newhelp->dthps_provs = vmalloc( -+ newhelp->dthps_nprovs * -+ sizeof(struct dtrace_helper_provider *)); -+ -+ for (i = 0; i < newhelp->dthps_nprovs; i++) { -+ newhelp->dthps_provs[i] = help->dthps_provs[i]; -+ newhelp->dthps_provs[i]->dthp_ref++; -+ } -+ -+ hasprovs = 1; -+ } -+ -+ mutex_unlock(&dtrace_lock); -+ -+ if (hasprovs) -+ dtrace_helper_provider_register(to, newhelp, NULL); -+} -+ -+int dtrace_helper_destroygen(int gen) -+{ -+ struct task_struct *p = current; -+ struct dtrace_helpers *dth; -+ struct dtrace_vstate *vstate; -+ int i; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ if (current->dt_task == NULL) -+ return -ENOMEM; -+ -+ dth = current->dt_task->dt_helpers; -+ -+ if (dth == NULL || gen > dth->dthps_generation) -+ return -EINVAL; -+ -+ vstate = &dth->dthps_vstate; -+ -+ for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { -+ struct dtrace_helper_action *last = NULL, *h, *next; -+ -+ for (h = dth->dthps_actions[i]; h != NULL; h = next) { -+ next = h->dtha_next; -+ -+ dt_dbg_dof(" Comparing action (agen %d vs rgen %d)\n", -+ h->dtha_generation, gen); -+ -+ if (h->dtha_generation == gen) { -+ if (last != NULL) -+ last->dtha_next = next; -+ else -+ dth->dthps_actions[i] = next; -+ -+ dtrace_helper_action_destroy(h, vstate); -+ } else -+ last = h; -+ } -+ } -+ -+ /* -+ * Iterate until we've cleared out all helper providers with the given -+ * generation number. -+ */ -+ for (;;) { -+ struct dtrace_helper_provider *prov = NULL; -+ -+ /* -+ * Look for a helper provider with the right generation. We -+ * have to start back at the beginning of the list each time -+ * because we drop dtrace_lock. It's unlikely that we'll make -+ * more than two passes. 
-+ */ -+ for (i = 0; i < dth->dthps_nprovs; i++) { -+ prov = dth->dthps_provs[i]; -+ -+ if (prov->dthp_generation == gen) -+ break; -+ } -+ -+ /* -+ * If there were no matches, we are done. -+ */ -+ if (i == dth->dthps_nprovs) -+ break; -+ -+ dt_dbg_dof(" Found provider with gen %d\n", gen); -+ -+ /* -+ * Move the last helper provider into this slot. -+ */ -+ dth->dthps_nprovs--; -+ dth->dthps_provs[i] = dth->dthps_provs[dth->dthps_nprovs]; -+ dth->dthps_provs[dth->dthps_nprovs] = NULL; -+ -+ mutex_unlock(&dtrace_lock); -+ -+ /* -+ * If we have a meta provider, remove this helper provider. -+ */ -+ mutex_lock(&dtrace_meta_lock); -+ -+ if (dtrace_meta_pid != NULL) { -+ ASSERT(dtrace_deferred_pid == NULL); -+ -+ dtrace_helper_provider_remove(&prov->dthp_prov, -+ p->pid); -+ } -+ -+ mutex_unlock(&dtrace_meta_lock); -+ -+ dtrace_helper_provider_destroy(prov); -+ -+ mutex_lock(&dtrace_lock); -+ } -+ -+ return 0; -+} -+ -+static void dtrace_helper_trace(struct dtrace_helper_action *helper, -+ struct dtrace_mstate *mstate, -+ struct dtrace_vstate *vstate, int where) -+{ -+ uint32_t size, next, nnext, i; -+ struct dtrace_helptrace *ent; -+ uint16_t flags = this_cpu_core->cpuc_dtrace_flags; -+ -+ if (!dtrace_helptrace_enabled) -+ return; -+ -+ ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); -+ -+ /* -+ * What would a tracing framework be without its own tracing -+ * framework? (Well, a hell of a lot simpler, for starters...) -+ */ -+ size = sizeof(struct dtrace_helptrace) + dtrace_helptrace_nlocals * -+ sizeof(uint64_t) - sizeof(uint64_t); -+ -+ /* -+ * Iterate until we can allocate a slot in the trace buffer. -+ */ -+ do { -+ next = dtrace_helptrace_next; -+ -+ if (next + size < dtrace_helptrace_bufsize) -+ nnext = next + size; -+ else -+ nnext = size; -+ } while (cmpxchg(&dtrace_helptrace_next, next, nnext) != next); -+ -+ /* -+ * We have our slot; fill it in. 
-+ */ -+ if (nnext == size) -+ next = 0; -+ -+ ent = (struct dtrace_helptrace *)&dtrace_helptrace_buffer[next]; -+ ent->dtht_helper = helper; -+ ent->dtht_where = where; -+ ent->dtht_nlocals = vstate->dtvs_nlocals; -+ -+ ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) -+ ? mstate->dtms_fltoffs -+ : -1; -+ ent->dtht_fault = DTRACE_FLAGS2FLT(flags); -+ ent->dtht_illval = this_cpu_core->cpuc_dtrace_illval; -+ -+ for (i = 0; i < vstate->dtvs_nlocals; i++) { -+ struct dtrace_statvar *svar; -+ -+ svar = vstate->dtvs_locals[i]; -+ if (svar == NULL) -+ continue; -+ -+ ASSERT(svar->dtsv_size >= NR_CPUS * sizeof(uint64_t)); -+ ent->dtht_locals[i] = -+ ((uint64_t *)(uintptr_t)svar->dtsv_data)[ -+ smp_processor_id()]; -+ } -+} -+ -+uint64_t dtrace_helper(int which, struct dtrace_mstate *mstate, -+ struct dtrace_state *state, uint64_t arg0, -+ uint64_t arg1) -+{ -+ uint16_t *flags = &this_cpu_core->cpuc_dtrace_flags; -+ uint64_t sarg0 = mstate->dtms_arg[0]; -+ uint64_t sarg1 = mstate->dtms_arg[1]; -+ uint64_t rval = 0; -+ struct dtrace_helpers *helpers; -+ struct dtrace_helper_action *helper; -+ struct dtrace_vstate *vstate; -+ struct dtrace_difo *pred; -+ int i, trace = dtrace_helptrace_enabled; -+ -+ ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS); -+ -+ if (current->dt_task == NULL) -+ return 0; -+ -+ helpers = current->dt_task->dt_helpers; -+ if (helpers == NULL) -+ return 0; -+ -+ helper = helpers->dthps_actions[which]; -+ if (helper == NULL) -+ return 0; -+ -+ vstate = &helpers->dthps_vstate; -+ mstate->dtms_arg[0] = arg0; -+ mstate->dtms_arg[1] = arg1; -+ -+ /* -+ * Now iterate over each helper. If its predicate evaluates to 'true', -+ * we'll call the corresponding actions. Note that the below calls -+ * to dtrace_dif_emulate() may set faults in machine state. This is -+ * okay: our caller (the outer dtrace_dif_emulate()) will simply plow -+ * the stored DIF offset with its own (which is the desired behavior). 
-+ * Also, note the calls to dtrace_dif_emulate() may allocate scratch -+ * from machine state; this is okay, too. -+ */ -+ for (; helper != NULL; helper = helper->dtha_next) { -+ pred = helper->dtha_predicate; -+ if (pred != NULL) { -+ if (trace) -+ dtrace_helper_trace(helper, mstate, vstate, 0); -+ -+ if (!dtrace_dif_emulate(pred, mstate, vstate, state)) -+ goto next; -+ -+ if (*flags & CPU_DTRACE_FAULT) -+ goto err; -+ } -+ -+ for (i = 0; i < helper->dtha_nactions; i++) { -+ if (trace) -+ dtrace_helper_trace(helper, mstate, vstate, -+ i + 1); -+ -+ rval = dtrace_dif_emulate(helper->dtha_actions[i], -+ mstate, vstate, state); -+ -+ if (*flags & CPU_DTRACE_FAULT) -+ goto err; -+ } -+ -+next: -+ if (trace) -+ dtrace_helper_trace(helper, mstate, vstate, -+ DTRACE_HELPTRACE_NEXT); -+ } -+ -+ if (trace) -+ dtrace_helper_trace(helper, mstate, vstate, -+ DTRACE_HELPTRACE_DONE); -+ -+ /* -+ * Restore the arg0 that we saved upon entry. -+ */ -+ mstate->dtms_arg[0] = sarg0; -+ mstate->dtms_arg[1] = sarg1; -+ -+ return rval; -+ -+err: -+ if (trace) -+ dtrace_helper_trace(helper, mstate, vstate, -+ DTRACE_HELPTRACE_ERR); -+ -+ /* -+ * Restore the arg0 that we saved upon entry. -+ */ -+ mstate->dtms_arg[0] = sarg0; -+ mstate->dtms_arg[1] = sarg1; -+ -+ return 0; -+} -diff --git a/dtrace/dtrace_ecb.c b/dtrace/dtrace_ecb.c -new file mode 100644 -index 000000000000..cc04d1a14661 ---- /dev/null -+++ b/dtrace/dtrace_ecb.c -@@ -0,0 +1,936 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_ecb.c -+ * DESCRIPTION: DTrace - ECB implementation -+ * -+ * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/slab.h> -+#include <linux/vmalloc.h> -+ -+#include "dtrace.h" -+ -+struct dtrace_ecb *dtrace_ecb_create_cache; -+ -+static struct dtrace_action * -+dtrace_ecb_aggregation_create(struct dtrace_ecb *ecb, -+ struct dtrace_actdesc *desc) -+{ -+ struct dtrace_aggregation *agg; -+ size_t size = sizeof(uint64_t); -+ int ntuple = desc->dtad_ntuple; -+ struct dtrace_action *act; -+ struct dtrace_recdesc *frec; -+ dtrace_aggid_t aggid; -+ struct dtrace_state *state = ecb->dte_state; -+ -+ agg = kzalloc(sizeof(struct dtrace_aggregation), GFP_KERNEL); -+ if (agg == NULL) -+ return NULL; -+ -+ agg->dtag_ecb = ecb; -+ -+ ASSERT(DTRACEACT_ISAGG(desc->dtad_kind)); -+ -+ switch (desc->dtad_kind) { -+ case DTRACEAGG_MIN: -+ agg->dtag_initial = INT64_MAX; -+ agg->dtag_aggregate = dtrace_aggregate_min; -+ break; -+ -+ case DTRACEAGG_MAX: -+ agg->dtag_initial = INT64_MIN; -+ agg->dtag_aggregate = dtrace_aggregate_max; -+ break; -+ -+ case DTRACEAGG_COUNT: -+ agg->dtag_aggregate = dtrace_aggregate_count; -+ break; -+ -+ case DTRACEAGG_QUANTIZE: -+ agg->dtag_aggregate = dtrace_aggregate_quantize; -+ size = (((sizeof(uint64_t) * NBBY) - 1) * 2 + 1) * -+ sizeof(uint64_t); -+ break; -+ -+ case DTRACEAGG_LQUANTIZE: { -+ uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg); -+ uint16_t levels = -+ DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg); -+ -+ agg->dtag_initial = desc->dtad_arg; -+ agg->dtag_aggregate = dtrace_aggregate_lquantize; -+ -+ if (step == 0 || levels == 0) -+ goto err; -+ -+ size = levels * sizeof(uint64_t) + 3 * sizeof(uint64_t); -+ break; -+ } -+ -+ case DTRACEAGG_LLQUANTIZE: { -+ uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); -+ uint16_t lmag = DTRACE_LLQUANTIZE_LMAG(desc->dtad_arg); -+ 
uint16_t hmag = DTRACE_LLQUANTIZE_HMAG(desc->dtad_arg); -+ uint16_t steps = DTRACE_LLQUANTIZE_STEPS(desc->dtad_arg); -+ uint64_t buf64s; -+ -+ agg->dtag_initial = desc->dtad_arg; -+ agg->dtag_aggregate = dtrace_aggregate_llquantize; -+ -+ /* -+ * 64 is the largest hmag can practically be (for the smallest -+ * possible value of factor, 2). libdtrace has already checked -+ * for overflow, so if hmag > 64, we have corrupted DOF. -+ */ -+ if (factor < 2 || steps == 0 || hmag > 64) -+ goto err; -+ -+ /* -+ * The size of the buffer for an llquantize() is given by: -+ * (hmag-lmag+1) logarithmic ranges -+ * x -+ * (steps - steps/factor) bins per range -+ * x -+ * 2 signs -+ * + -+ * two overflow bins -+ * + -+ * one underflow bin -+ * + -+ * beginning word to encode factor,lmag,hmag,steps -+ */ -+ buf64s = ((hmag-lmag+1)*(steps-steps/factor)*2+4); -+ size = buf64s * sizeof(uint64_t); -+ break; -+ } -+ -+ case DTRACEAGG_AVG: -+ agg->dtag_aggregate = dtrace_aggregate_avg; -+ size = sizeof(uint64_t) * 2; -+ break; -+ -+ case DTRACEAGG_STDDEV: -+ agg->dtag_aggregate = dtrace_aggregate_stddev; -+ size = sizeof(uint64_t) * 4; -+ break; -+ -+ case DTRACEAGG_SUM: -+ agg->dtag_aggregate = dtrace_aggregate_sum; -+ break; -+ -+ default: -+ goto err; -+ } -+ -+ agg->dtag_action.dta_rec.dtrd_size = size; -+ -+ if (ntuple == 0) -+ goto err; -+ -+ for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) { -+ if (DTRACEACT_ISAGG(act->dta_kind)) -+ break; -+ -+ if (--ntuple == 0) { -+ agg->dtag_first = act; -+ goto success; -+ } -+ } -+ -+ ASSERT(ntuple != 0); -+err: -+ kfree(agg); -+ return NULL; -+ -+success: -+ ASSERT(ecb->dte_action_last != NULL); -+ act = ecb->dte_action_last; -+ -+ if (act->dta_kind == DTRACEACT_DIFEXPR) { -+ ASSERT(act->dta_difo != NULL); -+ -+ if (act->dta_difo->dtdo_rtype.dtdt_size == 0) -+ agg->dtag_hasarg = 1; -+ } -+ -+ /* -+ * Get an ID for the aggregation (add it to the idr). 
-+ */ -+ idr_preload(GFP_KERNEL); -+ aggid = idr_alloc_cyclic(&state->dts_agg_idr, agg, 0, 0, GFP_NOWAIT); -+ idr_preload_end(); -+ if (aggid < 0) { -+ /* FIXME: need to handle this */ -+ } -+ -+ state->dts_naggs++; -+ agg->dtag_id = aggid; -+ -+ frec = &agg->dtag_first->dta_rec; -+ if (frec->dtrd_alignment < sizeof(dtrace_aggid_t)) -+ frec->dtrd_alignment = sizeof(dtrace_aggid_t); -+ -+ for (act = agg->dtag_first; act != NULL; act = act->dta_next) { -+ ASSERT(!act->dta_intuple); -+ -+ act->dta_intuple = 1; -+ } -+ -+ return &agg->dtag_action; -+} -+ -+void dtrace_ecb_aggregation_destroy(struct dtrace_ecb *ecb, -+ struct dtrace_action *act) -+{ -+ struct dtrace_aggregation *agg = (struct dtrace_aggregation *)act; -+ struct dtrace_state *state = ecb->dte_state; -+ -+ ASSERT(DTRACEACT_ISAGG(act->dta_kind)); -+ -+ idr_remove(&state->dts_agg_idr, agg->dtag_id); -+ state->dts_naggs--; -+ -+ kfree(agg); -+} -+ -+static int dtrace_ecb_action_add(struct dtrace_ecb *ecb, -+ struct dtrace_actdesc *desc) -+{ -+ struct dtrace_action *action, *last; -+ struct dtrace_difo *dp = desc->dtad_difo; -+ uint32_t size = 0, align = sizeof(uint8_t), mask; -+ uint16_t format = 0; -+ struct dtrace_recdesc *rec; -+ struct dtrace_state *state = ecb->dte_state; -+ dtrace_optval_t *opt = state->dts_options, nframes, strsize; -+ uint64_t arg = desc->dtad_arg; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1); -+ -+ if (DTRACEACT_ISAGG(desc->dtad_kind)) { -+ struct dtrace_action *act; -+ -+ for (act = ecb->dte_action; act != NULL; act = act->dta_next) { -+ if (act->dta_kind == DTRACEACT_COMMIT) -+ return -EINVAL; -+ -+ if (act->dta_kind == DTRACEACT_SPECULATE) -+ return -EINVAL; -+ } -+ -+ action = dtrace_ecb_aggregation_create(ecb, desc); -+ if (action == NULL) -+ return -EINVAL; -+ } else { -+ if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) || -+ (desc->dtad_kind == DTRACEACT_DIFEXPR && -+ dp != NULL && dp->dtdo_destructive)) -+ 
state->dts_destructive = 1; -+ -+ switch (desc->dtad_kind) { -+ case DTRACEACT_PRINTF: -+ case DTRACEACT_PRINTA: -+ case DTRACEACT_SYSTEM: -+ case DTRACEACT_FREOPEN: -+ if ((void *)(uintptr_t)arg == NULL) { -+ ASSERT(desc->dtad_kind == DTRACEACT_PRINTA); -+ -+ format = 0; -+ } else { -+ ASSERT((void *)(uintptr_t)arg != NULL); -+#ifdef FIXME -+ ASSERT(arg > KERNELBASE); -+#endif -+ -+ format = dtrace_format_add( -+ state, (char *)(uintptr_t)arg); -+ } -+ /* fallthru */ -+ -+ case DTRACEACT_TRACEMEM: -+ case DTRACEACT_LIBACT: -+ case DTRACEACT_DIFEXPR: -+ if (dp == NULL) -+ return -EINVAL; -+ -+ size = dp->dtdo_rtype.dtdt_size; -+ if (size != 0) -+ break; -+ -+ if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { -+ if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) -+ return -EINVAL; -+ -+ size = opt[DTRACEOPT_STRSIZE]; -+ } -+ -+ break; -+ -+ case DTRACEACT_STACK: -+ nframes = arg; -+ if (nframes == 0) { -+ nframes = opt[DTRACEOPT_STACKFRAMES]; -+ -+ ASSERT(nframes > 0); -+ -+ arg = nframes; -+ } -+ -+ size = nframes * sizeof(uint64_t); -+ break; -+ -+ case DTRACEACT_JSTACK: -+ strsize = DTRACE_USTACK_STRSIZE(arg); -+ if (strsize == 0) -+ strsize = opt[DTRACEOPT_JSTACKSTRSIZE]; -+ -+ nframes = DTRACE_USTACK_NFRAMES(arg); -+ if (nframes == 0) -+ nframes = opt[DTRACEOPT_JSTACKFRAMES]; -+ -+ arg = DTRACE_USTACK_ARG(nframes, strsize); -+ /* fallthru */ -+ -+ case DTRACEACT_USTACK: -+ if (desc->dtad_kind != DTRACEACT_JSTACK && -+ (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) { -+ strsize = DTRACE_USTACK_STRSIZE(arg); -+ nframes = opt[DTRACEOPT_USTACKFRAMES]; -+ -+ ASSERT(nframes > 0); -+ -+ arg = DTRACE_USTACK_ARG(nframes, strsize); -+ } -+ -+ size = (nframes + 2) * sizeof(uint64_t); -+ size += DTRACE_USTACK_STRSIZE(arg); -+ size = P2ROUNDUP(size, (uint32_t)(sizeof(uintptr_t))); -+ -+ break; -+ -+ case DTRACEACT_SYM: -+ case DTRACEACT_MOD: -+ if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) != -+ sizeof(uint64_t)) || -+ (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) -+ 
return -EINVAL; -+ -+ break; -+ -+ case DTRACEACT_USYM: -+ case DTRACEACT_UMOD: -+ case DTRACEACT_UADDR: -+ if (dp == NULL || -+ (dp->dtdo_rtype.dtdt_size != sizeof(uint64_t)) || -+ (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) -+ return -EINVAL; -+ -+ size = 3 * sizeof(uint64_t); -+ -+ break; -+ -+ case DTRACEACT_STOP: -+ case DTRACEACT_BREAKPOINT: -+ case DTRACEACT_PANIC: -+ break; -+ -+ case DTRACEACT_CHILL: -+ case DTRACEACT_DISCARD: -+ case DTRACEACT_RAISE: -+ if (dp == NULL) -+ return -EINVAL; -+ -+ break; -+ -+ case DTRACEACT_EXIT: -+ if (dp == NULL || (size = dp->dtdo_rtype.dtdt_size) != -+ sizeof(int) || -+ (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) -+ return -EINVAL; -+ -+ break; -+ -+ case DTRACEACT_SPECULATE: -+ if (ecb->dte_size > sizeof(dtrace_epid_t)) -+ return -EINVAL; -+ -+ if (dp == NULL) -+ return -EINVAL; -+ -+ state->dts_speculates = 1; -+ -+ break; -+ -+ case DTRACEACT_COMMIT: { -+ struct dtrace_action *act = ecb->dte_action; -+ -+ for (; act != NULL; act = act->dta_next) { -+ if (act->dta_kind == DTRACEACT_COMMIT) -+ return -EINVAL; -+ } -+ -+ if (dp == NULL) -+ return -EINVAL; -+ -+ break; -+ } -+ -+ case DTRACEACT_PCAP: -+ size = dp->dtdo_rtype.dtdt_size; -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ -+ if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) { -+ struct dtrace_action *act = ecb->dte_action; -+ -+ for (; act != NULL; act = act->dta_next) { -+ if (act->dta_kind == DTRACEACT_COMMIT) -+ return -EINVAL; -+ } -+ } -+ -+ action = kzalloc(sizeof(struct dtrace_action), GFP_KERNEL); -+ if (action == NULL) -+ return -ENOMEM; -+ -+ action->dta_rec.dtrd_size = size; -+ } -+ -+ action->dta_refcnt = 1; -+ rec = &action->dta_rec; -+ size = rec->dtrd_size; -+ -+ for (mask = sizeof(uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) { -+ if (!(size & mask)) { -+ align = mask + 1; -+ -+ break; -+ } -+ } -+ -+ action->dta_kind = desc->dtad_kind; -+ -+ action->dta_difo = dp; -+ if (action->dta_difo != NULL) -+ dtrace_difo_hold(dp); -+ -+ 
rec->dtrd_action = action->dta_kind; -+ rec->dtrd_arg = arg; -+ rec->dtrd_uarg = desc->dtad_uarg; -+ rec->dtrd_alignment = (uint16_t)align; -+ rec->dtrd_format = format; -+ -+ last = ecb->dte_action_last; -+ if (last != NULL) { -+ ASSERT(ecb->dte_action != NULL); -+ -+ action->dta_prev = last; -+ last->dta_next = action; -+ } else { -+ ASSERT(ecb->dte_action == NULL); -+ -+ ecb->dte_action = action; -+ } -+ -+ ecb->dte_action_last = action; -+ -+ return 0; -+} -+ -+static void dtrace_ecb_action_remove(struct dtrace_ecb *ecb) -+{ -+ struct dtrace_action *act = ecb->dte_action, *next; -+ struct dtrace_vstate *vstate = &ecb->dte_state->dts_vstate; -+ struct dtrace_difo *dp; -+ uint16_t format; -+ -+ if (act != NULL && act->dta_refcnt > 1) { -+ ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1); -+ -+ act->dta_refcnt--; -+ } else { -+ for (; act != NULL; act = next) { -+ next = act->dta_next; -+ ASSERT(next != NULL || act == ecb->dte_action_last); -+ ASSERT(act->dta_refcnt == 1); -+ -+ format = act->dta_rec.dtrd_format; -+ if (format != 0) -+ dtrace_format_remove(ecb->dte_state, format); -+ -+ dp = act->dta_difo; -+ if (dp != NULL) -+ dtrace_difo_release(dp, vstate); -+ -+ if (DTRACEACT_ISAGG(act->dta_kind)) -+ dtrace_ecb_aggregation_destroy(ecb, act); -+ else -+ kfree(act); -+ } -+ } -+ -+ ecb->dte_action = NULL; -+ ecb->dte_action_last = NULL; -+ ecb->dte_size = sizeof(dtrace_epid_t); -+} -+ -+/* -+ * Disable the ECB by removing it from its probe. 
-+ */ -+void dtrace_ecb_disable(struct dtrace_ecb *ecb) -+{ -+ struct dtrace_ecb *pecb, *prev = NULL; -+ struct dtrace_probe *probe = ecb->dte_probe; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ if (probe == NULL) -+ return; -+ -+ for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) { -+ if (pecb == ecb) -+ break; -+ -+ prev = pecb; -+ } -+ -+ ASSERT(pecb != NULL); -+ -+ if (prev == NULL) -+ probe->dtpr_ecb = ecb->dte_next; -+ else -+ prev->dte_next = ecb->dte_next; -+ -+ if (ecb == probe->dtpr_ecb_last) { -+ ASSERT(ecb->dte_next == NULL); -+ probe->dtpr_ecb_last = prev; -+ } -+ -+ /* -+ * The ECB has been disconnected from the probe; now sync to assure -+ * that all CPUs have seen the change before returning. -+ */ -+ dtrace_sync(); -+ -+ if (probe->dtpr_ecb == NULL) { -+ /* -+ * That was the last ECB on the probe; clear the predicate -+ * cache ID for the probe, disable it and sync one more time -+ * to assure that we'll never hit it again. -+ */ -+ struct dtrace_provider *prov = probe->dtpr_provider; -+ -+ ASSERT(ecb->dte_next == NULL); -+ ASSERT(probe->dtpr_ecb_last == NULL); -+ -+ probe->dtpr_predcache = DTRACE_CACHEIDNONE; -+ prov->dtpv_pops.dtps_disable(prov->dtpv_arg, -+ probe->dtpr_id, probe->dtpr_arg); -+ -+ dtrace_sync(); -+ } else { -+ /* -+ * There is at least one ECB remaining on the probe. If there -+ * is _exactly_ one, set the probe's predicate cache ID to be -+ * the predicate cache ID of the remaining ECB. 
-+ */ -+ ASSERT(probe->dtpr_ecb_last != NULL); -+ ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE); -+ -+ if (probe->dtpr_ecb == probe->dtpr_ecb_last) { -+ struct dtrace_predicate *p = -+ probe->dtpr_ecb->dte_predicate; -+ -+ ASSERT(probe->dtpr_ecb->dte_next == NULL); -+ -+ if (p != NULL) -+ probe->dtpr_predcache = p->dtp_cacheid; -+ } -+ -+ ecb->dte_next = NULL; -+ } -+} -+ -+static struct dtrace_ecb *dtrace_ecb_add(struct dtrace_state *state, -+ struct dtrace_probe *probe) -+{ -+ struct dtrace_ecb *ecb; -+ dtrace_epid_t epid; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ ecb = kzalloc(sizeof(struct dtrace_ecb), GFP_KERNEL); -+ if (ecb == NULL) -+ return NULL; -+ -+ ecb->dte_predicate = NULL; -+ ecb->dte_probe = probe; -+ ecb->dte_size = ecb->dte_needed = sizeof(dtrace_epid_t); -+ ecb->dte_alignment = sizeof(dtrace_epid_t); -+ -+ epid = state->dts_epid++; -+ -+ if (epid - 1 >= state->dts_necbs) { -+ struct dtrace_ecb **oecbs = state->dts_ecbs, **ecbs; -+ int necbs = state->dts_necbs << 1; -+ -+ ASSERT(epid == state->dts_necbs + 1); -+ -+ if (necbs == 0) { -+ ASSERT(oecbs == NULL); -+ -+ necbs = 1; -+ } -+ -+ ecbs = vzalloc(necbs * sizeof(*ecbs)); -+ if (ecbs == NULL) { -+ kfree(ecb); -+ return NULL; -+ } -+ -+ if (oecbs != NULL) -+ memcpy(ecbs, oecbs, state->dts_necbs * sizeof(*ecbs)); -+ -+ dtrace_membar_producer(); -+ -+ state->dts_ecbs = ecbs; -+ -+ if (oecbs != NULL) { -+ if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) -+ dtrace_sync(); -+ -+ vfree(oecbs); -+ } -+ -+ dtrace_membar_producer(); -+ -+ state->dts_necbs = necbs; -+ } -+ -+ ecb->dte_state = state; -+ -+ ASSERT(state->dts_ecbs[epid - 1] == NULL); -+ -+ dtrace_membar_producer(); -+ -+ state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb; -+ -+ return ecb; -+} -+ -+static struct dtrace_ecb * dtrace_ecb_create(struct dtrace_state *state, -+ struct dtrace_probe *probe, -+ struct dtrace_enabling *enab) -+{ -+ struct dtrace_ecb *ecb; -+ struct dtrace_predicate *pred; -+ struct dtrace_actdesc *act; -+ 
struct dtrace_provider *prov; -+ struct dtrace_ecbdesc *desc = enab->dten_current; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(state != NULL); -+ -+ ecb = dtrace_ecb_add(state, probe); -+ if (ecb == NULL) -+ return NULL; -+ -+ ecb->dte_uarg = desc->dted_uarg; -+ -+ pred = desc->dted_pred.dtpdd_predicate; -+ if (pred != NULL) { -+ dtrace_predicate_hold(pred); -+ ecb->dte_predicate = pred; -+ } -+ -+ if (probe != NULL) { -+ prov = probe->dtpr_provider; -+ -+ if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) && -+ (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) -+ ecb->dte_cond |= DTRACE_COND_OWNER; -+ -+ if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) && -+ (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL)) -+ ecb->dte_cond |= DTRACE_COND_USERMODE; -+ } -+ -+ if (dtrace_ecb_create_cache != NULL) { -+ struct dtrace_ecb *cached = dtrace_ecb_create_cache; -+ struct dtrace_action *act = cached->dte_action; -+ -+ if (act != NULL) { -+ ASSERT(act->dta_refcnt > 0); -+ -+ act->dta_refcnt++; -+ ecb->dte_action = act; -+ ecb->dte_action_last = cached->dte_action_last; -+ ecb->dte_needed = cached->dte_needed; -+ ecb->dte_size = cached->dte_size; -+ ecb->dte_alignment = cached->dte_alignment; -+ } -+ -+ return ecb; -+ } -+ -+ for (act = desc->dted_action; act != NULL; act = act->dtad_next) { -+ enab->dten_error = dtrace_ecb_action_add(ecb, act); -+ if (enab->dten_error != 0) { -+ dtrace_ecb_destroy(ecb); -+ return NULL; -+ } -+ } -+ -+ dtrace_ecb_resize(ecb); -+ -+ return (dtrace_ecb_create_cache = ecb); -+} -+ -+int dtrace_ecb_create_enable(struct dtrace_probe *probe, void *arg) -+{ -+ struct dtrace_ecb *ecb; -+ struct dtrace_enabling *enab = arg; -+ struct dtrace_state *state = enab->dten_vstate->dtvs_state; -+ -+ ASSERT(state != NULL); -+ -+ if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) -+ return DTRACE_MATCH_NEXT; -+ -+ ecb = dtrace_ecb_create(state, probe, enab); -+ if (ecb == NULL) -+ return DTRACE_MATCH_DONE; -+ -+ if 
(dtrace_ecb_enable(ecb) < 0) -+ return DTRACE_MATCH_FAIL; -+ -+ return DTRACE_MATCH_NEXT; -+} -+ -+void dtrace_ecb_destroy(struct dtrace_ecb *ecb) -+{ -+ struct dtrace_state *state = ecb->dte_state; -+ struct dtrace_vstate *vstate = &state->dts_vstate; -+ struct dtrace_predicate *pred; -+ dtrace_epid_t epid = ecb->dte_epid; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(ecb->dte_next == NULL); -+ ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb); -+ -+ pred = ecb->dte_predicate; -+ if (pred != NULL) -+ dtrace_predicate_release(pred, vstate); -+ -+ dtrace_ecb_action_remove(ecb); -+ -+ ASSERT(state->dts_ecbs[epid - 1] == ecb); -+ state->dts_ecbs[epid - 1] = NULL; -+ -+ kfree(ecb); -+} -+ -+void dtrace_ecb_resize(struct dtrace_ecb *ecb) -+{ -+ struct dtrace_action *act; -+ uint32_t maxalign = sizeof(dtrace_epid_t); -+ uint32_t align = sizeof(uint8_t), offs, diff; -+ int wastuple = 0; -+ uint32_t aggbase = UINT32_MAX; -+ struct dtrace_state *state = ecb->dte_state; -+ -+ /* -+ * If we record anything, we always record the epid. (And we always -+ * record it first.) -+ */ -+ offs = sizeof(dtrace_epid_t); -+ ecb->dte_size = ecb->dte_needed = sizeof(dtrace_epid_t); -+ -+ for (act = ecb->dte_action; act != NULL; act = act->dta_next) { -+ struct dtrace_recdesc *rec = &act->dta_rec; -+ -+ align = rec->dtrd_alignment; -+ if (align > maxalign) -+ maxalign = align; -+ -+ if (!wastuple && act->dta_intuple) { -+ /* -+ * This is the first record in a tuple. Align the -+ * offset to be at offset 4 in an 8-byte aligned -+ * block. -+ */ -+ diff = offs + sizeof(dtrace_aggid_t); -+ -+ diff &= sizeof(uint64_t) - 1; -+ if (diff) -+ offs += sizeof(uint64_t) - diff; -+ -+ aggbase = offs - sizeof(dtrace_aggid_t); -+ ASSERT(!(aggbase & (sizeof(uint64_t) - 1))); -+ } -+ -+ if (rec->dtrd_size != 0) { -+ diff = offs & (align - 1); -+ if (diff) -+ /* -+ * The current offset is not properly -+ * aligned; align it. 
-+ */ -+ offs += align - diff; -+ } -+ -+ rec->dtrd_offset = offs; -+ -+ if (offs + rec->dtrd_size > ecb->dte_needed) { -+ ecb->dte_needed = offs + rec->dtrd_size; -+ -+ if (ecb->dte_needed > state->dts_needed) -+ state->dts_needed = ecb->dte_needed; -+ } -+ -+ if (DTRACEACT_ISAGG(act->dta_kind)) { -+ struct dtrace_aggregation *agg; -+ struct dtrace_action *first, *prev; -+ -+ agg = (struct dtrace_aggregation *)act; -+ first = agg->dtag_first; -+ -+ ASSERT(rec->dtrd_size != 0 && first != NULL); -+ ASSERT(wastuple); -+ ASSERT(aggbase != UINT32_MAX); -+ -+ agg->dtag_base = aggbase; -+ -+ while ((prev = first->dta_prev) != NULL && -+ DTRACEACT_ISAGG(prev->dta_kind)) { -+ agg = (struct dtrace_aggregation *)prev; -+ first = agg->dtag_first; -+ } -+ -+ if (prev != NULL) { -+ offs = prev->dta_rec.dtrd_offset + -+ prev->dta_rec.dtrd_size; -+ } else -+ offs = sizeof(dtrace_epid_t); -+ -+ wastuple = 0; -+ } else { -+ if (!act->dta_intuple) -+ ecb->dte_size = offs + rec->dtrd_size; -+ -+ offs += rec->dtrd_size; -+ } -+ -+ wastuple = act->dta_intuple; -+ } -+ -+ act = ecb->dte_action; -+ if (act != NULL && -+ !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) && -+ ecb->dte_size == sizeof(dtrace_epid_t)) { -+ /* -+ * If the size is still sizeof(dtrace_epid_t), then all -+ * actions store no data; set the size to 0. -+ */ -+ ecb->dte_alignment = maxalign; -+ ecb->dte_size = 0; -+ -+ /* -+ * If the needed space is still sizeof(dtrace_epid_t), then -+ * all actions need no additional space; set the needed -+ * size to 0. -+ */ -+ if (ecb->dte_needed == sizeof(dtrace_epid_t)) -+ ecb->dte_needed = 0; -+ -+ return; -+ } -+ -+ /* -+ * Set our alignment, and make sure that the dte_size and dte_needed -+ * are aligned to the size of an EPID. 
-+ */ -+ ecb->dte_alignment = maxalign; -+ ecb->dte_size = (ecb->dte_size + (sizeof(dtrace_epid_t) - 1)) & -+ ~(sizeof(dtrace_epid_t) - 1); -+ ecb->dte_needed = (ecb->dte_needed + (sizeof(dtrace_epid_t) - 1)) & -+ ~(sizeof(dtrace_epid_t) - 1); -+ ASSERT(ecb->dte_size <= ecb->dte_needed); -+} -+ -+int dtrace_ecb_enable(struct dtrace_ecb *ecb) -+{ -+ struct dtrace_probe *probe = ecb->dte_probe; -+ -+ ASSERT(MUTEX_HELD(&cpu_lock)); -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(ecb->dte_next == NULL); -+ -+ if (probe == NULL) -+ return 0; -+ -+ if (probe->dtpr_ecb == NULL) { -+ struct dtrace_provider *prov = probe->dtpr_provider; -+ -+ probe->dtpr_ecb = probe->dtpr_ecb_last = ecb; -+ -+ -+ if (ecb->dte_predicate != NULL) -+ probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; -+ -+ return prov->dtpv_pops.dtps_enable(prov->dtpv_arg, -+ probe->dtpr_id, -+ probe->dtpr_arg); -+ } else { -+ ASSERT(probe->dtpr_ecb_last != NULL); -+ -+ probe->dtpr_ecb_last->dte_next = ecb; -+ probe->dtpr_ecb_last = ecb; -+ probe->dtpr_predcache = 0; -+ -+ dtrace_sync(); -+ -+ return 0; -+ } -+} -+ -+struct dtrace_ecb *dtrace_epid2ecb(struct dtrace_state *state, -+ dtrace_epid_t id) -+{ -+ struct dtrace_ecb *ecb; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ if (id == 0 || id > state->dts_necbs) -+ return NULL; -+ -+ ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL); -+ ecb = state->dts_ecbs[id - 1]; -+ ASSERT(ecb == NULL || ecb->dte_epid == id); -+ -+ return ecb; -+} -+ -+struct dtrace_aggregation *dtrace_aggid2agg(struct dtrace_state *state, -+ dtrace_aggid_t id) -+{ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ return idr_find(&state->dts_agg_idr, id); -+} -diff --git a/dtrace/dtrace_enable.c b/dtrace/dtrace_enable.c -new file mode 100644 -index 000000000000..72f30149cb9c ---- /dev/null -+++ b/dtrace/dtrace_enable.c -@@ -0,0 +1,449 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_enable.c -+ * DESCRIPTION: DTrace - probe enabling implementation -+ * -+ * Copyright 
(c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/mutex.h> -+#include <linux/slab.h> -+#include <linux/vmalloc.h> -+ -+#include "dtrace.h" -+ -+size_t dtrace_retain_max = 1024; -+struct dtrace_enabling *dtrace_retained; -+dtrace_genid_t dtrace_retained_gen; -+ -+struct dtrace_enabling *dtrace_enabling_create(struct dtrace_vstate *vstate) -+{ -+ struct dtrace_enabling *enab; -+ -+ enab = kzalloc(sizeof(struct dtrace_enabling), GFP_KERNEL); -+ if (enab == NULL) -+ return NULL; -+ -+ enab->dten_vstate = vstate; -+ -+ return enab; -+} -+ -+void dtrace_enabling_add(struct dtrace_enabling *enab, -+ struct dtrace_ecbdesc *ecb) -+{ -+ struct dtrace_ecbdesc **ndesc; -+ size_t osize, nsize; -+ -+ /* -+ * We can't add to enablings after we've enabled them, or after we've -+ * retained them. 
-+ */ -+ ASSERT(enab->dten_probegen == 0); -+ ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); -+ -+ if (enab->dten_ndesc < enab->dten_maxdesc) { -+ enab->dten_desc[enab->dten_ndesc++] = ecb; -+ return; -+ } -+ -+ osize = enab->dten_maxdesc * sizeof(struct dtrace_enabling *); -+ -+ if (enab->dten_maxdesc == 0) -+ enab->dten_maxdesc = 1; -+ else -+ enab->dten_maxdesc <<= 1; -+ -+ ASSERT(enab->dten_ndesc < enab->dten_maxdesc); -+ -+ nsize = enab->dten_maxdesc * sizeof(struct dtrace_enabling *); -+ ndesc = vzalloc(nsize); -+ memcpy(ndesc, enab->dten_desc, osize); -+ vfree(enab->dten_desc); -+ -+ enab->dten_desc = ndesc; -+ enab->dten_desc[enab->dten_ndesc++] = ecb; -+} -+ -+static void dtrace_enabling_addlike(struct dtrace_enabling *enab, -+ struct dtrace_ecbdesc *ecb, -+ struct dtrace_probedesc *pd) -+{ -+ struct dtrace_ecbdesc *new; -+ struct dtrace_predicate *pred; -+ struct dtrace_actdesc *act; -+ -+ /* -+ * We're going to create a new ECB description that matches the -+ * specified ECB in every way, but has the specified probe description. 
-+ */ -+ new = kzalloc(sizeof(struct dtrace_ecbdesc), GFP_KERNEL); -+ -+ pred = ecb->dted_pred.dtpdd_predicate; -+ if (pred != NULL) -+ dtrace_predicate_hold(pred); -+ -+ for (act = ecb->dted_action; act != NULL; act = act->dtad_next) -+ dtrace_actdesc_hold(act); -+ -+ new->dted_action = ecb->dted_action; -+ new->dted_pred = ecb->dted_pred; -+ new->dted_probe = *pd; -+ new->dted_uarg = ecb->dted_uarg; -+ -+ dtrace_enabling_add(enab, new); -+} -+ -+void dtrace_enabling_dump(struct dtrace_enabling *enab) -+{ -+ int i; -+ -+ for (i = 0; i < enab->dten_ndesc; i++) { -+ struct dtrace_probedesc *desc = -+ &enab->dten_desc[i]->dted_probe; -+ -+ pr_info("enabling probe %d (%s:%s:%s:%s)", -+ i, desc->dtpd_provider, desc->dtpd_mod, -+ desc->dtpd_func, desc->dtpd_name); -+ } -+} -+ -+void dtrace_enabling_destroy(struct dtrace_enabling *enab) -+{ -+ int i; -+ struct dtrace_ecbdesc *ep; -+ struct dtrace_vstate *vstate = enab->dten_vstate; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ for (i = 0; i < enab->dten_ndesc; i++) { -+ struct dtrace_actdesc *act, *next; -+ struct dtrace_predicate *pred; -+ -+ ep = enab->dten_desc[i]; -+ -+ pred = ep->dted_pred.dtpdd_predicate; -+ if (pred != NULL) -+ dtrace_predicate_release(pred, vstate); -+ -+ for (act = ep->dted_action; act != NULL; act = next) { -+ next = act->dtad_next; -+ dtrace_actdesc_release(act, vstate); -+ } -+ -+ kfree(ep); -+ } -+ -+ vfree(enab->dten_desc); -+ -+ /* -+ * If this was a retained enabling, decrement the dts_nretained count -+ * and remove it from the dtrace_retained list. 
-+ */ -+ if (enab->dten_prev != NULL || enab->dten_next != NULL || -+ dtrace_retained == enab) { -+ ASSERT(enab->dten_vstate->dtvs_state != NULL); -+ ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); -+ enab->dten_vstate->dtvs_state->dts_nretained--; -+ dtrace_retained_gen++; -+ } -+ -+ if (enab->dten_prev == NULL) { -+ if (dtrace_retained == enab) { -+ dtrace_retained = enab->dten_next; -+ -+ if (dtrace_retained != NULL) -+ dtrace_retained->dten_prev = NULL; -+ } -+ } else { -+ ASSERT(enab != dtrace_retained); -+ ASSERT(dtrace_retained != NULL); -+ enab->dten_prev->dten_next = enab->dten_next; -+ } -+ -+ if (enab->dten_next != NULL) { -+ ASSERT(dtrace_retained != NULL); -+ enab->dten_next->dten_prev = enab->dten_prev; -+ } -+ -+ kfree(enab); -+} -+ -+int dtrace_enabling_retain(struct dtrace_enabling *enab) -+{ -+ struct dtrace_state *state; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); -+ ASSERT(enab->dten_vstate != NULL); -+ -+ state = enab->dten_vstate->dtvs_state; -+ ASSERT(state != NULL); -+ -+ /* -+ * We only allow each state to retain dtrace_retain_max enablings. 
-+ */ -+ if (state->dts_nretained >= dtrace_retain_max) -+ return -ENOSPC; -+ -+ state->dts_nretained++; -+ dtrace_retained_gen++; -+ -+ if (dtrace_retained == NULL) { -+ dtrace_retained = enab; -+ return 0; -+ } -+ -+ enab->dten_next = dtrace_retained; -+ dtrace_retained->dten_prev = enab; -+ dtrace_retained = enab; -+ -+ return 0; -+} -+ -+int dtrace_enabling_replicate(struct dtrace_state *state, -+ struct dtrace_probedesc *match, -+ struct dtrace_probedesc *create) -+{ -+ struct dtrace_enabling *new, *enab; -+ int found = 0, err = -ENOENT; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN); -+ ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN); -+ ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN); -+ ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN); -+ -+ new = dtrace_enabling_create(&state->dts_vstate); -+ if (new == NULL) -+ return -ENOMEM; -+ -+ /* -+ * Iterate over all retained enablings, looking for enablings that -+ * match the specified state. -+ */ -+ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { -+ int i; -+ -+ /* -+ * dtvs_state can only be NULL for helper enablings -- and -+ * helper enablings can't be retained. -+ */ -+ ASSERT(enab->dten_vstate->dtvs_state != NULL); -+ -+ if (enab->dten_vstate->dtvs_state != state) -+ continue; -+ -+ /* -+ * Now iterate over each probe description; we're looking for -+ * an exact match to the specified probe description. -+ */ -+ for (i = 0; i < enab->dten_ndesc; i++) { -+ struct dtrace_ecbdesc *ep = enab->dten_desc[i]; -+ struct dtrace_probedesc *pd = &ep->dted_probe; -+ -+ if (strcmp(pd->dtpd_provider, match->dtpd_provider)) -+ continue; -+ -+ if (strcmp(pd->dtpd_mod, match->dtpd_mod)) -+ continue; -+ -+ if (strcmp(pd->dtpd_func, match->dtpd_func)) -+ continue; -+ -+ if (strcmp(pd->dtpd_name, match->dtpd_name)) -+ continue; -+ -+ /* -+ * We have a winning probe! Add it to our growing -+ * enabling. 
-+ */ -+ found = 1; -+ dtrace_enabling_addlike(new, ep, create); -+ } -+ } -+ -+ if (!found || (err = dtrace_enabling_retain(new)) != 0) { -+ dtrace_enabling_destroy(new); -+ return err; -+ } -+ -+ return 0; -+} -+ -+void dtrace_enabling_retract(struct dtrace_state *state) -+{ -+ struct dtrace_enabling *enab, *next; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ /* -+ * Iterate over all retained enablings, destroy the enablings retained -+ * for the specified state. -+ */ -+ for (enab = dtrace_retained; enab != NULL; enab = next) { -+ next = enab->dten_next; -+ -+ /* -+ * dtvs_state can only be NULL for helper enablings, and helper -+ * enablings can't be retained. -+ */ -+ ASSERT(enab->dten_vstate->dtvs_state != NULL); -+ -+ if (enab->dten_vstate->dtvs_state == state) { -+ ASSERT(state->dts_nretained > 0); -+ dtrace_enabling_destroy(enab); -+ } -+ } -+ -+ ASSERT(state->dts_nretained == 0); -+} -+ -+int dtrace_enabling_match(struct dtrace_enabling *enab, int *nmatched) -+{ -+ int i; -+ int total_matched = 0, matched = 0; -+ -+ for (i = 0; i < enab->dten_ndesc; i++) { -+ struct dtrace_ecbdesc *ep = enab->dten_desc[i]; -+ -+ enab->dten_current = ep; -+ enab->dten_error = 0; -+ -+ dt_dbg_enable(" Matching enabling %p[%d] for %s:%s:%s:%s\n", -+ enab, i, ep->dted_probe.dtpd_provider, -+ ep->dted_probe.dtpd_mod, -+ ep->dted_probe.dtpd_func, -+ ep->dted_probe.dtpd_name); -+ -+ matched = dtrace_probe_enable(&ep->dted_probe, enab); -+ if (matched < 0) { -+ dt_dbg_enable(" Matching enabling %p[%d] failed: " -+ "busy\n", enab, i); -+ return -EBUSY; -+ } -+ -+ dt_dbg_enable(" Matching enabling %p[%d] found %d matches.\n", -+ enab, i, matched); -+ -+ total_matched += matched; -+ -+ if (enab->dten_error != 0) { -+ if (nmatched == NULL) -+ pr_warn("%s error on %p: %d\n", __func__, -+ (void *)ep, enab->dten_error); -+ -+ return enab->dten_error; -+ } -+ } -+ -+ enab->dten_probegen = dtrace_probegen; -+ if (nmatched != NULL) -+ *nmatched = total_matched; -+ -+ return 0; -+} -+ 
-+void dtrace_enabling_matchall(void) -+{ -+ struct dtrace_enabling *enab; -+ -+ mutex_lock(&cpu_lock); -+ mutex_lock(&dtrace_lock); -+ -+ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) -+ (void) dtrace_enabling_match(enab, NULL); -+ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&cpu_lock); -+} -+ -+/* -+ * If an enabling is to be enabled without having matched probes (that is, if -+ * dtrace_state_go() is to be called on the underlying dtrace_state_t), the -+ * enabling must be _primed_ by creating an ECB for every ECB description. -+ * This must be done to assure that we know the number of speculations, the -+ * number of aggregations, the minimum buffer size needed, etc. before we -+ * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually -+ * enabling any probes, we create ECBs for every ECB description, but with a -+ * NULL probe -- which is exactly what this function does. -+ */ -+void dtrace_enabling_prime(struct dtrace_state *state) -+{ -+ struct dtrace_enabling *enab; -+ int i; -+ -+ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { -+ ASSERT(enab->dten_vstate->dtvs_state != NULL); -+ -+ if (enab->dten_vstate->dtvs_state != state) -+ continue; -+ -+ /* -+ * We don't want to prime an enabling more than once, lest -+ * we allow a malicious user to induce resource exhaustion. -+ * (The ECBs that result from priming an enabling aren't -+ * leaked -- but they also aren't deallocated until the -+ * consumer state is destroyed.) 
-+ */ -+ if (enab->dten_primed) -+ continue; -+ -+ for (i = 0; i < enab->dten_ndesc; i++) { -+ enab->dten_current = enab->dten_desc[i]; -+ dtrace_probe_enable(NULL, enab); -+ } -+ -+ enab->dten_primed = 1; -+ } -+} -+ -+void dtrace_enabling_provide(struct dtrace_provider *prv) -+{ -+ int all = 0; -+ dtrace_genid_t gen; -+ -+ if (prv == NULL) { -+ all = 1; -+ prv = dtrace_provider; -+ } -+ -+ do { -+ struct dtrace_enabling *enab; -+ void *parg = prv->dtpv_arg; -+ -+retry: -+ gen = dtrace_retained_gen; -+ for (enab = dtrace_retained; enab != NULL; -+ enab = enab->dten_next) { -+ int i; -+ -+ for (i = 0; i < enab->dten_ndesc; i++) { -+ struct dtrace_probedesc desc; -+ -+ desc = enab->dten_desc[i]->dted_probe; -+ mutex_unlock(&dtrace_lock); -+ prv->dtpv_pops.dtps_provide(parg, &desc); -+ mutex_lock(&dtrace_lock); -+ -+ if (gen != dtrace_retained_gen) -+ goto retry; -+ } -+ } -+ } while (all && (prv = prv->dtpv_next) != NULL); -+ -+ mutex_unlock(&dtrace_lock); -+ dtrace_probe_provide(NULL, all ? NULL : prv); -+ mutex_lock(&dtrace_lock); -+} -diff --git a/dtrace/dtrace_fmt.c b/dtrace/dtrace_fmt.c -new file mode 100644 -index 000000000000..78fcc8e6efb8 ---- /dev/null -+++ b/dtrace/dtrace_fmt.c -@@ -0,0 +1,104 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_fmt.c -+ * DESCRIPTION: DTrace - format string implementation -+ * -+ * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/slab.h> -+#include <linux/vmalloc.h> -+ -+#include "dtrace.h" -+ -+uint16_t dtrace_format_add(struct dtrace_state *state, char *str) -+{ -+ char *fmt, **new; -+ uint16_t ndx; -+ -+ fmt = dtrace_strdup(str); -+ if (fmt == NULL) -+ return 0; -+ -+ for (ndx = 0; ndx < state->dts_nformats; ndx++) { -+ if (state->dts_formats[ndx] == NULL) { -+ state->dts_formats[ndx] = fmt; -+ -+ return ndx + 1; -+ } -+ } -+ -+ if (state->dts_nformats == UINT16_MAX) { -+ kfree(fmt); -+ -+ return 0; -+ } -+ -+ ndx = state->dts_nformats; -+ new = vmalloc((ndx + 1) * sizeof(char *)); -+ if (new == NULL) { -+ kfree(fmt); -+ return 0; -+ } -+ -+ state->dts_nformats++; -+ -+ if (state->dts_formats != NULL) { -+ ASSERT(ndx != 0); -+ memcpy(new, state->dts_formats, ndx * sizeof(char *)); -+ vfree(state->dts_formats); -+ } -+ -+ state->dts_formats = new; -+ state->dts_formats[ndx] = fmt; -+ -+ return ndx + 1; -+} -+ -+void dtrace_format_remove(struct dtrace_state *state, uint16_t format) -+{ -+ char *fmt; -+ -+ ASSERT(state->dts_formats != NULL); -+ ASSERT(format <= state->dts_nformats); -+ ASSERT(state->dts_formats[format - 1] != NULL); -+ -+ fmt = state->dts_formats[format - 1]; -+ kfree(fmt); -+ state->dts_formats[format - 1] = NULL; -+} -+ -+void dtrace_format_destroy(struct dtrace_state *state) -+{ -+ int i; -+ -+ if (state->dts_nformats == 0) { -+ ASSERT(state->dts_formats == NULL); -+ return; -+ } -+ -+ ASSERT(state->dts_formats != NULL); -+ -+ for (i = 0; i < state->dts_nformats; i++) { -+ char *fmt = state->dts_formats[i]; -+ -+ if (fmt == NULL) -+ continue; -+ -+ kfree(fmt); -+ } -+ -+ vfree(state->dts_formats); -+ state->dts_nformats = 0; -+ state->dts_formats = NULL; -+} -diff --git a/dtrace/dtrace_hash.c b/dtrace/dtrace_hash.c -new file mode 100644 -index 000000000000..0773c60e7897 ---- /dev/null -+++ b/dtrace/dtrace_hash.c -@@ -0,0 +1,266 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_hash.c -+ * DESCRIPTION: DTrace - hash table 
implementation -+ * -+ * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/slab.h> -+#include <linux/vmalloc.h> -+ -+#include "dtrace.h" -+ -+#define DTRACE_HASHSTR(hash, probe) \ -+ dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs))) -+#define DTRACE_HASHEQ(hash, lhs, rhs) \ -+ (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \ -+ *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0) -+ -+static uint_t dtrace_hash_str(char *p) -+{ -+ uint_t g; -+ uint_t hval = 0; -+ -+ while (*p) { -+ hval = (hval << 4) + *p++; -+ g = hval & 0xf0000000; -+ if (g != 0) -+ hval ^= g >> 24; -+ -+ hval &= ~g; -+ } -+ -+ return hval; -+} -+ -+struct dtrace_hash *dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, -+ uintptr_t prevoffs) -+{ -+ struct dtrace_hash *hash; -+ -+ hash = kzalloc(sizeof(struct dtrace_hash), GFP_KERNEL); -+ if (hash == NULL) -+ return NULL; -+ -+ hash->dth_stroffs = stroffs; -+ hash->dth_nextoffs = nextoffs; -+ hash->dth_prevoffs = prevoffs; -+ -+ hash->dth_size = 1; -+ hash->dth_mask = hash->dth_size - 1; -+ -+ hash->dth_tab = vzalloc(hash->dth_size * -+ sizeof(struct dtrace_hashbucket *)); -+ -+ if (hash->dth_tab == NULL) { -+ kfree(hash); -+ return NULL; -+ } -+ -+ return hash; -+} -+ -+void dtrace_hash_destroy(struct dtrace_hash *hash) -+{ -+#ifdef DEBUG -+ int i; -+ -+ for (i = 0; i < hash->dth_size; i++) -+ ASSERT(hash->dth_tab[i] == NULL); 
-+#endif -+ -+ if (hash == NULL) -+ return; -+ -+ vfree(hash->dth_tab); -+ kfree(hash); -+} -+ -+static int dtrace_hash_resize(struct dtrace_hash *hash) -+{ -+ int size = hash->dth_size, i, ndx; -+ int new_size = hash->dth_size << 1; -+ int new_mask = new_size - 1; -+ struct dtrace_hashbucket **new_tab, *bucket, *next; -+ -+ ASSERT((new_size & new_mask) == 0); -+ -+ new_tab = vzalloc(new_size * sizeof(void *)); -+ if (new_tab == NULL) -+ return -ENOMEM; -+ -+ for (i = 0; i < size; i++) { -+ for (bucket = hash->dth_tab[i]; bucket != NULL; -+ bucket = next) { -+ struct dtrace_probe *probe = bucket->dthb_chain; -+ -+ ASSERT(probe != NULL); -+ ndx = DTRACE_HASHSTR(hash, probe) & new_mask; -+ -+ next = bucket->dthb_next; -+ bucket->dthb_next = new_tab[ndx]; -+ new_tab[ndx] = bucket; -+ } -+ } -+ -+ vfree(hash->dth_tab); -+ hash->dth_tab = new_tab; -+ hash->dth_size = new_size; -+ hash->dth_mask = new_mask; -+ -+ return 0; -+} -+ -+int dtrace_hash_add(struct dtrace_hash *hash, struct dtrace_probe *new) -+{ -+ int hashval = DTRACE_HASHSTR(hash, new); -+ int ndx = hashval & hash->dth_mask; -+ struct dtrace_hashbucket *bucket = hash->dth_tab[ndx]; -+ struct dtrace_probe **nextp, **prevp; -+ -+ for (; bucket != NULL; bucket = bucket->dthb_next) { -+ if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new)) -+ goto add; -+ } -+ -+ if ((hash->dth_nbuckets >> 1) > hash->dth_size) { -+ int err = 0; -+ -+ err = dtrace_hash_resize(hash); -+ if (err != 0) -+ return err; -+ -+ dtrace_hash_add(hash, new); -+ return 0; -+ } -+ -+ bucket = kzalloc(sizeof(struct dtrace_hashbucket), GFP_KERNEL); -+ if (bucket == NULL) -+ return -ENOMEM; -+ -+ bucket->dthb_next = hash->dth_tab[ndx]; -+ hash->dth_tab[ndx] = bucket; -+ hash->dth_nbuckets++; -+ -+add: -+ nextp = DTRACE_HASHNEXT(hash, new); -+ -+ ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL); -+ -+ *nextp = bucket->dthb_chain; -+ -+ if (bucket->dthb_chain != NULL) { -+ prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain); -+ -+ 
ASSERT(*prevp == NULL); -+ -+ *prevp = new; -+ } -+ -+ bucket->dthb_chain = new; -+ bucket->dthb_len++; -+ -+ return 0; -+} -+ -+struct dtrace_probe *dtrace_hash_lookup(struct dtrace_hash *hash, -+ struct dtrace_probe *template) -+{ -+ int hashval = DTRACE_HASHSTR(hash, template); -+ int ndx = hashval & hash->dth_mask; -+ -+ struct dtrace_hashbucket *bucket = hash->dth_tab[ndx]; -+ -+ for (; bucket != NULL; bucket = bucket->dthb_next) { -+ if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) -+ return bucket->dthb_chain; -+ } -+ -+ return NULL; -+} -+ -+/* -+ * FIXME: -+ * It would be more accurate to calculate a lookup cost based on the number -+ * of buckets in the hash table slot, the length of the chain, and the length -+ * of the string being looked up. -+ * The hash tables can also be optimized by storing the hashval in each element -+ * rather than always performing string comparisons. -+ */ -+int dtrace_hash_collisions(struct dtrace_hash *hash, -+ struct dtrace_probe *template) -+{ -+ int hashval = DTRACE_HASHSTR(hash, template); -+ int ndx = hashval & hash->dth_mask; -+ -+ struct dtrace_hashbucket *bucket = hash->dth_tab[ndx]; -+ -+ for (; bucket != NULL; bucket = bucket->dthb_next) { -+ if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) -+ return bucket->dthb_len; -+ } -+ -+ return 0; -+} -+ -+void dtrace_hash_remove(struct dtrace_hash *hash, struct dtrace_probe *probe) -+{ -+ int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask; -+ -+ struct dtrace_hashbucket *bucket = hash->dth_tab[ndx]; -+ struct dtrace_probe **prevp = DTRACE_HASHPREV(hash, probe); -+ struct dtrace_probe **nextp = DTRACE_HASHNEXT(hash, probe); -+ -+ for (; bucket != NULL; bucket = bucket->dthb_next) { -+ if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe)) -+ break; -+ } -+ -+ ASSERT(bucket != NULL); -+ -+ if (*prevp == NULL) { -+ if (*nextp == NULL) { -+ /* -+ * This is the last probe in the bucket; we can remove -+ * the bucket. 
-+ */ -+ struct dtrace_hashbucket *b = hash->dth_tab[ndx]; -+ -+ ASSERT(bucket->dthb_chain == probe); -+ ASSERT(b != NULL); -+ -+ if (b == bucket) -+ hash->dth_tab[ndx] = bucket->dthb_next; -+ else { -+ while (b->dthb_next != bucket) -+ b = b->dthb_next; -+ -+ b->dthb_next = bucket->dthb_next; -+ } -+ -+ ASSERT(hash->dth_nbuckets > 0); -+ -+ hash->dth_nbuckets--; -+ kfree(bucket); -+ -+ return; -+ } -+ -+ bucket->dthb_chain = *nextp; -+ } else -+ *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp; -+ -+ if (*nextp != NULL) -+ *(DTRACE_HASHPREV(hash, *nextp)) = *prevp; -+} -diff --git a/dtrace/dtrace_isa.c b/dtrace/dtrace_isa.c -new file mode 100644 -index 000000000000..f84ce1cd52cc ---- /dev/null -+++ b/dtrace/dtrace_isa.c -@@ -0,0 +1,361 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_isa.c -+ * DESCRIPTION: DTrace - architecture specific code -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/dtrace_cpu.h> -+#include <linux/dtrace_task_impl.h> -+#include <linux/hardirq.h> -+#include <linux/mm.h> -+#include <linux/smp.h> -+#include <linux/uaccess.h> -+#include <linux/cpumask.h> -+#include <asm/cacheflush.h> -+#include <asm/ptrace.h> -+#include <asm/stacktrace.h> -+ -+#include "dtrace.h" -+ -+DEFINE_MUTEX(cpu_lock); -+EXPORT_SYMBOL(cpu_lock); -+ -+int dtrace_getipl(void) -+{ -+ return in_interrupt(); -+} -+ -+void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) -+{ -+ if (cpu == DTRACE_CPUALL) -+ smp_call_function(func, arg, 1); -+ else -+ smp_call_function_single(cpu, func, arg, 1); -+} -+ -+void dtrace_toxic_ranges(void (*func)(uintptr_t, uintptr_t)) -+{ -+ /* FIXME */ -+} -+ -+/* -+ * Note: not called from probe context. This function is called -+ * asynchronously (and at a regular interval) from outside of probe context -+ * by the DTrace framework to sync shared data which DTrace probe context -+ * may access without locks. -+ * -+ * Whenever the framework updates data which can be accessed from probe context, -+ * the framework then calls dtrace_sync(). dtrace_sync() guarantees all probes -+ * are using the new data before returning. -+ * -+ * See the comment in dtrace_impl.h which describes this algorithm. -+ * The cpuc_in_probe_ctxt flag is an increasing 16-bit count. It is odd when -+ * in DTrace probe context and even when not in DTrace probe context. -+ * The upper 15 bits are a counter which are incremented when exiting DTrace -+ * probe context. These upper 15 bits are used to detect "sample aliasing": -+ * i.e. the target CPU is not in DTrace probe context between samples but -+ * continually enters probe context just before being sampled. -+ * -+ * dtrace_sync() loops over NCPUs. CPUs which are not in DTrace probe context -+ * (cpuc_in_probe_ctxt is even) are removed from the list. This is repeated -+ * until there are no CPUs left in the sync list. 
-+ * -+ * In the rare cases where dtrace_sync() loops over all NCPUs more than -+ * dtrace_sync_sample_count times, dtrace_sync() then spins on one CPU's -+ * cpuc_in_probe_ctxt count until the count increments. This is intended to -+ * avoid sample aliasing. -+ */ -+void dtrace_sync(void) -+{ -+ /* -+ * sync_cpus is a bitmap of CPUs that need to be synced with. -+ */ -+ cpumask_t sync_cpus; -+ uint64_t sample_count = 0; -+ int cpuid, sample_cpuid = 0; -+ int outstanding; -+ -+ /* -+ * Create bitmap of CPUs that need to be synced with. -+ */ -+ cpumask_copy(&sync_cpus, cpu_online_mask); -+ outstanding = 0; -+ for_each_cpu(cpuid, &sync_cpus) { -+ ++outstanding; -+ -+ /* -+ * Set a flag to let the CPU know we are syncing with it. -+ */ -+ DTRACE_SYNC_START(cpuid); -+ } -+ -+ /* -+ * The preceding stores by DTRACE_SYNC_START() must complete before -+ * subsequent loads or stores. No membar is needed because the -+ * atomic-add operation in DTRACE_SYNC_START is a memory barrier on -+ * SPARC and X86. -+ */ -+ -+ while (outstanding > 0) { -+ /* -+ * Loop over the map of CPUs that need to be synced with. -+ */ -+ for_each_cpu(cpuid, &sync_cpus) { -+ if (!DTRACE_SYNC_IN_CRITICAL(cpuid)) { -+ -+ /* Clear the CPU's sync request flag */ -+ DTRACE_SYNC_END(cpuid); -+ -+ /* -+ * remove cpuid from list of CPUs that -+ * still need to be synced with. -+ */ -+ DTRACE_SYNC_DONE(cpuid, &sync_cpus); -+ --outstanding; -+ } else { -+ /* -+ * Remember one of the outstanding CPUs to spin -+ * on once we reach the sampling limit. -+ */ -+ sample_cpuid = cpuid; -+ } -+ } -+ -+ /* -+ * dtrace_probe may be running in sibling threads in this core. -+ */ -+ if (outstanding > 0) { -+ dtrace_safe_smt_pause(); -+ -+ /* -+ * After sample_count loops, spin on one CPU's count -+ * instead of just checking for odd/even. -+ */ -+ if (++sample_count > dtrace_sync_sample_count) { -+ uint64_t count = -+ DTRACE_SYNC_CRITICAL_COUNT(sample_cpuid); -+ -+ /* -+ * Spin until critical section count increments. 
-+ */ -+ if (DTRACE_SYNC_IN_CRITICAL(sample_cpuid)) { -+ while (count == -+ DTRACE_SYNC_CRITICAL_COUNT( -+ sample_cpuid)) { -+ -+ dtrace_safe_smt_pause(); -+ } -+ } -+ -+ DTRACE_SYNC_END(sample_cpuid); -+ DTRACE_SYNC_DONE(sample_cpuid, &sync_cpus); -+ --outstanding; -+ } -+ } -+ } -+ -+/* -+ * All preceding loads by DTRACE_SYNC_IN_CRITICAL() and -+ * DTRACE_SYNC_CRITICAL_COUNT() must complete before subsequent loads -+ * or stores. No membar is needed because the atomic-add operation in -+ * DTRACE_SYNC_END() is a memory barrier on SPARC and X86. -+ */ -+} -+ -+/* -+ * Handle a few special cases where we store information in kernel memory that -+ * in other systems is typically found in userspace. -+ */ -+static int dtrace_fake_copyin(intptr_t addr, size_t size) -+{ -+ struct dtrace_psinfo *psinfo; -+ uintptr_t argv; -+ unsigned long argc; -+ uintptr_t envp; -+ unsigned long envc; -+ -+ if (current->dt_task == NULL) -+ return 0; -+ -+ psinfo = current->dt_task->dt_psinfo; -+ if (psinfo == NULL) -+ return 0; -+ -+ argv = (uintptr_t)psinfo->dtps_argv; -+ argc = psinfo->dtps_argc; -+ envp = (uintptr_t)psinfo->dtps_envp; -+ envc = psinfo->dtps_envc; -+ -+ /* -+ * Ensure addr is within the argv array (or the envp array): -+ * addr in [argv..argv + argc * sizeof(psinfo->argv[0])[ -+ * Ensure that addr + size is within the same array -+ * addr + size in [argv..argv * sizeof(psinfo->argv[0])] -+ * -+ * To guard against overflows on (addr + size) we rewrite this basic -+ * equation: -+ * addr + size <= argv + argc * sizeof(psinfo->argv[0]) -+ * into: -+ * addr - argv <= argc * sizeof(psinfo->argv[0]) - size -+ */ -+ return (addr >= argv && -+ addr - argv < argc * sizeof(psinfo->dtps_argv[0]) && -+ addr - argv <= argc * sizeof(psinfo->dtps_argv[0]) - size) || -+ (addr >= envp && -+ addr - envp < envc * sizeof(psinfo->dtps_envp[0]) && -+ addr - envp <= envc * sizeof(psinfo->dtps_envp[0]) - size); -+} -+ -+void dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, -+ 
volatile uint16_t *flags) -+{ -+ if (dtrace_fake_copyin(uaddr, size)) { -+ memcpy((char *)kaddr, (char *)uaddr, size); -+ return; -+ } -+ -+ dtrace_copyin_arch(uaddr, kaddr, size, flags); -+} -+ -+void dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, -+ volatile uint16_t *flags) -+{ -+ if (dtrace_fake_copyin(uaddr, size)) { -+ strncpy((char *)kaddr, (char *)uaddr, -+ min(size, (size_t)PR_PSARGS_SZ)); -+ return; -+ } -+ -+ dtrace_copyinstr_arch(uaddr, kaddr, size, flags); -+} -+ -+/* -+ * FIXME: aframes + 3 should really be aframes + 1, dtrace_stacktrace() in the -+ * kernel should do its own aframes + 2 -+ */ -+void dtrace_getpcstack(uint64_t *pcstack, int pcstack_limit, int aframes, -+ uint32_t *intrpc) -+{ -+ struct stacktrace_state st = { -+ pcstack, -+ NULL, -+ pcstack_limit, -+ aframes + 3, -+ STACKTRACE_KERNEL -+ }; -+ -+ dtrace_stacktrace(&st); -+ -+ while (st.depth < st.limit) -+ pcstack[st.depth++] = 0; -+} -+EXPORT_SYMBOL(dtrace_getpcstack); -+ -+/* -+ * Get user stack entries up to the pcstack_limit; return the number of entries -+ * acquired. If pcstack is NULL, return the number of entries potentially -+ * acquirable. 
-+ */ -+unsigned long dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, -+ int pcstack_limit) -+{ -+ struct task_struct *p = current; -+ struct stacktrace_state st; -+ unsigned long depth; -+ -+ if (pcstack) { -+ if (unlikely(pcstack_limit < 2)) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); -+ return 0; -+ } -+ *pcstack++ = (uint64_t)p->pid; -+ *pcstack++ = (uint64_t)p->tgid; -+ pcstack_limit -= 2; -+ } -+ -+ st.pcs = pcstack; -+ st.fps = fpstack; -+ st.limit = pcstack_limit; -+ st.depth = 0; -+ st.flags = STACKTRACE_USER; -+ -+ dtrace_stacktrace(&st); -+ -+ depth = st.depth; -+ if (pcstack) { -+ while (st.depth < st.limit) { -+ pcstack[st.depth++] = 0; -+ if (fpstack) -+ fpstack[st.depth++] = 0; -+ } -+ } -+ -+ return depth; -+} -+ -+void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) -+{ -+ dtrace_getufpstack(pcstack, NULL, pcstack_limit); -+} -+ -+/* -+ * FIXME: aframes + 3 should really be aframes + 1, dtrace_stacktrace() in the -+ * kernel should do its own aframes + 2 -+ */ -+int dtrace_getstackdepth(struct dtrace_mstate *mstate, int aframes) -+{ -+ uintptr_t old = mstate->dtms_scratch_ptr; -+ struct stacktrace_state st = { -+ NULL, -+ NULL, -+ 0, -+ aframes + 3, -+ STACKTRACE_KERNEL -+ }; -+ -+ st.pcs = (uint64_t *)ALIGN(old, 8); -+ if ((uintptr_t)st.pcs > -+ mstate->dtms_scratch_base + mstate->dtms_scratch_size) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ return 0; -+ } -+ -+ /* -+ * Calculate how many (64-bit) PCs we can fit in the remaining scratch -+ * memory. 
-+ */ -+ st.limit = (mstate->dtms_scratch_base + mstate->dtms_scratch_size - -+ (uintptr_t)st.pcs) >> 3; -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ dtrace_stacktrace(&st); -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ -+ mstate->dtms_scratch_ptr = old; -+ -+ return st.depth; -+} -+ -+int dtrace_getustackdepth(void) -+{ -+ return dtrace_getufpstack(NULL, NULL, INT_MAX); -+} -+ -+void dtrace_probe_error(struct dtrace_state *state, dtrace_epid_t epid, -+ int act, int fltoffs, int flags, uintptr_t addr) -+{ -+ dtrace_probe(dtrace_probeid_error, (uintptr_t)state, epid, act, -+ fltoffs, flags, addr, 0); -+} -diff --git a/dtrace/dtrace_match.c b/dtrace/dtrace_match.c -new file mode 100644 -index 000000000000..a63e3f8be1cd ---- /dev/null -+++ b/dtrace/dtrace_match.c -@@ -0,0 +1,364 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_match.c -+ * DESCRIPTION: DTrace - probe match implementation -+ * -+ * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include "dtrace.h" -+ -+struct dtrace_hash *dtrace_bymod; -+struct dtrace_hash *dtrace_byfunc; -+struct dtrace_hash *dtrace_byname; -+ -+int dtrace_match_priv(const struct dtrace_probe *prp, uint32_t priv, -+ kuid_t uid) -+{ -+ if (priv != DTRACE_PRIV_ALL) { -+ uint32_t ppriv = -+ prp->dtpr_provider->dtpv_priv.dtpp_flags; -+ uint32_t match = priv & ppriv; -+ -+ if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER | -+ DTRACE_PRIV_KERNEL)) == 0) -+ return 0; -+ -+ if (match == 0 && ppriv != 0) -+ return 0; -+ -+ if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 && -+ !uid_eq(uid, make_kuid(init_user_namespace, -+ prp->dtpr_provider->dtpv_priv.dtpp_uid))) -+ return 0; -+ } -+ -+ return 1; -+} -+ -+int dtrace_match_probe(const struct dtrace_probe *prp, -+ const struct dtrace_probekey *pkp, -+ uint32_t priv, kuid_t uid) -+{ -+ struct dtrace_provider *pvp = prp->dtpr_provider; -+ int rv; -+ -+ if (pvp->dtpv_defunct) -+ return 0; -+ -+ rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0); -+ if (rv <= 0) -+ return rv; -+ -+ rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0); -+ if (rv <= 0) -+ return rv; -+ -+ rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0); -+ if (rv <= 0) -+ return rv; -+ -+ rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0); -+ if (rv <= 0) -+ return rv; -+ -+ if (dtrace_match_priv(prp, priv, uid) == 0) -+ return 0; -+ -+ return rv; -+} -+ -+int dtrace_match_glob(const char *s, const char *p, int depth) -+{ -+ const char *olds; -+ char s1, c; -+ int gs; -+ -+ if (depth > DTRACE_PROBEKEY_MAXDEPTH) -+ return -1; -+ -+ if (s == NULL) -+ s = ""; -+ -+top: -+ olds = s; -+ s1 = *s++; -+ -+ if (p == NULL) -+ return 0; -+ -+ c = *p++; -+ if (c == '\0') -+ return s1 == '\0'; -+ -+ switch (c) { -+ case '[': -+ { -+ int ok = 0, notflag = 0; -+ char lc = '\0'; -+ -+ if (s1 == '\0') -+ return 0; -+ -+ if (*p == '!') { -+ notflag = 1; -+ p++; -+ } -+ -+ c = *p++; -+ if (c == '\0') -+ return 0; -+ -+ do { -+ if (c == '-' && lc != '\0' && 
*p != ']') { -+ c = *p++; -+ if (c == '\0') -+ return 0; -+ if (c == '\\') { -+ c = *p++; -+ if (c == '\0') -+ return 0; -+ } -+ if (notflag) { -+ if (s1 < lc || s1 > c) -+ ok++; -+ else -+ return 0; -+ } else if (lc <= s1 && s1 <= c) -+ ok++; -+ } else if (c == '\\') { -+ c = *p++; -+ if (c == '\0') -+ return 0; -+ } -+ lc = c; -+ -+ if (notflag) { -+ if (s1 != c) -+ ok++; -+ else -+ return 0; -+ } else if (s1 == c) -+ ok++; -+ -+ c = *p++; -+ if (c == '\0') -+ return 0; -+ } while (c != ']'); -+ -+ if (ok) -+ goto top; -+ -+ return 0; -+ } -+ -+ case '\\': -+ c = *p++; -+ if (c == '\0') -+ return 0; -+ /* fallthru */ -+ default: -+ if (c != s1) -+ return 0; -+ /* fallthru */ -+ -+ case '?': -+ if (s1 != '\0') -+ goto top; -+ -+ return 0; -+ -+ case '*': -+ while (*p == '*') -+ p++; -+ -+ if (*p == '\0') -+ return 1; -+ -+ for (s = olds; *s != '\0'; s++) { -+ gs = dtrace_match_glob(s, p, depth + 1); -+ if (gs != 0) -+ return gs; -+ } -+ -+ return 0; -+ } -+} -+ -+int dtrace_match_string(const char *s, const char *p, int depth) -+{ -+ return s != NULL && strcmp(s, p) == 0; -+} -+ -+int dtrace_match_nul(const char *s, const char *p, int depth) -+{ -+ return 1; -+} -+ -+int dtrace_match_nonzero(const char *s, const char *p, int depth) -+{ -+ return s != NULL && s[0] != '\0'; -+} -+ -+struct probe_match { -+ const struct dtrace_probekey *pkp; -+ uint32_t priv; -+ kuid_t uid; -+ int (*matched)(struct dtrace_probe *, void *); -+ void *arg; -+ int nmatched; -+}; -+ -+static int dtrace_match_one(int id, void *p, void *data) -+{ -+ struct probe_match *pbm = (struct probe_match *)data; -+ struct dtrace_probe *probe = (struct dtrace_probe *)p; -+ int rc; -+ -+ if (dtrace_match_probe(probe, pbm->pkp, pbm->priv, pbm->uid) <= 0) -+ return 0; -+ -+ pbm->nmatched++; -+ -+ rc = (pbm->matched)(probe, pbm->arg); -+ if (rc != DTRACE_MATCH_NEXT) { -+ if (rc == DTRACE_MATCH_FAIL) -+ return DTRACE_MATCH_FAIL; -+ } -+ -+ return 0; -+} -+ -+int dtrace_match(const struct dtrace_probekey 
*pkp, uint32_t priv, kuid_t uid, -+ int (*matched)(struct dtrace_probe *, void *), void *arg) -+{ -+ struct dtrace_probe template, *probe; -+ struct dtrace_hash *hash = NULL; -+ int len, rc, best = INT_MAX, nmatched = 0; -+ -+ if (pkp->dtpk_id != DTRACE_IDNONE) { -+ probe = dtrace_probe_lookup_id(pkp->dtpk_id); -+ if (probe != NULL && -+ dtrace_match_probe(probe, pkp, priv, uid) > 0) { -+ if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL) -+ return DTRACE_MATCH_FAIL; -+ -+ nmatched++; -+ } -+ -+ return nmatched; -+ } -+ -+ template.dtpr_mod = (char *)pkp->dtpk_mod; -+ template.dtpr_func = (char *)pkp->dtpk_func; -+ template.dtpr_name = (char *)pkp->dtpk_name; -+ -+ if (pkp->dtpk_mmatch == &dtrace_match_string) { -+ len = dtrace_hash_collisions(dtrace_bymod, &template); -+ if (len < best) { -+ best = len; -+ hash = dtrace_bymod; -+ } -+ } -+ -+ if (pkp->dtpk_fmatch == &dtrace_match_string) { -+ len = dtrace_hash_collisions(dtrace_byfunc, &template); -+ if (len < best) { -+ best = len; -+ hash = dtrace_byfunc; -+ } -+ } -+ -+ if (pkp->dtpk_nmatch == &dtrace_match_string) { -+ len = dtrace_hash_collisions(dtrace_byname, &template); -+ if (len < best) { -+ best = len; -+ hash = dtrace_byname; -+ } -+ } -+ -+ if (hash == NULL) { -+ struct probe_match pbm; -+ -+ pbm.pkp = pkp; -+ pbm.priv = priv; -+ pbm.uid = uid; -+ pbm.matched = matched; -+ pbm.arg = arg; -+ pbm.nmatched = 0; -+ -+ rc = dtrace_probe_for_each(dtrace_match_one, &pbm); -+ if (rc == DTRACE_MATCH_FAIL) -+ return DTRACE_MATCH_FAIL; -+ -+ return pbm.nmatched; -+ } -+ -+ for (probe = dtrace_hash_lookup(hash, &template); probe != NULL; -+ probe = *(DTRACE_HASHNEXT(hash, probe))) { -+ if (dtrace_match_probe(probe, pkp, priv, uid) <= 0) -+ continue; -+ -+ nmatched++; -+ -+ rc = (*matched)(probe, arg); -+ if (rc != DTRACE_MATCH_NEXT) { -+ if (rc == DTRACE_MATCH_FAIL) -+ return DTRACE_MATCH_FAIL; -+ -+ break; -+ } -+ } -+ -+ return nmatched; -+} -+ -+static dtrace_probekey_f *dtrace_probekey_func(const char *p) -+{ 
-+ char c; -+ -+ if (p == NULL || *p == '\0') -+ return &dtrace_match_nul; -+ -+ while ((c = *p++) != '\0') { -+ if (c == '[' || c == '?' || c == '*' || c == '\\') -+ return &dtrace_match_glob; -+ } -+ -+ return &dtrace_match_string; -+} -+ -+void dtrace_probekey(const struct dtrace_probedesc *pdp, -+ struct dtrace_probekey *pkp) -+{ -+ pkp->dtpk_prov = pdp->dtpd_provider; -+ pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider); -+ -+ pkp->dtpk_mod = pdp->dtpd_mod; -+ pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod); -+ -+ pkp->dtpk_func = pdp->dtpd_func; -+ pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func); -+ -+ pkp->dtpk_name = pdp->dtpd_name; -+ pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name); -+ -+ pkp->dtpk_id = pdp->dtpd_id; -+ -+ if (pkp->dtpk_id == DTRACE_IDNONE && -+ pkp->dtpk_pmatch == &dtrace_match_nul && -+ pkp->dtpk_mmatch == &dtrace_match_nul && -+ pkp->dtpk_fmatch == &dtrace_match_nul && -+ pkp->dtpk_nmatch == &dtrace_match_nul) -+ pkp->dtpk_fmatch = &dtrace_match_nonzero; -+} -diff --git a/dtrace/dtrace_mod.c b/dtrace/dtrace_mod.c -new file mode 100644 -index 000000000000..4da08c3cd816 ---- /dev/null -+++ b/dtrace/dtrace_mod.c -@@ -0,0 +1,45 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_mod.c -+ * DESCRIPTION: DTrace - framework kernel module -+ * -+ * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/module.h> -+ -+#include "dtrace_dev.h" -+ -+MODULE_AUTHOR("Kris Van Hees (kris.van.hees@oracle.com)"); -+MODULE_DESCRIPTION("Dynamic Tracing"); -+MODULE_VERSION("v0.1"); -+MODULE_LICENSE("GPL"); -+ -+/* -+ * Initialize the module. -+ */ -+static int __init dtrace_init(void) -+{ -+ return dtrace_dev_init(); -+} -+ -+/* -+ * Perform cleanup before the module is removed. -+ */ -+static void __exit dtrace_exit(void) -+{ -+ dtrace_dev_exit(); -+} -+ -+module_init(dtrace_init); -+module_exit(dtrace_exit); -diff --git a/dtrace/dtrace_predicate.c b/dtrace/dtrace_predicate.c -new file mode 100644 -index 000000000000..004a1c542c76 ---- /dev/null -+++ b/dtrace/dtrace_predicate.c -@@ -0,0 +1,80 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_predicate.c -+ * DESCRIPTION: DTrace - predicate cache implementation -+ * -+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/slab.h> -+ -+#include "dtrace.h" -+ -+static dtrace_cacheid_t dtrace_predcache_id = DTRACE_CACHEIDNONE + 1; -+ -+struct dtrace_predicate *dtrace_predicate_create(struct dtrace_difo *dp) -+{ -+ struct dtrace_predicate *pred; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(dp->dtdo_refcnt != 0); -+ -+ pred = kzalloc(sizeof(struct dtrace_predicate), GFP_KERNEL); -+ if (pred == NULL) -+ return NULL; -+ -+ pred->dtp_difo = dp; -+ pred->dtp_refcnt = 1; -+ -+ if (!dtrace_difo_cacheable(dp)) -+ return pred; -+ -+ /* -+ * This is only theoretically possible -- we have had 2^32 cacheable -+ * predicates on this machine. We cannot allow any more predicates to -+ * become cacheable: as unlikely as it is, there may be a thread -+ * caching a (now stale) predicate cache ID. (N.B.: the temptation is -+ * being successfully resisted to have this cmn_err() "Holy shit -- we -+ * executed this code!") -+ */ -+ if (dtrace_predcache_id == DTRACE_CACHEIDNONE) -+ return pred; -+ -+ pred->dtp_cacheid = dtrace_predcache_id++; -+ -+ return pred; -+} -+ -+void dtrace_predicate_hold(struct dtrace_predicate *pred) -+{ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); -+ ASSERT(pred->dtp_refcnt > 0); -+ -+ pred->dtp_refcnt++; -+} -+ -+void dtrace_predicate_release(struct dtrace_predicate *pred, -+ struct dtrace_vstate *vstate) -+{ -+ struct dtrace_difo *dp = pred->dtp_difo; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(dp != NULL && dp->dtdo_refcnt != 0); -+ ASSERT(pred->dtp_refcnt > 0); -+ -+ if (--pred->dtp_refcnt == 0) { -+ dtrace_difo_release(dp, vstate); -+ kfree(pred); -+ } -+} -diff --git a/dtrace/dtrace_priv.c b/dtrace/dtrace_priv.c -new file mode 100644 -index 000000000000..f50133de572d ---- /dev/null -+++ b/dtrace/dtrace_priv.c -@@ -0,0 +1,120 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_priv.c -+ * DESCRIPTION: DTrace - privilege support implementation -+ * -+ * Copyright 
(c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/dtrace_cpu.h> -+ -+#include "dtrace.h" -+ -+/* -+ * This privilege check should be used by actions and subroutines to -+ * verify that the user credentials of the process that enabled the -+ * invoking ECB match the target credentials -+ */ -+int dtrace_priv_proc_common_user(struct dtrace_state *state) -+{ -+ const struct cred *cr, *s_cr = state->dts_cred.dcr_cred; -+ -+ /* -+ * We should always have a non-NULL state cred here, since if cred -+ * is null (anonymous tracing), we fast-path bypass this routine. -+ */ -+ ASSERT(s_cr != NULL); -+ -+ cr = current_cred(); -+ if (cr != NULL && -+ uid_eq(s_cr->euid, cr->euid) && -+ uid_eq(s_cr->euid, cr->uid) && -+ uid_eq(s_cr->euid, cr->suid) && -+ gid_eq(s_cr->egid, cr->egid) && -+ gid_eq(s_cr->egid, cr->gid) && -+ gid_eq(s_cr->egid, cr->sgid)) -+ return 1; -+ -+ return 0; -+} -+ -+/* -+ * This privilege check should be used by actions and subroutines to -+ * verify that the process has not setuid or changed credentials. 
-+ */ -+int dtrace_priv_proc_common_nocd(void) -+{ -+#ifdef FIXME -+ proc_t *proc; -+ -+ proc = ttoproc(curthread); -+ if (proc != NULL && !(proc->p_flag & SNOCD)) -+ return 1; -+#endif -+ -+ return 0; -+} -+ -+int dtrace_priv_proc_destructive(struct dtrace_state *state) -+{ -+ int action = state->dts_cred.dcr_action; -+ -+ if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) && -+ dtrace_priv_proc_common_user(state) == 0) -+ goto bad; -+ -+ if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) && -+ dtrace_priv_proc_common_nocd() == 0) -+ goto bad; -+ -+ return 1; -+ -+bad: -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_UPRIV); -+ -+ return 0; -+} -+ -+int dtrace_priv_proc_control(struct dtrace_state *state) -+{ -+ if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) -+ return 1; -+ -+ if (dtrace_priv_proc_common_user(state) && -+ dtrace_priv_proc_common_nocd()) -+ return 1; -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_UPRIV); -+ -+ return 0; -+} -+ -+int dtrace_priv_proc(struct dtrace_state *state) -+{ -+ if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) -+ return 1; -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_UPRIV); -+ -+ return 0; -+} -+ -+int dtrace_priv_kernel(struct dtrace_state *state) -+{ -+ if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) -+ return 1; -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); -+ -+ return 0; -+} -diff --git a/dtrace/dtrace_probe.c b/dtrace/dtrace_probe.c -new file mode 100644 -index 000000000000..8e2e04cb9c13 ---- /dev/null -+++ b/dtrace/dtrace_probe.c -@@ -0,0 +1,1542 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_probe.c -+ * DESCRIPTION: DTrace - probe implementation -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/dtrace_cpu.h> -+#include <linux/dtrace_task_impl.h> -+#include <linux/hardirq.h> -+#include <linux/highmem.h> -+#include <linux/idr.h> -+#include <linux/module.h> -+#include <linux/skbuff.h> -+#include <linux/slab.h> -+#include <linux/vmalloc.h> -+#include <asm/pgtable.h> -+#include <asm/cmpxchg.h> -+#include <linux/sched/signal.h> -+ -+#include "dtrace.h" -+ -+ktime_t dtrace_chill_interval = -+ KTIME_INIT(1, 0); -+ktime_t dtrace_chill_max = -+ KTIME_INIT(0, -+ 500 * (NANOSEC / MILLISEC)); -+ -+dtrace_genid_t dtrace_probegen; -+struct kmem_cache *dtrace_probe_cachep; -+ -+static struct idr dtrace_probe_idr; -+ -+static struct task_struct *dtrace_panicked; -+ -+/* -+ * Free probe structure (including partially filled in ones). -+ */ -+void dtrace_probe_free(struct dtrace_probe *probe) -+{ -+ if (probe == NULL) -+ return; -+ -+ dtrace_probe_remove_id(probe->dtpr_id); -+ -+ kfree(probe->dtpr_mod); -+ kfree(probe->dtpr_func); -+ kfree(probe->dtpr_name); -+ -+ kmem_cache_free(dtrace_probe_cachep, probe); -+} -+ -+/* -+ * Create a new probe. -+ */ -+dtrace_id_t dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, -+ const char *func, const char *name, -+ int aframes, void *arg) -+{ -+ struct dtrace_probe *probe; -+ struct dtrace_provider *provider = (struct dtrace_provider *)prov; -+ dtrace_id_t id; -+ -+ probe = kmem_cache_alloc(dtrace_probe_cachep, GFP_KERNEL); -+ if (probe == NULL) -+ goto err_probe; -+ -+ /* -+ * The idr_preload() should be called without holding locks as it may -+ * block. At the same time it is required to protect DTrace structures. 
-+ * We can't drop it before idr_preload() and acquire after it because -+ * we can't sleep in atomic context (until we reach idr_preload_end()). -+ * -+ * It is better to delay DTrace framework than traced host so the lock -+ * is being held for the duration of idr allocation. -+ * -+ * When the provider is the DTrace core itself, dtrace_lock will be -+ * held when we enter this function. -+ */ -+ if (provider == dtrace_provider) -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ else -+ mutex_lock(&dtrace_lock); -+ -+ idr_preload(GFP_KERNEL); -+ id = idr_alloc_cyclic(&dtrace_probe_idr, probe, 0, 0, GFP_NOWAIT); -+ idr_preload_end(); -+ if (id < 0) -+ goto err_probe; -+ -+ probe->dtpr_id = id; -+ probe->dtpr_ecb = NULL; -+ probe->dtpr_ecb_last = NULL; -+ probe->dtpr_arg = arg; -+ probe->dtpr_predcache = DTRACE_CACHEIDNONE; -+ probe->dtpr_aframes = aframes; -+ probe->dtpr_provider = provider; -+ -+ probe->dtpr_mod = dtrace_strdup(mod); -+ if (probe->dtpr_mod == NULL) -+ goto err_probe; -+ -+ probe->dtpr_func = dtrace_strdup(func); -+ if (probe->dtpr_func == NULL) -+ goto err_probe; -+ -+ probe->dtpr_name = dtrace_strdup(name); -+ if (probe->dtpr_name == NULL) -+ goto err_probe; -+ -+ probe->dtpr_nextmod = probe->dtpr_prevmod = NULL; -+ probe->dtpr_nextfunc = probe->dtpr_prevfunc = NULL; -+ probe->dtpr_nextname = probe->dtpr_prevname = NULL; -+ probe->dtpr_gen = dtrace_probegen++; -+ -+ if (dtrace_hash_add(dtrace_bymod, probe) != 0) -+ goto err_probe; -+ -+ if (dtrace_hash_add(dtrace_byfunc, probe) != 0) -+ goto err_hash_byfunc; -+ -+ if (dtrace_hash_add(dtrace_byname, probe) != 0) -+ goto err_hash_byname; -+ -+ if (provider != dtrace_provider) -+ mutex_unlock(&dtrace_lock); -+ -+ return id; -+ -+err_hash_byname: -+ dtrace_hash_remove(dtrace_byfunc, probe); -+err_hash_byfunc: -+ dtrace_hash_remove(dtrace_bymod, probe); -+err_probe: -+ dtrace_probe_free(probe); -+ if (provider != dtrace_provider) -+ mutex_unlock(&dtrace_lock); -+ return DTRACE_IDNONE; -+} 
-+EXPORT_SYMBOL(dtrace_probe_create); -+ -+int dtrace_probe_enable(const struct dtrace_probedesc *desc, -+ struct dtrace_enabling *enab) -+{ -+ struct dtrace_probekey pkey; -+ uint32_t priv; -+ kuid_t uid; -+ -+ dtrace_ecb_create_cache = NULL; -+ -+ if (desc == NULL) { -+ (void) dtrace_ecb_create_enable(NULL, enab); -+ -+ return 0; -+ } -+ -+ dtrace_probekey(desc, &pkey); -+ dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred, -+ &priv, &uid); -+ -+ return dtrace_match(&pkey, priv, uid, dtrace_ecb_create_enable, enab); -+} -+ -+/* -+ * Return the probe argument associated with the specified probe. -+ */ -+void *dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid) -+{ -+ struct dtrace_probe *probe; -+ void *rval = NULL; -+ -+ mutex_lock(&dtrace_lock); -+ -+ probe = dtrace_probe_lookup_id(pid); -+ if (probe != NULL && -+ probe->dtpr_provider == (struct dtrace_provider *)id) -+ rval = probe->dtpr_arg; -+ -+ mutex_unlock(&dtrace_lock); -+ -+ return rval; -+} -+EXPORT_SYMBOL(dtrace_probe_arg); -+ -+/* -+ * Copy a probe into a probe description. 
-+ */ -+void dtrace_probe_description(const struct dtrace_probe *prp, -+ struct dtrace_probedesc *pdp) -+{ -+ memset(pdp, 0, sizeof(struct dtrace_probedesc)); -+ pdp->dtpd_id = prp->dtpr_id; -+ -+ strncpy(pdp->dtpd_provider, prp->dtpr_provider->dtpv_name, -+ DTRACE_PROVNAMELEN - 1); -+ -+ strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1); -+ strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1); -+ strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1); -+} -+ -+void dtrace_probe_provide(struct dtrace_probedesc *desc, -+ struct dtrace_provider *prv) -+{ -+ int all = 0; -+ -+ if (prv == NULL) { -+ all = 1; -+ prv = dtrace_provider; -+ } -+ -+ do { -+ prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc); -+ dtrace_for_each_module(prv->dtpv_pops.dtps_provide_module, -+ prv->dtpv_arg); -+ } while (all && (prv = prv->dtpv_next) != NULL); -+} -+ -+/* -+ * Atomically increment a specified error counter from probe context. -+ */ -+static void dtrace_error(uint32_t *counter) -+{ -+ /* -+ * Most counters stored to in probe context are per-CPU counters. -+ * However, there are some error conditions that are sufficiently -+ * arcane that they don't merit per-CPU storage. If these counters -+ * are incremented concurrently on different CPUs, scalability will be -+ * adversely affected -- but we don't expect them to be white-hot in a -+ * correctly constructed enabling... -+ */ -+ uint32_t oval, nval; -+ -+ do { -+ oval = *counter; -+ -+ nval = oval + 1; -+ if (nval == 0) { -+ /* -+ * If the counter would wrap, set it to 1 -- assuring -+ * that the counter is never zero when we have seen -+ * errors. (The counter must be 32-bits because we -+ * aren't guaranteed a 64-bit compare&swap operation.) 
-+ * To save this code both the infamy of being fingered -+ * by a priggish news story and the indignity of being -+ * the target of a neo-puritan witch trial, we're -+ * carefully avoiding any colorful description of the -+ * likelihood of this condition -- but suffice it to -+ * say that it is only slightly more likely than the -+ * overflow of predicate cache IDs, as discussed in -+ * dtrace_predicate_create(). -+ */ -+ nval = 1; -+ } -+ } while (cmpxchg(counter, oval, nval) != oval); -+} -+ -+static int dtrace_priv_kernel_destructive(struct dtrace_state *state) -+{ -+ if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE) -+ return 1; -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); -+ -+ return 0; -+} -+ -+static void dtrace_action_breakpoint(struct dtrace_ecb *ecb) -+{ -+ struct dtrace_probe *probe = ecb->dte_probe; -+ struct dtrace_provider *prov = probe->dtpr_provider; -+ char c[DTRACE_FULLNAMELEN + 80], *str; -+ char *msg = "dtrace: breakpoint action at probe "; -+ char *ecbmsg = " (ecb "; -+ uintptr_t mask = (0xf << (sizeof(uintptr_t) * NBBY / 4)); -+ uintptr_t val = (uintptr_t)ecb; -+ int shift = (sizeof(uintptr_t) * NBBY) - 4, i = 0; -+ -+ if (dtrace_destructive_disallow) -+ return; -+ -+ /* -+ * It's impossible to be taking action on the NULL probe. -+ */ -+ ASSERT(probe != NULL); -+ -+ /* -+ * This is a poor man's (destitute man's?) sprintf(): we want to -+ * print the provider name, module name, function name and name of -+ * the probe, along with the hex address of the ECB with the breakpoint -+ * action -- all of which we must place in the character buffer by -+ * hand. 
-+ */ -+ while (*msg != '\0') -+ c[i++] = *msg++; -+ -+ for (str = prov->dtpv_name; *str != '\0'; str++) -+ c[i++] = *str; -+ c[i++] = ':'; -+ -+ for (str = probe->dtpr_mod; *str != '\0'; str++) -+ c[i++] = *str; -+ c[i++] = ':'; -+ -+ for (str = probe->dtpr_func; *str != '\0'; str++) -+ c[i++] = *str; -+ c[i++] = ':'; -+ -+ for (str = probe->dtpr_name; *str != '\0'; str++) -+ c[i++] = *str; -+ -+ while (*ecbmsg != '\0') -+ c[i++] = *ecbmsg++; -+ -+ while (shift >= 0) { -+ mask = (uintptr_t)0xf << shift; -+ -+ if (val >= ((uintptr_t)1 << shift)) -+ c[i++] = "0123456789abcdef"[(val & mask) >> shift]; -+ -+ shift -= 4; -+ } -+ -+ c[i++] = ')'; -+ c[i] = '\0'; -+ -+// debug_enter(c); /* FIXME */ -+} -+ -+static void dtrace_action_panic(struct dtrace_ecb *ecb) -+{ -+ struct dtrace_probe *probe = ecb->dte_probe; -+ -+ /* -+ * It's impossible to be taking action on the NULL probe. -+ */ -+ ASSERT(probe != NULL); -+ -+ if (dtrace_destructive_disallow) -+ return; -+ -+ if (dtrace_panicked != NULL) -+ return; -+ -+ if (cmpxchg(&dtrace_panicked, NULL, current) != NULL) -+ return; -+ -+ /* -+ * We won the right to panic. (We want to be sure that only one -+ * thread calls panic() from dtrace_probe(), and that panic() is -+ * called exactly once.) -+ */ -+ dtrace_panic(KERN_EMERG -+ "dtrace: panic action at probe %s:%s:%s:%s (ecb %p)", -+ probe->dtpr_provider->dtpv_name, probe->dtpr_mod, -+ probe->dtpr_func, probe->dtpr_name, (void *)ecb); -+} -+ -+static void dtrace_action_raise(uint64_t sig) -+{ -+ if (current->dt_task == NULL) -+ return; -+ -+ if (dtrace_destructive_disallow) -+ return; -+ -+ if (sig >= _NSIG) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); -+ return; -+ } -+ -+ /* -+ * raise() has a queue depth of 1 -- we ignore all subsequent -+ * invocations of the raise() action. 
-+ */ -+ if (current->dt_task->dt_sig == 0) -+ current->dt_task->dt_sig = (uint8_t)sig; -+} -+ -+static void dtrace_action_stop(void) -+{ -+ if (current->dt_task == NULL) -+ return; -+ -+ if (dtrace_destructive_disallow) -+ return; -+ -+ if (!current->dt_task->dt_stop) { -+ current->dt_task->dt_stop = 1; -+// current->sig_check = 1; /* FIXME */ -+// aston(current); /* FIXME */ -+ } -+} -+ -+static void dtrace_action_chill(struct dtrace_mstate *mstate, ktime_t val) -+{ -+ ktime_t now; -+ volatile uint16_t *flags; -+ struct cpu_core *cpu = this_cpu_core; -+ -+ if (dtrace_destructive_disallow) -+ return; -+ -+ flags = (volatile uint16_t *)&cpu->cpuc_dtrace_flags; -+ -+ now = dtrace_gethrtime(); -+ -+ if (ktime_gt(ktime_sub(now, cpu->cpu_dtrace_chillmark), -+ dtrace_chill_interval)) { -+ /* -+ * We need to advance the mark to current time. -+ */ -+ cpu->cpu_dtrace_chillmark = now; -+ cpu->cpu_dtrace_chilled = ktime_set(0, 0); -+ } -+ -+ /* -+ * Now check to see if the requested chill time would take us over -+ * the maximum amount of time allowed in the chill interval. (Or -+ * worse, if the calculation itself induces overflow.) -+ */ -+ if (ktime_gt(ktime_add(cpu->cpu_dtrace_chilled, val), -+ dtrace_chill_max) || -+ ktime_lt(ktime_add(cpu->cpu_dtrace_chilled, val), -+ cpu->cpu_dtrace_chilled)) { -+ *flags |= CPU_DTRACE_ILLOP; -+ return; -+ } -+ -+ while (ktime_lt(ktime_sub(dtrace_gethrtime(), now), val)) -+ continue; -+ -+ /* -+ * Normally, we assure that the value of the variable "timestamp" does -+ * not change within an ECB. The presence of chill() represents an -+ * exception from this rule, however. 
-+ */ -+ mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP; -+ cpu->cpu_dtrace_chilled = ktime_add(cpu->cpu_dtrace_chilled, val); -+} -+ -+static void dtrace_action_ustack(struct dtrace_mstate *mstate, -+ struct dtrace_state *state, uint64_t *buf, -+ uint64_t arg) -+{ -+ int nframes = DTRACE_USTACK_NFRAMES(arg); -+ int strsize = DTRACE_USTACK_STRSIZE(arg); -+ uint64_t *pcs = &buf[2], *fps; -+ char *str = (char *)&pcs[nframes]; -+ int size, offs = 0, i, j; -+ uintptr_t old = mstate->dtms_scratch_ptr, saved; -+ uint16_t *flags = &this_cpu_core->cpuc_dtrace_flags; -+ char *sym; -+ -+ /* -+ * Should be taking a faster path if string space has not been -+ * allocated. -+ */ -+ ASSERT(strsize != 0); -+ -+ /* -+ * We will first allocate some temporary space for the frame pointers. -+ */ -+ fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8); -+ size = (uintptr_t)fps - mstate->dtms_scratch_ptr + -+ (nframes * sizeof(uint64_t)); -+ -+ if (!DTRACE_INSCRATCH(mstate, size)) { -+ /* -+ * Not enough room for our frame pointers -- need to indicate -+ * that we ran out of scratch space. -+ */ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); -+ return; -+ } -+ -+ mstate->dtms_scratch_ptr += size; -+ saved = mstate->dtms_scratch_ptr; -+ -+ /* -+ * Now get a stack with both program counters and frame pointers. -+ */ -+ dtrace_getufpstack(buf, fps, nframes + 2); -+ -+ /* -+ * If that faulted, we're cooked. -+ */ -+ if (*flags & CPU_DTRACE_FAULT) -+ goto out; -+ -+ /* -+ * Now we want to walk up the stack, calling the USTACK helper. For -+ * each iteration, we restore the scratch pointer. -+ */ -+ for (i = 0; i < nframes; i++) { -+ mstate->dtms_scratch_ptr = saved; -+ -+ if (offs >= strsize) -+ break; -+ -+ sym = (char *)(uintptr_t)dtrace_helper( -+ DTRACE_HELPER_ACTION_USTACK, -+ mstate, state, pcs[i], fps[i]); -+ -+ /* -+ * If we faulted while running the helper, we're going to -+ * clear the fault and null out the corresponding string. 
-+ */ -+ if (*flags & CPU_DTRACE_FAULT) { -+ *flags &= ~CPU_DTRACE_FAULT; -+ str[offs++] = '\0'; -+ continue; -+ } -+ -+ if (sym == NULL) { -+ str[offs++] = '\0'; -+ continue; -+ } -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ -+ /* -+ * Now copy in the string that the helper returned to us. -+ */ -+ for (j = 0; offs + j < strsize; j++) { -+ str[offs + j] = sym[j]; -+ if (str[offs + j] == '\0') -+ break; -+ } -+ -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ -+ offs += j + 1; -+ } -+ -+ /* -+ * If we didn't have room for all of the strings, we don't abort -+ * processing -- this needn't be a fatal error -- but we still want -+ * to increment a counter (dts_stkstroverflows) to allow this condition -+ * to be warned about. (If this is from a jstack() action, it is -+ * easily tuned via jstackstrsize.) -+ */ -+ if (offs >= strsize) -+ dtrace_error(&state->dts_stkstroverflows); -+ -+ while (offs < strsize) -+ str[offs++] = '\0'; -+ -+out: -+ mstate->dtms_scratch_ptr = old; -+} -+ -+/* -+ * This macro is used by dtrace_probe_pcap() below. See linux/skbuff.h for the -+ * original. Only change is we pass in an already dereferenced page.p as -+ * the fragment f. -+ */ -+#define dtrace_skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied) \ -+ for (p = f + ((f_off) >> PAGE_SHIFT), \ -+ p_off = (f_off) & (PAGE_SIZE - 1), \ -+ p_len = skb_frag_must_loop(p) ? \ -+ min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \ -+ copied = 0; \ -+ copied < f_len; \ -+ copied += p_len, p++, p_off = 0, \ -+ p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \ -+ -+ -+/* -+ * Capture skb data in linear and non-linear portions. Returns 0 on success, -+ * -1 if an error is encountered. 
-+ */ -+static __always_inline int dtrace_probe_pcap(uint64_t val, size_t *valoffs, -+ size_t size, caddr_t tomax, -+ ktime_t now, -+ struct dtrace_mstate *mstate, -+ struct dtrace_vstate *vstate, -+ volatile uint16_t *flags) -+ -+{ -+ uintptr_t start = *valoffs, end = *valoffs + size; -+ uintptr_t skb_head, skb_data, skb_tail, shinfo; -+ uint32_t skb_end, tail, skb_len = 0; -+ uintptr_t baddr = val; -+ uint8_t nr_frags, f; -+ uint32_t data_len; -+ -+ DTRACE_STORE(uint64_t, tomax, start, ktime_to_ns(now)); -+ -+ *valoffs += (2 * sizeof(uint64_t)); -+ -+ /* -+ * Skip capture of NULL skbs. -+ */ -+ if ((void *)baddr == NULL) -+ goto pcap_done; -+ -+ if (!dtrace_canload(baddr, sizeof(struct sk_buff), mstate, vstate)) -+ return -1; -+ -+ skb_data = dtrace_loadptr(baddr + offsetof(struct sk_buff, data)); -+ skb_head = dtrace_loadptr(baddr + offsetof(struct sk_buff, head)); -+ skb_len = dtrace_load32(baddr + offsetof(struct sk_buff, len)); -+ tail = dtrace_load32(baddr + offsetof(struct sk_buff, tail)); -+ skb_tail = skb_head + tail; -+ -+ if (skb_tail < skb_data) { -+ *flags |= CPU_DTRACE_BADADDR; -+ return -1; -+ } -+ while (*valoffs < end && skb_data < skb_tail) { -+ DTRACE_STORE(uint8_t, tomax, (*valoffs)++, -+ dtrace_load8(skb_data++)); -+ } -+ -+ data_len = dtrace_load32(baddr + offsetof(struct sk_buff, data_len)); -+ -+ /* -+ * If skb is linear, no need to explore fragments. -+ */ -+ if (data_len == 0) -+ goto pcap_done; -+ -+ skb_end = dtrace_load32(baddr + offsetof(struct sk_buff, end)); -+ shinfo = skb_head + skb_end; -+ -+ if (!dtrace_canload(shinfo, sizeof(struct skb_shared_info), -+ mstate, vstate)) -+ return -1; -+ -+ nr_frags = dtrace_load8(shinfo + offsetof(struct skb_shared_info, -+ nr_frags)); -+ -+ /* -+ * See skb_frag_foreach_page() macro usage elsewhere to understand the -+ * manipulations here; the reason we need this complexity is to support -+ * compound pages. 
-+ */ -+ for (f = 0; f < nr_frags; f++) { -+ uint32_t poff, plen, copied, flen; -+ struct page *p, *frag; -+ uintptr_t foff, v; -+ void *vaddr; -+ -+ flen = dtrace_load32(shinfo + offsetof(struct skb_shared_info, -+ frags[f].bv_len)); -+ foff = dtrace_load32(shinfo + offsetof(struct skb_shared_info, -+ frags[f].bv_offset)); -+ frag = (struct page *)dtrace_loadptr(shinfo + offsetof( -+ struct skb_shared_info, -+ frags[f].bv_page)); -+ -+ dtrace_skb_frag_foreach_page(frag, foff, flen, -+ p, poff, plen, copied) { -+ if (data_len == 0) -+ break; -+ -+ vaddr = kmap_atomic(p); -+ v = (uintptr_t)vaddr + poff; -+ if (!dtrace_canload(v, plen, mstate, vstate)) { -+ kunmap_atomic(vaddr); -+ return -1; -+ } -+ while (*valoffs < end && data_len-- > 0) { -+ DTRACE_STORE(uint8_t, tomax, (*valoffs)++, -+ dtrace_load8(v++)); -+ } -+ kunmap_atomic(vaddr); -+ } -+ } -+ -+pcap_done: -+ /* -+ * Note that we store the skb len here rather than the portion of it we -+ * capture; we can determine the latter when collecting data by using -+ * the "pcapsize" option. Packet capture headers specify a packet size -+ * and a capture size, so we want to be able to provide both. Since -+ * the capture size can be determined from the packet length when -+ * consuming records, we don't need to store it. 
-+ */ -+ DTRACE_STORE(uint64_t, tomax, start + sizeof(uint64_t), -+ (uint64_t)skb_len); -+ -+ return 0; -+} -+void dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, -+ uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, -+ uintptr_t arg5, uintptr_t arg6) -+{ -+ processorid_t cpuid; -+ dtrace_icookie_t cookie; -+ struct dtrace_probe *probe; -+ struct dtrace_mstate mstate; -+ struct dtrace_ecb *ecb; -+ struct dtrace_action *act; -+ intptr_t offs; -+ size_t size; -+ int onintr; -+ int vtime; -+ volatile uint16_t *flags; -+ ktime_t now; -+ uint32_t re_entry; -+ struct dtrace_task *dtsk = current->dt_task; -+ dtrace_id_t old_id; -+ -+#ifdef FIXME -+ /* -+ * Kick out immediately if this CPU is still being born (in which case -+ * curthread will be set to -1) or the current thread can't allow -+ * probes in its current context. -+ */ -+ if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE)) -+ return; -+#endif -+ -+ DTRACE_SYNC_ENTER_CRITICAL(cookie, re_entry); -+ -+ /* -+ * Probe context is not re-entrant, unless we're getting called to -+ * process an ERROR probe. 
-+ */ -+ flags = (volatile uint16_t *)&this_cpu_core->cpuc_dtrace_flags; -+ cpuid = smp_processor_id(); -+ if (re_entry && id != dtrace_probeid_error) { -+ dt_dbg_probe("Attempt to fire probe from within a probe " \ -+ "(ID %d, oID %d, CPU %d)\n", id, -+ (int)this_cpu_core->cpuc_current_probe, cpuid); -+ DTRACE_SYNC_EXIT_CRITICAL(cookie, re_entry); -+ return; -+ } -+ -+ probe = dtrace_probe_lookup_id(id); -+ onintr = in_interrupt(); -+ -+ if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && -+ dtsk != NULL && probe->dtpr_predcache == dtsk->dt_predcache) { -+ DTRACE_SYNC_EXIT_CRITICAL(cookie, re_entry); -+ return; -+ } -+ -+ if (oops_in_progress) { -+ DTRACE_SYNC_EXIT_CRITICAL(cookie, re_entry); -+ return; -+ } -+ -+ old_id = this_cpu_core->cpuc_current_probe; -+ this_cpu_core->cpuc_current_probe = id; -+ -+ now = dtrace_gethrtime(); -+ vtime = (dtrace_vtime_references > 0); -+ -+ if (vtime && dtsk != NULL && ktime_nz(dtsk->dt_start)) -+ dtsk->dt_vtime = ktime_add(dtsk->dt_vtime, -+ ktime_sub(now, dtsk->dt_start)); -+ -+ mstate.dtms_difo = NULL; -+ mstate.dtms_probe = probe; -+ mstate.dtms_strtok = (uintptr_t)NULL; -+ mstate.dtms_arg[0] = arg0; -+ mstate.dtms_arg[1] = arg1; -+ mstate.dtms_arg[2] = arg2; -+ mstate.dtms_arg[3] = arg3; -+ mstate.dtms_arg[4] = arg4; -+ mstate.dtms_arg[5] = arg5; -+ mstate.dtms_arg[6] = arg6; -+ -+ for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { -+ struct dtrace_predicate *pred = ecb->dte_predicate; -+ struct dtrace_state *state = ecb->dte_state; -+ struct dtrace_buffer *buf = &state->dts_buffer[cpuid]; -+ struct dtrace_buffer *aggbuf = &state->dts_aggbuffer[cpuid]; -+ struct dtrace_vstate *vstate = &state->dts_vstate; -+ struct dtrace_provider *prov = probe->dtpr_provider; -+ int committed = 0; -+ caddr_t tomax; -+ -+ /* -+ * A little subtlety with the following (seemingly innocuous) -+ * declaration of the automatic 'val': by looking at the -+ * code, you might think that it could be declared in the -+ * action 
processing loop, below. (That is, it's only used in -+ * the action processing loop.) However, it must be declared -+ * out of that scope because in the case of DIF expression -+ * arguments to aggregating actions, one iteration of the -+ * action loop will use the last iteration's value. -+ */ -+ uint64_t val = 0; -+ -+ mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE; -+ *flags &= ~CPU_DTRACE_ERROR; -+ -+ if (prov == dtrace_provider) { -+ /* -+ * If dtrace itself is the provider of this probe, -+ * we're only going to continue processing the ECB if -+ * arg0 (the dtrace_state_t) is equal to the ECB's -+ * creating state. (This prevents disjoint consumers -+ * from seeing one another's metaprobes.) -+ */ -+ if (arg0 != (uint64_t)(uintptr_t)state) -+ continue; -+ } -+ -+ if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) { -+ /* -+ * We're not currently active. If our provider isn't -+ * the dtrace pseudo provider, we're not interested. -+ */ -+ if (prov != dtrace_provider) -+ continue; -+ -+ /* -+ * Now we must further check if we are in the BEGIN -+ * probe. If we are, we will only continue orocessing -+ * if we're still in WARMUP -- if one BEGIN enabling -+ * has invoked the exit() action, we don't want to -+ * evaluate subsequent BEGIN enablings. -+ */ -+ if (probe->dtpr_id == dtrace_probeid_begin && -+ state->dts_activity != DTRACE_ACTIVITY_WARMUP) { -+ ASSERT(state->dts_activity == -+ DTRACE_ACTIVITY_DRAINING); -+ continue; -+ } -+ } -+ -+ dt_dbg_probe("Probe (ID %d EPID %d) on CPU %d...\n", -+ id, ecb->dte_epid, cpuid); -+ if (ecb->dte_cond) { -+ /* -+ * If the dte_cond bits indicate that this -+ * consumer is only allowed to see user-mode firings -+ * of this probe, call the provider's dtps_usermode() -+ * entry point to check that the probe was fired -+ * while in a user context. Skip this ECB if that's -+ * not the case. 
-+ */ -+ if ((ecb->dte_cond & DTRACE_COND_USERMODE) && -+ prov->dtpv_pops.dtps_usermode( -+ prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg -+ ) == 0) { -+ dt_dbg_probe("Probe (ID %d EPID %d) Skipped\n", -+ id, ecb->dte_epid); -+ continue; -+ } -+ -+ /* -+ * This is more subtle than it looks. We have to be -+ * absolutely certain that current_cred() isn't going -+ * to change out from under us so it's only legit to -+ * examine that structure if we're in constrained -+ * situations. Currently, the only times we'll use this -+ * check is if a non-super-user has enabled the -+ * profile or syscall providers -- providers that -+ * allow visibility of all processes. For the -+ * profile case, the check above will ensure that -+ * we're examining a user context. -+ */ -+ if (ecb->dte_cond & DTRACE_COND_OWNER) { -+ const struct cred *cr; -+ const struct cred *s_cr = -+ ecb->dte_state->dts_cred.dcr_cred; -+ -+ ASSERT(s_cr != NULL); -+ -+ cr = current_cred(); -+ if (cr == NULL || -+ !uid_eq(s_cr->euid, cr->euid) || -+ !uid_eq(s_cr->euid, cr->uid) || -+ !uid_eq(s_cr->euid, cr->suid) || -+ !gid_eq(s_cr->egid, cr->egid) || -+ !gid_eq(s_cr->egid, cr->gid) || -+ !gid_eq(s_cr->egid, cr->sgid)) { -+ dt_dbg_probe("Probe (ID %d EPID %d) " -+ "Skipped\n", -+ id, ecb->dte_epid); -+ continue; -+ } -+ } -+ } -+ -+ if (ktime_gt(ktime_sub(now, state->dts_alive), -+ dtrace_deadman_timeout)) { -+ /* -+ * We seem to be dead. Unless we (a) have kernel -+ * destructive permissions (b) have expicitly enabled -+ * destructive actions and (c) destructive actions have -+ * not been disabled, we're going to transition into -+ * the KILLED state, from which no further processing -+ * on this state will be performed. 
-+ */ -+ if (!dtrace_priv_kernel_destructive(state) || -+ !state->dts_cred.dcr_destructive || -+ dtrace_destructive_disallow) { -+ enum dtrace_activity *activity = -+ &state->dts_activity; -+ enum dtrace_activity curr; -+ -+ do { -+ curr = state->dts_activity; -+ } while (cmpxchg(activity, curr, -+ DTRACE_ACTIVITY_KILLED) != curr); -+ -+ dt_dbg_probe("Probe (ID %d EPID %d) Skipped\n", -+ id, ecb->dte_epid); -+ continue; -+ } -+ } -+ -+ offs = dtrace_buffer_reserve(buf, ecb->dte_needed, -+ ecb->dte_alignment, state, -+ &mstate); -+ if (offs < 0) { -+ dt_dbg_probe("Probe (ID %d EPID %d) Skipped\n", -+ id, ecb->dte_epid); -+ continue; -+ } -+ -+ tomax = buf->dtb_tomax; -+ ASSERT(tomax != NULL); -+ -+ if (ecb->dte_size != 0) { -+ DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid); -+ dt_dbg_buf(" Store: %p[%ld .. %ld] <- %d [EPID] " -+ "(from %s::%d)\n", -+ buf, offs, offs + sizeof(uint32_t) - 1, -+ ecb->dte_epid, __func__, __LINE__); -+ } -+ -+ mstate.dtms_epid = ecb->dte_epid; -+ mstate.dtms_present |= DTRACE_MSTATE_EPID; -+ -+ if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) -+ mstate.dtms_access = DTRACE_ACCESS_KERNEL; -+ else -+ mstate.dtms_access = 0; -+ -+ if (pred != NULL) { -+ struct dtrace_difo *dp = pred->dtp_difo; -+ int rval; -+ -+ dt_dbg_probe(" Evaluating predicate...\n"); -+ -+ rval = dtrace_dif_emulate(dp, &mstate, vstate, state); -+ -+ if (!(*flags & CPU_DTRACE_ERROR) && !rval) { -+ dtrace_cacheid_t cid = -+ probe->dtpr_predcache; -+ -+ if (cid != DTRACE_CACHEIDNONE && !onintr) { -+ /* -+ * Update the predicate cache... 
-+ */ -+ ASSERT(cid == pred->dtp_cacheid); -+ if (dtsk != NULL) -+ dtsk->dt_predcache = cid; -+ } -+ -+ dt_dbg_probe(" Predicate not met (%d)\n", -+ rval); -+ dt_dbg_probe("Probe (ID %d EPID %d) Done\n", -+ id, ecb->dte_epid); -+ continue; -+ } -+ -+ dt_dbg_probe(" Predicate met (%d)\n", rval); -+ } -+ -+ for (act = ecb->dte_action; -+ !(*flags & CPU_DTRACE_ERROR) && act != NULL; -+ act = act->dta_next) { -+ size_t valoffs; -+ struct dtrace_difo *dp; -+ struct dtrace_recdesc *rec = &act->dta_rec; -+ -+ dt_dbg_probe(" Evaluating action %p (kind %d)...\n", -+ act, act->dta_kind); -+ -+ size = rec->dtrd_size; -+ valoffs = offs + rec->dtrd_offset; -+ -+ if (DTRACEACT_ISAGG(act->dta_kind)) { -+ uint64_t v = 0xbad; -+ struct dtrace_aggregation *agg; -+ -+ agg = (struct dtrace_aggregation *)act; -+ -+ dp = act->dta_difo; -+ if (dp != NULL) -+ v = dtrace_dif_emulate(dp, &mstate, -+ vstate, state); -+ -+ if (*flags & CPU_DTRACE_ERROR) -+ continue; -+ -+ /* -+ * Note that we always pass the expression -+ * value from the previous iteration of the -+ * action loop. This value will only be used -+ * if there is an expression argument to the -+ * aggregating action, denoted by the -+ * dtag_hasarg field. -+ */ -+ dtrace_aggregate(agg, buf, offs, aggbuf, v, -+ val); -+ continue; -+ } -+ -+ switch (act->dta_kind) { -+ case DTRACEACT_STOP: -+ if (dtrace_priv_proc_destructive(state)) -+ dtrace_action_stop(); -+ continue; -+ -+ case DTRACEACT_BREAKPOINT: -+ if (dtrace_priv_kernel_destructive(state)) -+ dtrace_action_breakpoint(ecb); -+ continue; -+ -+ case DTRACEACT_PANIC: -+ if (dtrace_priv_kernel_destructive(state)) -+ dtrace_action_panic(ecb); -+ continue; -+ -+ case DTRACEACT_STACK: -+ if (!dtrace_priv_kernel(state)) -+ continue; -+ -+ dtrace_getpcstack( -+ (uint64_t *)(tomax + valoffs), -+ size / sizeof(pc_t), -+ probe->dtpr_aframes + 1, -+ DTRACE_ANCHORED(probe) -+ ? 
NULL -+ : (uint32_t *)arg0); -+ -+ continue; -+ -+ case DTRACEACT_JSTACK: -+ case DTRACEACT_USTACK: -+ if (!dtrace_priv_proc(state)) -+ continue; -+ -+ /* -+ * See comment in DIF_VAR_PID. -+ */ -+ if (DTRACE_ANCHORED(mstate.dtms_probe) && -+ in_interrupt()) { -+ int depth = DTRACE_USTACK_NFRAMES( -+ rec->dtrd_arg) + 2; -+ -+ dtrace_bzero((void *)(tomax + valoffs), -+ DTRACE_USTACK_STRSIZE( -+ rec->dtrd_arg) + -+ depth * sizeof(uint64_t)); -+ -+ continue; -+ } -+ -+ if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 && -+ dtsk != NULL && dtsk->dt_helpers != NULL) { -+ /* -+ * This is the slow path -- we have -+ * allocated string space, and we're -+ * getting the stack of a process that -+ * has helpers. Call into a separate -+ * routine to perform this processing. -+ */ -+ dtrace_action_ustack( -+ &mstate, state, -+ (uint64_t *)(tomax + valoffs), -+ rec->dtrd_arg); -+ continue; -+ } -+ -+ dtrace_getupcstack( -+ (uint64_t *)(tomax + valoffs), -+ DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + -+ 2); -+ continue; -+ -+ default: -+ break; -+ } -+ -+ dp = act->dta_difo; -+ ASSERT(dp != NULL); -+ -+ val = dtrace_dif_emulate(dp, &mstate, vstate, state); -+ -+ if (*flags & CPU_DTRACE_ERROR) -+ continue; -+ -+ switch (act->dta_kind) { -+ case DTRACEACT_SPECULATE: -+ ASSERT(buf == &state->dts_buffer[cpuid]); -+ buf = dtrace_speculation_buffer(state, cpuid, -+ val); -+ -+ if (buf == NULL) { -+ *flags |= CPU_DTRACE_DROP; -+ continue; -+ } -+ -+ offs = dtrace_buffer_reserve(buf, -+ ecb->dte_needed, -+ ecb->dte_alignment, -+ state, NULL); -+ -+ if (offs < 0) { -+ *flags |= CPU_DTRACE_DROP; -+ continue; -+ } -+ -+ tomax = buf->dtb_tomax; -+ ASSERT(tomax != NULL); -+ -+ if (ecb->dte_size != 0) { -+ DTRACE_STORE(uint32_t, tomax, offs, -+ ecb->dte_epid); -+ dt_dbg_buf(" Store: %p[%ld .. 
%ld] " -+ "<- %d [EPID] " -+ "(from %s::%d)\n", -+ buf, offs, -+ offs + sizeof(uint32_t) - 1, -+ ecb->dte_epid, -+ __FUNCTION__, __LINE__); -+ } -+ -+ continue; -+ -+ case DTRACEACT_CHILL: -+ if (dtrace_priv_kernel_destructive(state)) -+ dtrace_action_chill(&mstate, -+ ns_to_ktime(val)); -+ -+ continue; -+ -+ case DTRACEACT_RAISE: -+ if (dtrace_priv_proc_destructive(state)) -+ dtrace_action_raise(val); -+ -+ continue; -+ -+ case DTRACEACT_COMMIT: -+ ASSERT(!committed); -+ -+ /* -+ * We need to commit our buffer state. -+ */ -+ if (ecb->dte_size) { -+ buf->dtb_offset = offs + ecb->dte_size; -+ dt_dbg_buf(" Consume: %p[%ld .. " -+ "%lld]\n", -+ buf, offs, -+ buf->dtb_offset - 1); -+ } -+ -+ buf = &state->dts_buffer[cpuid]; -+ dtrace_speculation_commit(state, cpuid, val); -+ committed = 1; -+ continue; -+ -+ case DTRACEACT_DISCARD: -+ dtrace_speculation_discard(state, cpuid, val); -+ continue; -+ -+ case DTRACEACT_DIFEXPR: -+ case DTRACEACT_LIBACT: -+ case DTRACEACT_PRINTF: -+ case DTRACEACT_PRINTA: -+ case DTRACEACT_SYSTEM: -+ case DTRACEACT_FREOPEN: -+ case DTRACEACT_TRACEMEM: -+ case DTRACEACT_PCAP: -+ break; -+ -+ case DTRACEACT_SYM: -+ case DTRACEACT_MOD: -+ if (!dtrace_priv_kernel(state)) -+ continue; -+ break; -+ -+ case DTRACEACT_USYM: -+ case DTRACEACT_UMOD: -+ case DTRACEACT_UADDR: { -+ pid_t pid = current->pid; -+ pid_t tgid = current->tgid; -+ -+ if (!dtrace_priv_proc(state)) -+ continue; -+ -+ DTRACE_STORE(uint64_t, tomax, valoffs, -+ (uint64_t)pid); -+ dt_dbg_buf(" Store: %p[%ld .. %ld] <- %lld " -+ "[PID] (from %s::%d)\n", -+ buf, valoffs, -+ valoffs + sizeof(uint64_t) - 1, -+ (uint64_t)pid, -+ __FUNCTION__, __LINE__); -+ DTRACE_STORE(uint64_t, tomax, -+ valoffs + sizeof(uint64_t), -+ (uint64_t)tgid); -+ dt_dbg_buf(" Store: %p[%ld .. 
%ld] <- %lld " -+ "[TGID] (from %s::%d)\n", -+ buf, valoffs + sizeof(uint64_t), -+ valoffs + 2 * sizeof(uint64_t) - 1, -+ (uint64_t)tgid, -+ __FUNCTION__, __LINE__); -+ DTRACE_STORE(uint64_t, tomax, -+ valoffs + 2 * sizeof(uint64_t), -+ val); -+ dt_dbg_buf(" Store: %p[%ld .. %ld] <- %lld " -+ "(from %s::%d)\n", -+ buf, valoffs + 2 * sizeof(uint64_t), -+ valoffs + 3 * sizeof(uint64_t) - 1, -+ val, __FUNCTION__, __LINE__); -+ -+ continue; -+ } -+ -+ case DTRACEACT_EXIT: { -+ /* -+ * For the exit action, we are going to attempt -+ * to atomically set our activity to be -+ * draining. If this fails (either because -+ * another CPU has beat us to the exit action, -+ * or because our current activity is something -+ * other than ACTIVE or WARMUP), we will -+ * continue. This assures that the exit action -+ * can be successfully recorded at most once -+ * when we're in the ACTIVE state. If we're -+ * encountering the exit() action while in -+ * COOLDOWN, however, we want to honor the new -+ * status code. (We know that we're the only -+ * thread in COOLDOWN, so there is no race.) 
-+ */ -+ enum dtrace_activity *activity = -+ &state->dts_activity; -+ enum dtrace_activity curr = -+ state->dts_activity; -+ -+ if (curr == DTRACE_ACTIVITY_COOLDOWN) -+ break; -+ -+ if (curr != DTRACE_ACTIVITY_WARMUP) -+ curr = DTRACE_ACTIVITY_ACTIVE; -+ -+ if (cmpxchg(activity, curr, -+ DTRACE_ACTIVITY_DRAINING) != curr) { -+ *flags |= CPU_DTRACE_DROP; -+ continue; -+ } -+ -+ break; -+ } -+ -+ default: -+ ASSERT(0); -+ } -+ -+ if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { -+ uintptr_t end = valoffs + size; -+ -+ if (!dtrace_vcanload((void *)(uintptr_t)val, -+ &dp->dtdo_rtype, &mstate, -+ vstate)) -+ continue; -+ -+ if (act->dta_kind == DTRACEACT_PCAP) { -+ if (dtrace_probe_pcap(val, &valoffs, -+ size, tomax, now, -+ &mstate, vstate, -+ flags) == -1) -+ break; -+ continue; -+ } -+ -+ /* -+ * If this is a string, we're going to only -+ * load until we find the zero byte -- after -+ * which we'll store zero bytes. -+ */ -+ if (dp->dtdo_rtype.dtdt_kind == -+ DIF_TYPE_STRING) { -+ char c = '\0' + 1; -+ int intuple = act->dta_intuple; -+ size_t s; -+ -+ for (s = 0; s < size; s++) { -+ if (c != '\0') -+ c = dtrace_load8(val++); -+ -+ DTRACE_STORE(uint8_t, tomax, -+ valoffs++, c); -+ dt_dbg_buf(" Store: %p[%ld]" -+ " <- %d (from " -+ "%s::%d)\n", -+ buf, valoffs, c, -+ __FUNCTION__, -+ __LINE__); -+ -+ if (c == '\0' && intuple) -+ break; -+ } -+ -+ continue; -+ } -+ -+ while (valoffs < end) { -+ DTRACE_STORE(uint8_t, tomax, valoffs++, -+ dtrace_load8(val++)); -+ dt_dbg_buf(" Store: %p[%ld] <- ??? " -+ "(from %s::%d)\n", -+ buf, valoffs, -+ __FUNCTION__, __LINE__); -+ } -+ -+ continue; -+ } -+ -+ switch (size) { -+ case 0: -+ break; -+ case sizeof(uint8_t): -+ DTRACE_STORE(uint8_t, tomax, valoffs, val); -+ dt_dbg_buf(" Store: %p[%ld] <- %d " -+ "(from %s::%d)\n", -+ buf, valoffs, (uint8_t)val, -+ __FUNCTION__, __LINE__); -+ break; -+ case sizeof(uint16_t): -+ DTRACE_STORE(uint16_t, tomax, valoffs, val); -+ dt_dbg_buf(" Store: %p[%ld .. 
%ld] <- %d " -+ "(from %s::%d)\n", -+ buf, valoffs, -+ valoffs + sizeof(uint16_t) - 1, -+ (uint16_t)val, -+ __FUNCTION__, __LINE__); -+ break; -+ case sizeof(uint32_t): -+ DTRACE_STORE(uint32_t, tomax, valoffs, val); -+ dt_dbg_buf(" Store: %p[%ld] <- %d " -+ "(from %s::%d)\n", -+ buf, valoffs, -+ (uint32_t)val, -+ __FUNCTION__, __LINE__); -+ break; -+ case sizeof(uint64_t): -+ DTRACE_STORE(uint64_t, tomax, valoffs, val); -+ dt_dbg_buf(" Store: %p[%ld] <- %lld " -+ "(from %s::%d)\n", -+ buf, valoffs, -+ val, -+ __FUNCTION__, __LINE__); -+ break; -+ default: -+ /* -+ * Any other size should have been returned by -+ * reference, not by value. -+ */ -+ ASSERT(0); -+ break; -+ } -+ } -+ -+ if (*flags & CPU_DTRACE_DROP) { -+ dt_dbg_probe(" -> Dropped\n"); -+ continue; -+ } -+ -+ if (*flags & CPU_DTRACE_FAULT) { -+ int ndx; -+ struct dtrace_action *err; -+ -+ dt_dbg_probe(" -> Failed (%x)\n", *flags); -+ -+ buf->dtb_errors++; -+ -+ if (probe->dtpr_id == dtrace_probeid_error) { -+ /* -+ * There's nothing we can do -- we had an -+ * error on the error probe. We bump an -+ * error counter to at least indicate that -+ * this condition happened. -+ */ -+ dtrace_error(&state->dts_dblerrors); -+ continue; -+ } -+ -+ if (vtime && dtsk != NULL) -+ /* -+ * Before recursing on dtrace_probe(), we -+ * need to explicitly clear out our start -+ * time to prevent it from being accumulated -+ * into the dtrace_vtime. -+ */ -+ dtsk->dt_start = ktime_set(0, 0); -+ -+ /* -+ * Iterate over the actions to figure out which action -+ * we were processing when we experienced the error. -+ * Note that act points _past_ the faulting action; if -+ * act is ecb->dte_action, the fault was in the -+ * predicate, if it's ecb->dte_action->dta_next it's -+ * in action #1, and so on. -+ */ -+ for (err = ecb->dte_action, ndx = 0; -+ err != act; err = err->dta_next, ndx++) -+ continue; -+ -+ dtrace_probe_error( -+ state, ecb->dte_epid, ndx, -+ (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) -+ ? 
mstate.dtms_fltoffs -+ : -1, -+ DTRACE_FLAGS2FLT(*flags), -+ this_cpu_core->cpuc_dtrace_illval); -+ -+ continue; -+ } -+ -+ if (!committed) { -+ buf->dtb_offset = offs + ecb->dte_size; -+ dt_dbg_buf(" Consume: %p[%ld .. %lld]\n", -+ buf, offs, buf->dtb_offset); -+ } -+ -+ dt_dbg_probe("Probe (ID %d EPID %d) Done\n", -+ id, ecb->dte_epid); -+ } -+ -+ if (vtime && dtsk != NULL) -+ dtsk->dt_start = dtrace_gethrtime(); -+ -+ this_cpu_core->cpuc_current_probe = old_id; -+ DTRACE_SYNC_EXIT_CRITICAL(cookie, re_entry); -+ -+ if (dtsk != NULL && dtsk->dt_sig != 0) { -+ int sig = dtsk->dt_sig; -+ -+ dtsk->dt_sig = 0; -+ -+ send_sig(sig, current, 0); -+ } -+} -+EXPORT_SYMBOL(dtrace_probe); -+ -+int dtrace_probe_init(void) -+{ -+ dtrace_id_t id; -+ -+ dtrace_probe_cachep = KMEM_CACHE(dtrace_probe, SLAB_HWCACHE_ALIGN); -+ if (dtrace_probe_cachep == NULL) -+ return -ENOMEM; -+ -+ idr_init(&dtrace_probe_idr); -+ -+ /* -+ * We create a ID 0 entry as a sentinel, so we can always depend on it -+ * being the very first entry. This is used in functionality that runs -+ * through the list of probes. -+ */ -+ idr_preload(GFP_KERNEL); -+ id = idr_alloc_cyclic(&dtrace_probe_idr, NULL, 0, 0, GFP_NOWAIT); -+ idr_preload_end(); -+ -+ return id == 0 ? 
0 : -EAGAIN; -+} -+ -+void dtrace_probe_exit(void) -+{ -+ idr_destroy(&dtrace_probe_idr); -+ kmem_cache_destroy(dtrace_probe_cachep); -+} -+ -+void dtrace_probe_remove_id(dtrace_id_t id) -+{ -+ idr_remove(&dtrace_probe_idr, id); -+} -+ -+struct dtrace_probe *dtrace_probe_lookup_id(dtrace_id_t id) -+{ -+ return idr_find(&dtrace_probe_idr, id); -+} -+ -+static int dtrace_probe_lookup_match(struct dtrace_probe *probe, void *arg) -+{ -+ *((dtrace_id_t *)arg) = probe->dtpr_id; -+ -+ return DTRACE_MATCH_DONE; -+} -+ -+dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod, -+ const char *func, const char *name) -+{ -+ struct dtrace_probekey pkey; -+ dtrace_id_t id; -+ int match; -+ -+ pkey.dtpk_prov = ((struct dtrace_provider *)prid)->dtpv_name; -+ pkey.dtpk_pmatch = &dtrace_match_string; -+ pkey.dtpk_mod = mod; -+ pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul; -+ pkey.dtpk_func = func; -+ pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul; -+ pkey.dtpk_name = name; -+ pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul; -+ pkey.dtpk_id = DTRACE_IDNONE; -+ -+ mutex_lock(&dtrace_lock); -+ match = dtrace_match(&pkey, DTRACE_PRIV_ALL, -+ make_kuid(init_user_namespace, 0), -+ dtrace_probe_lookup_match, &id); -+ mutex_unlock(&dtrace_lock); -+ -+ ASSERT(match == 1 || match == 0); -+ -+ return match ? 
id : 0; -+} -+EXPORT_SYMBOL(dtrace_probe_lookup); -+ -+struct dtrace_probe *dtrace_probe_get_next(dtrace_id_t *idp) -+{ -+ return idr_get_next(&dtrace_probe_idr, idp); -+} -+ -+int dtrace_probe_for_each(int (*fn)(int id, void *p, void *data), void *data) -+{ -+ return idr_for_each(&dtrace_probe_idr, fn, data); -+} -diff --git a/dtrace/dtrace_probe_ctx.c b/dtrace/dtrace_probe_ctx.c -new file mode 100644 -index 000000000000..f04b5b269222 ---- /dev/null -+++ b/dtrace/dtrace_probe_ctx.c -@@ -0,0 +1,659 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_probe_ctx.c -+ * DESCRIPTION: DTrace - probe context safe functions -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/dtrace_cpu.h> -+ -+#include "dtrace.h" -+ -+void dtrace_panic(const char *fmt, ...) -+{ -+ va_list alist; -+ -+ va_start(alist, fmt); -+ vprintk(fmt, alist); -+ va_end(alist); -+ -+ BUG(); -+} -+EXPORT_SYMBOL(dtrace_panic); -+ -+int dtrace_assfail(const char *a, const char *f, int l) -+{ -+ dtrace_panic(KERN_EMERG "assertion failed: %s, file: %s, line: %d", -+ a, f, l); -+ -+ /* -+ * FIXME: We can do better than this. The OpenSolaris DTrace source -+ * states that this cannot be optimized away. 
-+ */ -+ return a[(uintptr_t)f]; -+} -+EXPORT_SYMBOL(dtrace_assfail); -+ -+#define DT_MASK_LO 0x00000000FFFFFFFFULL -+ -+static void dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum) -+{ -+ uint64_t result[2]; -+ -+ result[0] = addend1[0] + addend2[0]; -+ result[1] = addend1[1] + addend2[1] + -+ (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0); -+ -+ sum[0] = result[0]; -+ sum[1] = result[1]; -+} -+ -+static void dtrace_shift_128(uint64_t *a, int b) -+{ -+ uint64_t mask; -+ -+ if (b == 0) -+ return; -+ -+ if (b < 0) { -+ b = -b; -+ -+ if (b >= 64) { -+ a[0] = a[1] >> (b - 64); -+ a[1] = 0; -+ } else { -+ a[0] >>= b; -+ mask = 1LL << (64 - b); -+ mask -= 1; -+ a[0] |= ((a[1] & mask) << (64 - b)); -+ a[1] >>= b; -+ } -+ } else { -+ if (b >= 64) { -+ a[1] = a[0] << (b - 64); -+ a[0] = 0; -+ } else { -+ a[1] <<= b; -+ mask = a[0] >> (64 - b); -+ a[1] |= mask; -+ a[0] <<= b; -+ } -+ } -+} -+ -+static void dtrace_multiply_128(uint64_t factor1, uint64_t factor2, -+ uint64_t *product) -+{ -+ uint64_t hi1, hi2, lo1, lo2; -+ uint64_t tmp[2]; -+ -+ hi1 = factor1 >> 32; -+ hi2 = factor2 >> 32; -+ -+ lo1 = factor1 & DT_MASK_LO; -+ lo2 = factor2 & DT_MASK_LO; -+ -+ product[0] = lo1 * lo2; -+ product[1] = hi1 * hi2; -+ -+ tmp[0] = hi1 * lo2; -+ tmp[1] = 0; -+ dtrace_shift_128(tmp, 32); -+ dtrace_add_128(product, tmp, product); -+ -+ tmp[0] = hi2 * lo1; -+ tmp[1] = 0; -+ dtrace_shift_128(tmp, 32); -+ dtrace_add_128(product, tmp, product); -+} -+ -+void dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) -+{ -+ if ((int64_t)nval < (int64_t)*oval) -+ *oval = nval; -+} -+ -+void dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg) -+{ -+ if ((int64_t)nval > (int64_t)*oval) -+ *oval = nval; -+} -+ -+void dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr) -+{ -+ int i, zero = DTRACE_QUANTIZE_ZEROBUCKET; -+ int64_t val = (int64_t)nval; -+ -+ if (val < 0) { -+ for (i = 0; i < zero; i++) { -+ if (val <= 
DTRACE_QUANTIZE_BUCKETVAL(i)) { -+ quanta[i] += incr; -+ -+ return; -+ } -+ } -+ } else { -+ for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) { -+ if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) { -+ quanta[i - 1] += incr; -+ -+ return; -+ } -+ } -+ -+ quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr; -+ -+ return; -+ } -+ -+ ASSERT(0); -+} -+ -+void dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, -+ uint64_t incr) -+{ -+ uint64_t arg = *lquanta++; -+ int32_t base = DTRACE_LQUANTIZE_BASE(arg); -+ uint16_t step = DTRACE_LQUANTIZE_STEP(arg); -+ uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg); -+ int64_t val = (int64_t)nval, level; -+ -+ ASSERT(step != 0); -+ ASSERT(levels != 0); -+ -+ if (val < base) { -+ lquanta[0] += incr; -+ -+ return; -+ } -+ -+ level = (val - base) / step; -+ -+ if (level < levels) { -+ lquanta[level + 1] += incr; -+ -+ return; -+ } -+ -+ lquanta[levels + 1] += incr; -+} -+ -+static uint64_t dtrace_pow(uint64_t base, uint64_t exp) -+{ -+ uint64_t p, r; -+ -+ p = base; -+ r = 1; -+ while (exp > 0) { -+ if (exp & 1) -+ r *= p; -+ -+ p *= p; -+ exp >>= 1; -+ } -+ -+ return (r); -+} -+ -+void dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, -+ uint64_t incr) -+{ -+ uint64_t arg = *llquanta++; -+ int factor = DTRACE_LLQUANTIZE_FACTOR(arg); -+ int lmag = DTRACE_LLQUANTIZE_LMAG(arg); -+ int hmag = DTRACE_LLQUANTIZE_HMAG(arg); -+ int steps = DTRACE_LLQUANTIZE_STEPS(arg); -+ int i, signbit, steps_factor, mag, underflow_bin; -+ uint64_t val, bucket_max; -+ -+ ASSERT(steps != 0); -+ ASSERT(factor > 1); -+ -+ if (nval >> (64 - 1)) { -+ signbit = -1; -+ val = 1 + ~nval; -+ } else { -+ signbit = +1; -+ val = nval; -+ } -+ -+ /* -+ * Compute steps/factor. -+ * Notice that while we say there are "steps" bins per logarithmic -+ * range, steps/factor of them actually overlap with lower ranges. -+ * E.g., if factor=10 and steps=20, for mag=2 we have the 20 bins -+ * 0 50 100 150 200 250 300 350 ... 
800 850 900 950 -+ * but the first two actually belong to lower ranges. -+ */ -+ steps_factor = steps/factor; -+ -+ /* the underflow bin is in the middle */ -+ underflow_bin = 1 + (hmag-lmag+1) * (steps-steps_factor); -+ -+ bucket_max = dtrace_pow(factor, lmag); -+ -+ /* check for "underflow" (smaller than the smallest bin) */ -+ if (val < bucket_max) { -+ llquanta[underflow_bin] += incr; -+ return; -+ } -+ -+ /* loop over the logarithmic ranges */ -+ i = 0; -+ for (mag = lmag; mag <= hmag; mag++) { -+ bucket_max *= factor; -+ if (val >= bucket_max) -+ continue; -+ -+ /* -+ * We want -+ * i = val * steps / bucket_max; -+ * but val*steps could overflow. An alternative is -+ * i = val / ( bucket_max/steps ) -+ * but bucket_max/steps might not divide evenly. -+ * (Plus, we end up with an extra divide.) -+ * -+ * From Solaris, we inherit constraints on factor and steps -+ * that mean bucket_max/steps divides evenly when mag>0. -+ * Meanwhile, if mag==0, val*steps cannot overflow. -+ * So between our two expressions for i, at least one -+ * will work and we just have to pick which one to use. 
-+ */ -+ if (mag == 0) -+ i = val * steps / bucket_max; -+ else -+ i = val / (bucket_max/steps); -+ -+ // shift for low indices that can never happen -+ i -= steps_factor; -+ break; -+ } -+ i = underflow_bin+signbit*((steps-steps_factor)*(mag-lmag)+i+1); -+ llquanta[i] += incr; -+} -+ -+void dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) -+{ -+ data[0]++; -+ data[1] += nval; -+} -+ -+void dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg) -+{ -+ int64_t snval = (int64_t)nval; -+ uint64_t tmp[2]; -+ -+ data[0]++; -+ data[1] += nval; -+ -+ if (snval < 0) -+ snval = -snval; -+ -+ dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp); -+ dtrace_add_128(data + 2, tmp, data + 2); -+} -+ -+void dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) -+{ -+ *oval = *oval + 1; -+} -+ -+void dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) -+{ -+ *oval += nval; -+} -+ -+/* -+ * DTrace Aggregation Buffers -+ * -+ * Aggregation buffers use much of the same mechanism as described above -+ * ("DTrace Buffers"). However, because an aggregation is fundamentally a -+ * hash, there exists dynamic metadata associated with an aggregation buffer -+ * that is not associated with other kinds of buffers. This aggregation -+ * metadata is _only_ relevant for the in-kernel implementation of -+ * aggregations; it is not actually relevant to user-level consumers. To do -+ * this, we allocate dynamic aggregation data (hash keys and hash buckets) -+ * starting below the _limit_ of the buffer, and we allocate data from the -+ * _base_ of the buffer. When the aggregation buffer is copied out, _only_ the -+ * data is copied out; the metadata is simply discarded. 
Schematically, -+ * aggregation buffers look like: -+ * -+ * base of data buffer ---> +-------+------+-----------+-------+ -+ * | aggid | key | value | aggid | -+ * +-------+------+-----------+-------+ -+ * | key | -+ * +-------+-------+-----+------------+ -+ * | value | aggid | key | value | -+ * +-------+------++-----+------+-----+ -+ * | aggid | key | value | | -+ * +-------+------+-------------+ | -+ * | || | -+ * | || | -+ * | \/ | -+ * : : -+ * . . -+ * . . -+ * . . -+ * : : -+ * | /\ | -+ * | || +------------+ -+ * | || | | -+ * +---------------------+ | -+ * | hash keys | -+ * | (dtrace_aggkey structures) | -+ * | | -+ * +----------------------------------+ -+ * | hash buckets | -+ * | (dtrace_aggbuffer structure) | -+ * | | -+ * limit of data buffer ---> +----------------------------------+ -+ * -+ * As implied above, just as we assure that ECBs always store a constant -+ * amount of data, we assure that a given aggregation -- identified by its -+ * aggregation ID -- always stores data of a constant quantity and type. -+ * As with EPIDs, this allows the aggregation ID to serve as the metadata for a -+ * given record. -+ * -+ * Note that the size of the dtrace_aggkey structure must be sizeof (uintptr_t) -+ * aligned. (If this the structure changes such that this becomes false, an -+ * assertion will fail in dtrace_aggregate().) 
-+ */ -+#define DTRACE_AGGHASHSIZE_SLEW 17 -+ -+struct dtrace_aggkey { -+ uint32_t dtak_hashval; /* hash value */ -+ uint32_t dtak_action:4; /* action -- 4 bits */ -+ uint32_t dtak_size:28; /* size -- 28 bits */ -+ caddr_t dtak_data; /* data pointer */ -+ struct dtrace_aggkey *dtak_next; /* next in hash chain */ -+}; -+ -+struct dtrace_aggbuffer { -+ uintptr_t dtagb_hashsize; /* number of buckets */ -+ uintptr_t dtagb_free; /* free list of keys */ -+ struct dtrace_aggkey **dtagb_hash; /* hash table */ -+}; -+ -+#define DTRACEACT_ISSTRING(act) \ -+ ((act)->dta_kind == DTRACEACT_DIFEXPR && \ -+ (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) -+ -+/* -+ * Aggregate given the tuple in the principal data buffer, and the aggregating -+ * action denoted by the specified dtrace_aggregation_t. The aggregation -+ * buffer is specified as the buf parameter. This routine does not return -+ * failure; if there is no space in the aggregation buffer, the data will be -+ * dropped, and a corresponding counter incremented. -+ */ -+void dtrace_aggregate(struct dtrace_aggregation *agg, -+ struct dtrace_buffer *dbuf, -+ intptr_t offset, struct dtrace_buffer *buf, -+ uint64_t expr, uint64_t arg) -+{ -+ struct dtrace_recdesc *rec = &agg->dtag_action.dta_rec; -+ uint32_t i, ndx, size, fsize; -+ uint32_t align = sizeof(uint64_t) - 1; -+ struct dtrace_aggbuffer *agb; -+ struct dtrace_aggkey *key; -+ uint32_t hashval = 0, limit, isstr; -+ caddr_t tomax, data, kdata; -+ dtrace_actkind_t action; -+ struct dtrace_action *act; -+ uintptr_t offs; -+ -+ if (buf == NULL) -+ return; -+ -+ if (!agg->dtag_hasarg) -+ /* -+ * Currently, only quantize(), lquantize() and llquantize() -+ * take additional arguments, and they have the same semantics: -+ * an increment value that defaults to 1 when not present. If -+ * additional aggregating actions take arguments, the setting -+ * of the default argument value will presumably have to -+ * become more sophisticated... 
-+ */ -+ arg = 1; -+ -+ action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION; -+ size = rec->dtrd_offset - agg->dtag_base; -+ fsize = size + rec->dtrd_size; -+ -+ ASSERT(dbuf->dtb_tomax != NULL); -+ data = dbuf->dtb_tomax + offset + agg->dtag_base; -+ -+ tomax = buf->dtb_tomax; -+ if (tomax == NULL) { -+ dtrace_buffer_drop(buf); -+ return; -+ } -+ -+ /* -+ * The metastructure is always at the bottom of the buffer. -+ */ -+ agb = (struct dtrace_aggbuffer *)(tomax + buf->dtb_size - -+ sizeof(struct dtrace_aggbuffer)); -+ -+ if (buf->dtb_offset == 0) { -+ /* -+ * We just kludge up approximately 1/8th of the size to be -+ * buckets. If this guess ends up being routinely -+ * off-the-mark, we may need to dynamically readjust this -+ * based on past performance. -+ */ -+ uintptr_t hashsize = (buf->dtb_size >> 3) / -+ sizeof(uintptr_t); -+ -+ if ((uintptr_t)agb - hashsize * sizeof(struct dtrace_aggkey *) < -+ (uintptr_t)tomax || hashsize == 0) { -+ /* -+ * We've been given a ludicrously small buffer; -+ * increment our drop count and leave. -+ */ -+ dtrace_buffer_drop(buf); -+ return; -+ } -+ -+ /* -+ * And now, a pathetic attempt to try to get a an odd (or -+ * perchance, a prime) hash size for better hash distribution. -+ */ -+ if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3)) -+ hashsize -= DTRACE_AGGHASHSIZE_SLEW; -+ -+ agb->dtagb_hashsize = hashsize; -+ agb->dtagb_hash = (struct dtrace_aggkey **)((uintptr_t)agb - -+ agb->dtagb_hashsize * sizeof(struct dtrace_aggkey *)); -+ agb->dtagb_free = (uintptr_t)agb->dtagb_hash; -+ -+ for (i = 0; i < agb->dtagb_hashsize; i++) -+ agb->dtagb_hash[i] = NULL; -+ } -+ -+ ASSERT(agg->dtag_first != NULL); -+ ASSERT(agg->dtag_first->dta_intuple); -+ -+ /* -+ * Calculate the hash value based on the key. Note that we _don't_ -+ * include the aggid in the hashing (but we will store it as part of -+ * the key). 
The hashing algorithm is Bob Jenkins' "One-at-a-time" -+ * algorithm: a simple, quick algorithm that has no known funnels, and -+ * gets good distribution in practice. The efficacy of the hashing -+ * algorithm (and a comparison with other algorithms) may be found by -+ * running the ::dtrace_aggstat MDB dcmd. -+ */ -+ for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { -+ i = act->dta_rec.dtrd_offset - agg->dtag_base; -+ limit = i + act->dta_rec.dtrd_size; -+ ASSERT(limit <= size); -+ isstr = DTRACEACT_ISSTRING(act); -+ -+ for (; i < limit; i++) { -+ hashval += data[i]; -+ hashval += (hashval << 10); -+ hashval ^= (hashval >> 6); -+ -+ if (isstr && data[i] == '\0') -+ break; -+ } -+ } -+ -+ hashval += (hashval << 3); -+ hashval ^= (hashval >> 11); -+ hashval += (hashval << 15); -+ -+ /* -+ * Yes, the divide here is expensive -- but it's generally the least -+ * of the performance issues given the amount of data that we iterate -+ * over to compute hash values, compare data, etc. -+ */ -+ ndx = hashval % agb->dtagb_hashsize; -+ -+ for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) { -+ ASSERT((caddr_t)key >= tomax); -+ ASSERT((caddr_t)key < tomax + buf->dtb_size); -+ -+ if (hashval != key->dtak_hashval || key->dtak_size != size) -+ continue; -+ -+ kdata = key->dtak_data; -+ ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size); -+ -+ for (act = agg->dtag_first; act->dta_intuple; -+ act = act->dta_next) { -+ i = act->dta_rec.dtrd_offset - agg->dtag_base; -+ limit = i + act->dta_rec.dtrd_size; -+ ASSERT(limit <= size); -+ isstr = DTRACEACT_ISSTRING(act); -+ -+ for (; i < limit; i++) { -+ if (kdata[i] != data[i]) -+ goto next; -+ -+ if (isstr && data[i] == '\0') -+ break; -+ } -+ } -+ -+ if (action != key->dtak_action) { -+ /* -+ * We are aggregating on the same value in the same -+ * aggregation with two different aggregating actions. 
-+ * (This should have been picked up in the compiler, -+ * so we may be dealing with errant or devious DIF.) -+ * This is an error condition; we indicate as much, -+ * and return. -+ */ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); -+ return; -+ } -+ -+ /* -+ * This is a hit: we need to apply the aggregator to -+ * the value at this key. -+ */ -+ dt_dbg_agg(" Aggregate [accum]: Buf %p, offs %d, act %d, " -+ "%lld (%lld, %lld)\n", -+ buf, size, -+ agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION, -+ *(uint64_t *)(kdata + size), expr, arg); -+ agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg); -+ return; -+next: -+ continue; -+ } -+ -+ /* -+ * We didn't find it. We need to allocate some zero-filled space, -+ * link it into the hash table appropriately, and apply the aggregator -+ * to the (zero-filled) value. -+ */ -+ offs = buf->dtb_offset; -+ while (offs & (align - 1)) -+ offs += sizeof(uint32_t); -+ -+ /* -+ * If we don't have enough room to both allocate a new key _and_ -+ * its associated data, increment the drop count and return. -+ */ -+ if ((uintptr_t)tomax + offs + fsize > -+ agb->dtagb_free - sizeof(struct dtrace_aggkey)) { -+ dtrace_buffer_drop(buf); -+ return; -+ } -+ -+ ASSERT(!(sizeof(struct dtrace_aggkey) & (sizeof(uintptr_t) - 1))); -+ key = (struct dtrace_aggkey *) -+ (agb->dtagb_free - sizeof(struct dtrace_aggkey)); -+ agb->dtagb_free -= sizeof(struct dtrace_aggkey); -+ -+ key->dtak_data = kdata = tomax + offs; -+ buf->dtb_offset = offs + fsize; -+ -+ /* -+ * Now copy the data across. -+ */ -+ *((dtrace_aggid_t *)kdata) = agg->dtag_id; -+ -+ for (i = sizeof(dtrace_aggid_t); i < size; i++) -+ kdata[i] = data[i]; -+ -+ /* -+ * Because strings are not zeroed out by default, we need to iterate -+ * looking for actions that store strings, and we need to explicitly -+ * pad these strings out with zeroes. 
-+ */ -+ for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { -+ int nul; -+ -+ if (!DTRACEACT_ISSTRING(act)) -+ continue; -+ -+ i = act->dta_rec.dtrd_offset - agg->dtag_base; -+ limit = i + act->dta_rec.dtrd_size; -+ ASSERT(limit <= size); -+ -+ for (nul = 0; i < limit; i++) { -+ if (nul) { -+ kdata[i] = '\0'; -+ continue; -+ } -+ -+ if (data[i] != '\0') -+ continue; -+ -+ nul = 1; -+ } -+ } -+ -+ for (i = size; i < fsize; i++) -+ kdata[i] = 0; -+ -+ key->dtak_hashval = hashval; -+ key->dtak_size = size; -+ key->dtak_action = action; -+ key->dtak_next = agb->dtagb_hash[ndx]; -+ agb->dtagb_hash[ndx] = key; -+ -+ /* -+ * Finally, apply the aggregator. -+ */ -+ *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial; -+ dt_dbg_agg(" Aggregate [initial]: Buf %p, offs %d, act %d, " -+ "%lld (%lld, %lld)\n", -+ buf, size, -+ agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION, -+ *(uint64_t *)(key->dtak_data + size), expr, arg); -+ agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg); -+} -diff --git a/dtrace/dtrace_ptofapi.c b/dtrace/dtrace_ptofapi.c -new file mode 100644 -index 000000000000..c42a8471879c ---- /dev/null -+++ b/dtrace/dtrace_ptofapi.c -@@ -0,0 +1,649 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_ptofapi.c -+ * DESCRIPTION: DTrace - (meta) provider-to-framework API -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/idr.h> -+#include <linux/list.h> -+#include <linux/module.h> -+#include <linux/slab.h> -+ -+#include "dtrace.h" -+ -+struct dtrace_provider *dtrace_provider; -+struct dtrace_meta *dtrace_meta_pid; -+struct dtrace_helpers *dtrace_deferred_pid; -+ -+DEFINE_MUTEX(dtrace_provider_lock); -+DEFINE_MUTEX(dtrace_meta_lock); -+ -+/* -+ * Register the calling provider with the DTrace core. This should generally -+ * be called by providers during module initialization. -+ */ -+int dtrace_register(const char *name, const struct dtrace_pattr *pap, -+ uint32_t priv, const struct cred *cr, -+ const struct dtrace_pops *pops, void *arg, -+ dtrace_provider_id_t *idp) -+{ -+ struct dtrace_provider *provider; -+ -+ if (name == NULL || pap == NULL || pops == NULL || idp == NULL) { -+ pr_warn("Failed to register provider %s: invalid args\n", -+ name ? name : "<NULL>"); -+ return -EINVAL; -+ } -+ -+ if (name[0] == '\0' || dtrace_badname(name)) { -+ pr_warn("Failed to register provider %s: invalid name\n", -+ name); -+ return -EINVAL; -+ } -+ -+ if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) || -+ pops->dtps_enable == NULL || pops->dtps_disable == NULL || -+ pops->dtps_destroy == NULL || -+ ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) { -+ pr_warn("Failed to register provider %s: invalid ops\n", -+ name); -+ return -EINVAL; -+ } -+ -+ if (dtrace_badattr(&pap->dtpa_provider) || -+ dtrace_badattr(&pap->dtpa_mod) || -+ dtrace_badattr(&pap->dtpa_func) || -+ dtrace_badattr(&pap->dtpa_name) || -+ dtrace_badattr(&pap->dtpa_args)) { -+ pr_warn("Failed to register provider %s: invalid attributes\n", -+ name); -+ return -EINVAL; -+ } -+ -+ if (priv & ~DTRACE_PRIV_ALL) { -+ pr_warn("Failed to register provider %s: invalid privilege " -+ "attributes\n", name); -+ return -EINVAL; -+ } -+ -+ if ((priv & DTRACE_PRIV_KERNEL) && -+ (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) && -+ pops->dtps_usermode == NULL) { -+ pr_warn("Failed to 
register provider %s: need " -+ "dtps_usermode() op for given privilege " -+ "attributes\n", name); -+ return -EINVAL; -+ } -+ -+ dt_dbg_prov("Registering provider '%s'...\n", name); -+ provider = kzalloc(sizeof(struct dtrace_provider), GFP_KERNEL); -+ if (provider == NULL) { -+ dt_dbg_prov(" Failed to allocate provider struct\n"); -+ return -ENOMEM; -+ } -+ provider->dtpv_name = dtrace_strdup(name); -+ if (provider->dtpv_name == NULL) { -+ kfree(provider); -+ dt_dbg_prov(" Failed to allocate provider name\n"); -+ return -ENOMEM; -+ } -+ provider->dtpv_attr = *pap; -+ provider->dtpv_priv.dtpp_flags = priv; -+ -+ if (cr != NULL) { -+ provider->dtpv_priv.dtpp_uid = -+ from_kuid(init_user_namespace, get_cred(cr)->uid); -+ put_cred(cr); -+ } -+ -+ provider->dtpv_pops = *pops; -+ -+ if (pops->dtps_provide == NULL) { -+ ASSERT(pops->dtps_provide_module != NULL); -+ provider->dtpv_pops.dtps_provide = -+ (void (*)(void *, const struct dtrace_probedesc *)) -+ dtrace_nullop; -+ } -+ -+ if (pops->dtps_provide_module == NULL) { -+ ASSERT(pops->dtps_provide != NULL); -+ provider->dtpv_pops.dtps_provide_module = -+ (void (*)(void *, struct module *))dtrace_nullop; -+ } -+ -+ if (pops->dtps_destroy_module == NULL) { -+ provider->dtpv_pops.dtps_destroy_module = -+ (void (*)(void *, struct module *))dtrace_nullop; -+ } -+ -+ if (pops->dtps_suspend == NULL) { -+ ASSERT(pops->dtps_resume == NULL); -+ provider->dtpv_pops.dtps_suspend = -+ (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; -+ provider->dtpv_pops.dtps_resume = -+ (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; -+ } -+ -+ provider->dtpv_arg = arg; -+ *idp = (dtrace_provider_id_t)provider; -+ -+ if (pops == &dtrace_provider_ops) { -+ ASSERT(MUTEX_HELD(&dtrace_provider_lock)); -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(dtrace_anon.dta_enabling == NULL); -+ -+ /* -+ * The DTrace provider must be at the head of the provider -+ * chain. 
-+ */ -+ provider->dtpv_next = dtrace_provider; -+ dtrace_provider = provider; -+ -+ dt_dbg_prov(" Done registering %s\n", name); -+ -+ return 0; -+ } -+ -+ mutex_lock(&module_mutex); -+ mutex_lock(&dtrace_provider_lock); -+ mutex_lock(&dtrace_lock); -+ -+ /* -+ * If there is at least one provider registered, we'll add this new one -+ * after the first provider. -+ */ -+ if (dtrace_provider != NULL) { -+ provider->dtpv_next = dtrace_provider->dtpv_next; -+ dtrace_provider->dtpv_next = provider; -+ } else -+ dtrace_provider = provider; -+ -+ if (dtrace_retained != NULL) { -+ dt_dbg_prov(" Processing retained enablings for %s\n", name); -+ dtrace_enabling_provide(provider); -+ -+ /* -+ * We must now call dtrace_enabling_matchall() which needs to -+ * acquire cpu_lock and dtrace_lock. We therefore need to drop -+ * our locks before calling it. -+ */ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ dtrace_enabling_matchall(); -+ -+ dt_dbg_prov(" Done registering %s\n", name); -+ -+ return 0; -+ } -+ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ mutex_unlock(&module_mutex); -+ -+ dt_dbg_prov(" Done registering %s\n", name); -+ -+ return 0; -+} -+EXPORT_SYMBOL(dtrace_register); -+ -+struct unreg_state { -+ struct dtrace_provider *prov; -+ struct dtrace_probe *first; -+}; -+ -+/* -+ * Check whether the given probe is still enabled for the given provider. -+ */ -+static int dtrace_unregister_check(int id, void *p, void *data) -+{ -+ struct dtrace_probe *probe = (struct dtrace_probe *)p; -+ struct unreg_state *st = (struct unreg_state *)data; -+ -+ if (probe->dtpr_provider != st->prov) -+ return 0; -+ -+ if (probe->dtpr_ecb == NULL) -+ return 0; -+ -+ return -EBUSY; -+} -+ -+/* -+ * Remove the given probe from the hash tables and the probe IDR, if it is -+ * associated with the given provider. The probes are chained for further -+ * processing. 
-+ */ -+static int dtrace_unregister_probe(int id, void *p, void *data) -+{ -+ struct dtrace_probe *probe = (struct dtrace_probe *)p; -+ struct unreg_state *st = (struct unreg_state *)data; -+ -+ if (probe->dtpr_provider != st->prov) -+ return 0; -+ -+ dtrace_hash_remove(dtrace_bymod, probe); -+ dtrace_hash_remove(dtrace_byfunc, probe); -+ dtrace_hash_remove(dtrace_byname, probe); -+ -+ if (st->first == NULL) { -+ st->first = probe; -+ probe->dtpr_nextmod = NULL; -+ } else { -+ probe->dtpr_nextmod = st->first; -+ st->first = probe; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Remove the given probe from the hash tables and the probe IDR, if it is -+ * associated with the given provider and if it does not have any enablings. -+ * The probes are chained for further processing. -+ */ -+static int dtrace_condense_probe(int id, void *p, void *data) -+{ -+ struct dtrace_probe *probe = (struct dtrace_probe *)p; -+ struct unreg_state *st = (struct unreg_state *)data; -+ -+ if (probe->dtpr_provider != st->prov) -+ return 0; -+ -+ if (probe->dtpr_ecb == NULL) -+ return 0; -+ -+ dtrace_hash_remove(dtrace_bymod, probe); -+ dtrace_hash_remove(dtrace_byfunc, probe); -+ dtrace_hash_remove(dtrace_byname, probe); -+ -+ if (st->first == NULL) { -+ st->first = probe; -+ probe->dtpr_nextmod = NULL; -+ } else { -+ probe->dtpr_nextmod = st->first; -+ st->first = probe; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Unregister the specified provider from the DTrace core. This should be -+ * called by provider during module cleanup. -+ * -+ * The mutex_lock is already held during this call. 
-+ */ -+int dtrace_unregister(dtrace_provider_id_t id) -+{ -+ struct dtrace_provider *old = (struct dtrace_provider *)id; -+ struct dtrace_provider *prev = NULL; -+ int err, self = 0; -+ struct dtrace_probe *probe; -+ struct unreg_state st = { old, NULL }; -+ -+ ASSERT(MUTEX_HELD(&module_mutex)); -+ -+ dt_dbg_prov("Unregistering provider '%s'...\n", old->dtpv_name); -+ -+ if (old->dtpv_pops.dtps_enable == -+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) { -+ /* -+ * When the provider is the DTrace core itself, we're called -+ * with locks already held. -+ */ -+ ASSERT(old == dtrace_provider); -+ ASSERT(MUTEX_HELD(&dtrace_provider_lock)); -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ self = 1; -+ -+ if (dtrace_provider->dtpv_next != NULL) { -+ /* -+ * We cannot and should not remove the DTrace provider -+ * if there is any other provider left. -+ */ -+ dt_dbg_prov(" Failed to unregister %s - not last\n", -+ old->dtpv_name); -+ -+ return -EBUSY; -+ } -+ } else { -+ mutex_lock(&dtrace_provider_lock); -+ mutex_lock(&dtrace_lock); -+ } -+ -+ /* -+ * If /dev/dtrace/dtrace is still held open by a process, or if there -+ * are anonymous probes that are still enabled, we refuse to deregister -+ * providers, unless the provider has been invalidated explicitly. -+ */ -+ if (!old->dtpv_defunct && -+ (dtrace_opens || (dtrace_anon.dta_state != NULL && -+ dtrace_anon.dta_state->dts_necbs > 0))) { -+ if (!self) { -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ } -+ -+ dt_dbg_prov(" Failed to unregister %s - dtrace in use\n", -+ old->dtpv_name); -+ -+ return -EBUSY; -+ } -+ -+ /* -+ * Check whether any of the probes associated with this provider are -+ * still enabled (having at least one ECB). If any are found, we -+ * cannot remove this provider. 
-+ */ -+ st.prov = old; -+ err = dtrace_probe_for_each(dtrace_unregister_check, &st); -+ if (err < 0) { -+ if (!self) { -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ } -+ -+ dt_dbg_prov(" Failed to unregister %s - provider in use\n", -+ old->dtpv_name); -+ -+ return err; -+ } -+ -+ /* -+ * All the probes associated with this provider are disabled. We can -+ * safely remove these probes from the hashtables and the probe array. -+ * We chain all the probes together for further processing. -+ */ -+ dtrace_probe_for_each(dtrace_unregister_probe, &st); -+ -+ /* -+ * The probes associated with the provider have been removed. Ensure -+ * synchronization on probe IDR processing. -+ */ -+ dtrace_sync(); -+ -+ /* -+ * Now get rid of the actual probes. -+ */ -+ for (probe = st.first; probe != NULL; probe = st.first) { -+ int probe_id = probe->dtpr_id; -+ -+ st.first = probe->dtpr_nextmod; -+ -+ old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe_id, -+ probe->dtpr_arg); -+ -+ kfree(probe->dtpr_mod); -+ kfree(probe->dtpr_func); -+ kfree(probe->dtpr_name); -+ kmem_cache_free(dtrace_probe_cachep, probe); -+ -+ dtrace_probe_remove_id(probe_id); -+ } -+ -+ prev = dtrace_provider; -+ if (prev == old) { -+ /* -+ * We are removing the provider at the head of the chain. -+ */ -+ ASSERT(self); -+ ASSERT(old->dtpv_next == NULL); -+ -+ dtrace_provider = old->dtpv_next; -+ } else { -+ while (prev != NULL && prev->dtpv_next != old) -+ prev = prev->dtpv_next; -+ -+ if (prev == NULL) { -+ pr_err("Attempt to unregister non-existent DTrace " -+ "provider %p\n", (void *)id); -+ BUG(); -+ } -+ -+ prev->dtpv_next = old->dtpv_next; -+ } -+ -+ if (!self) { -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ } -+ -+ kfree(old->dtpv_name); -+ kfree(old); -+ -+ dt_dbg_prov(" Done unregistering\n"); -+ -+ return 0; -+} -+EXPORT_SYMBOL(dtrace_unregister); -+ -+/* -+ * Invalidate the specified provider. 
All subsequent probe lookups for the -+ * specified provider will fail, but the probes will not be removed. -+ */ -+void dtrace_invalidate(dtrace_provider_id_t id) -+{ -+ struct dtrace_provider *pvp = (struct dtrace_provider *)id; -+ -+ ASSERT(pvp->dtpv_pops.dtps_enable != -+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); -+ -+ mutex_lock(&dtrace_provider_lock); -+ mutex_lock(&dtrace_lock); -+ -+ pvp->dtpv_defunct = 1; -+ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+} -+EXPORT_SYMBOL(dtrace_invalidate); -+ -+/* -+ * Indicate whether or not DTrace has attached. -+ */ -+int dtrace_attached(void) -+{ -+ /* -+ * dtrace_provider will be non-NULL iff the DTrace driver has -+ * attached. (It's non-NULL because DTrace is always itself a -+ * provider.) -+ */ -+ return dtrace_provider != NULL; -+} -+EXPORT_SYMBOL(dtrace_attached); -+ -+/* -+ * Remove all the unenabled probes for the given provider. This function is -+ * not unlike dtrace_unregister(), except that it doesn't remove the provider -+ * -- just as many of its associated probes as it can. -+ */ -+int dtrace_condense(dtrace_provider_id_t id) -+{ -+ struct dtrace_provider *prov = (struct dtrace_provider *)id; -+ struct dtrace_probe *probe; -+ struct unreg_state st = { prov, NULL }; -+ -+ /* -+ * Make sure this isn't the DTrace provider itself. -+ */ -+ ASSERT(prov->dtpv_pops.dtps_enable != -+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); -+ -+ mutex_lock(&dtrace_provider_lock); -+ mutex_lock(&dtrace_lock); -+ -+ /* -+ * Attempt to destroy the probes associated with this provider. -+ */ -+ dtrace_probe_for_each(dtrace_condense_probe, &st); -+ -+ /* -+ * The probes associated with the provider have been removed. Ensure -+ * synchronization on probe IDR processing. -+ */ -+ dtrace_sync(); -+ -+ /* -+ * Now get rid of the actual probes. 
-+ */ -+ for (probe = st.first; probe != NULL; probe = st.first) { -+ int probe_id = probe->dtpr_id; -+ -+ st.first = probe->dtpr_nextmod; -+ -+ prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe_id, -+ probe->dtpr_arg); -+ -+ kfree(probe->dtpr_mod); -+ kfree(probe->dtpr_func); -+ kfree(probe->dtpr_name); -+ kfree(probe); -+ -+ dtrace_probe_remove_id(probe_id); -+ } -+ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_provider_lock); -+ -+ return 0; -+} -+EXPORT_SYMBOL(dtrace_condense); -+ -+int dtrace_meta_register(const char *name, const struct dtrace_mops *mops, -+ void *arg, dtrace_meta_provider_id_t *idp) -+{ -+ struct dtrace_meta *meta; -+ struct dtrace_helpers *help, *next; -+ int i; -+ -+ *idp = DTRACE_METAPROVNONE; -+ -+ /* -+ * We strictly don't need the name, but we hold onto it for -+ * debuggability. All hail error queues! -+ */ -+ if (name == NULL) { -+ pr_warn("failed to register meta-provider: invalid name\n"); -+ return -EINVAL; -+ } -+ -+ if (mops == NULL || -+ mops->dtms_create_probe == NULL || -+ mops->dtms_provide_pid == NULL || -+ mops->dtms_remove_pid == NULL) { -+ pr_warn("failed to register meta-register %s: invalid ops\n", -+ name); -+ return -EINVAL; -+ } -+ -+ dt_dbg_prov("Registering provider '%s'...\n", name); -+ meta = kzalloc(sizeof(struct dtrace_meta), GFP_KERNEL); -+ if (meta == NULL) { -+ dt_dbg_prov(" Failed to allocate meta provider struct\n"); -+ return -ENOMEM; -+ } -+ meta->dtm_mops = *mops; -+ meta->dtm_name = kmalloc(strlen(name) + 1, GFP_KERNEL); -+ if (meta->dtm_name == NULL) { -+ kfree(meta); -+ dt_dbg_prov(" Failed to allocate meta provider name\n"); -+ return -ENOMEM; -+ } -+ strcpy(meta->dtm_name, name); -+ meta->dtm_arg = arg; -+ -+ mutex_lock(&dtrace_meta_lock); -+ mutex_lock(&dtrace_lock); -+ -+ if (dtrace_meta_pid != NULL) { -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_meta_lock); -+ pr_warn("failed to register meta-register %s: user-land " -+ "meta-provider exists", name); -+ kfree(meta->dtm_name); 
-+ kfree(meta); -+ return -EINVAL; -+ } -+ -+ dtrace_meta_pid = meta; -+ *idp = (dtrace_meta_provider_id_t)meta; -+ -+ /* -+ * If there are providers and probes ready to go, pass them -+ * off to the new meta provider now. -+ */ -+ help = dtrace_deferred_pid; -+ dtrace_deferred_pid = NULL; -+ -+ mutex_unlock(&dtrace_lock); -+ -+ while (help != NULL) { -+ for (i = 0; i < help->dthps_nprovs; i++) { -+ dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, -+ help->dthps_pid); -+ } -+ -+ next = help->dthps_next; -+ help->dthps_next = NULL; -+ help->dthps_prev = NULL; -+ help->dthps_deferred = 0; -+ help = next; -+ } -+ -+ mutex_unlock(&dtrace_meta_lock); -+ -+ dt_dbg_prov(" Done registering %s\n", name); -+ -+ return 0; -+} -+EXPORT_SYMBOL(dtrace_meta_register); -+ -+int dtrace_meta_unregister(dtrace_meta_provider_id_t id) -+{ -+ struct dtrace_meta **pp, *old = (struct dtrace_meta *)id; -+ -+ dt_dbg_prov("Unregistering meta provider '%s'...\n", old->dtm_name); -+ mutex_lock(&dtrace_meta_lock); -+ mutex_lock(&dtrace_lock); -+ -+ if (old == dtrace_meta_pid) { -+ pp = &dtrace_meta_pid; -+ } else { -+ pr_err("Attempt to unregister non-existent DTrace meta-" -+ "provider %p\n", (void *)old); -+ BUG(); -+ } -+ -+ if (old->dtm_count != 0) { -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_meta_lock); -+ return -EBUSY; -+ } -+ -+ *pp = NULL; -+ -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&dtrace_meta_lock); -+ -+ kfree(old->dtm_name); -+ kfree(old); -+ -+ dt_dbg_prov(" Done unregistering\n"); -+ -+ return 0; -+} -+EXPORT_SYMBOL(dtrace_meta_unregister); -diff --git a/dtrace/dtrace_spec.c b/dtrace/dtrace_spec.c -new file mode 100644 -index 000000000000..4ca9bb7a6427 ---- /dev/null -+++ b/dtrace/dtrace_spec.c -@@ -0,0 +1,434 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_spec.c -+ * DESCRIPTION: DTrace - speculation implementation -+ * -+ * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/dtrace_cpu.h> -+#include <linux/smp.h> -+#include <asm/cmpxchg.h> -+ -+#include "dtrace.h" -+ -+/* -+ * Given consumer state, this routine finds a speculation in the INACTIVE -+ * state and transitions it into the ACTIVE state. If there is no speculation -+ * in the INACTIVE state, 0 is returned. In this case, no error counter is -+ * incremented -- it is up to the caller to take appropriate action. -+ */ -+int dtrace_speculation(struct dtrace_state *state) -+{ -+ int i = 0; -+ uint32_t count, *stat = &state->dts_speculations_unavail; -+ enum dtrace_speculation_state curr; -+ -+ while (i < state->dts_nspeculations) { -+ struct dtrace_speculation *spec = &state->dts_speculations[i]; -+ -+ curr = spec->dtsp_state; -+ -+ if (curr != DTRACESPEC_INACTIVE) { -+ if (curr == DTRACESPEC_COMMITTINGMANY || -+ curr == DTRACESPEC_COMMITTING || -+ curr == DTRACESPEC_DISCARDING) -+ stat = &state->dts_speculations_busy; -+ -+ i++; -+ continue; -+ } -+ -+ if (cmpxchg((uint32_t *)&spec->dtsp_state, curr, -+ DTRACESPEC_ACTIVE) == curr) -+ return i + 1; -+ } -+ -+ /* -+ * We couldn't find a speculation. If we found as much as a single -+ * busy speculation buffer, we'll attribute this failure as "busy" -+ * instead of "unavail". -+ */ -+ do { -+ count = *stat; -+ } while (cmpxchg(stat, count, count + 1) != count); -+ -+ return 0; -+} -+ -+/* -+ * This routine commits an active speculation. 
If the specified speculation -+ * is not in a valid state to perform a commit(), this routine will silently do -+ * nothing. The state of the specified speculation is transitioned according -+ * to the state transition diagram outlined in <sys/dtrace_impl.h> -+ */ -+void dtrace_speculation_commit(struct dtrace_state *state, processorid_t cpu, -+ dtrace_specid_t which) -+{ -+ struct dtrace_speculation *spec; -+ struct dtrace_buffer *src, *dest; -+ uintptr_t daddr, saddr, dlimit; -+ enum dtrace_speculation_state curr, new = 0; -+ intptr_t offs; -+ -+ if (which == 0) -+ return; -+ -+ if (which > state->dts_nspeculations) { -+ per_cpu_core(cpu)->cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; -+ return; -+ } -+ -+ spec = &state->dts_speculations[which - 1]; -+ src = &spec->dtsp_buffer[cpu]; -+ dest = &state->dts_buffer[cpu]; -+ -+ do { -+ curr = spec->dtsp_state; -+ -+ if (curr == DTRACESPEC_COMMITTINGMANY) -+ break; -+ -+ switch (curr) { -+ case DTRACESPEC_INACTIVE: -+ case DTRACESPEC_DISCARDING: -+ return; -+ -+ case DTRACESPEC_COMMITTING: -+ /* -+ * This is only possible if we are (a) commit()'ing -+ * without having done a prior speculate() on this CPU -+ * and (b) racing with another commit() on a different -+ * CPU. There's nothing to do -- we just assert that -+ * our offset is 0. -+ */ -+ ASSERT(src->dtb_offset == 0); -+ return; -+ -+ case DTRACESPEC_ACTIVE: -+ new = DTRACESPEC_COMMITTING; -+ break; -+ -+ case DTRACESPEC_ACTIVEONE: -+ /* -+ * This speculation is active on one CPU. If our -+ * buffer offset is non-zero, we know that the one CPU -+ * must be us. Otherwise, we are committing on a -+ * different CPU from the speculate(), and we must -+ * rely on being asynchronously cleaned. 
-+ */ -+ if (src->dtb_offset != 0) { -+ new = DTRACESPEC_COMMITTING; -+ break; -+ } -+ /*FALLTHROUGH*/ -+ -+ case DTRACESPEC_ACTIVEMANY: -+ new = DTRACESPEC_COMMITTINGMANY; -+ break; -+ -+ default: -+ ASSERT(0); -+ } -+ } while (cmpxchg((uint32_t *)&spec->dtsp_state, curr, new) != -+ curr); -+ -+ /* -+ * We have set the state to indicate that we are committing this -+ * speculation. Now reserve the necessary space in the destination -+ * buffer. -+ */ -+ offs = dtrace_buffer_reserve(dest, src->dtb_offset, sizeof(uint64_t), -+ state, NULL); -+ if (offs < 0) { -+ dtrace_buffer_drop(dest); -+ goto out; -+ } -+ -+ /* -+ * We have the space; copy the buffer across. (Note that this is a -+ * highly subobtimal bcopy(); in the unlikely event that this becomes -+ * a serious performance issue, a high-performance DTrace-specific -+ * bcopy() should obviously be invented.) -+ */ -+ daddr = (uintptr_t)dest->dtb_tomax + offs; -+ dlimit = daddr + src->dtb_offset; -+ saddr = (uintptr_t)src->dtb_tomax; -+ -+ /* -+ * First, the aligned portion. -+ */ -+ while (dlimit - daddr >= sizeof(uint64_t)) { -+ *((uint64_t *)daddr) = *((uint64_t *)saddr); -+ *((uint64_t *)daddr) = *((uint64_t *)saddr); -+ -+ daddr += sizeof(uint64_t); -+ saddr += sizeof(uint64_t); -+ } -+ -+ /* -+ * Now any left-over bit... -+ */ -+ while (dlimit - daddr) -+ *((uint8_t *)daddr++) = *((uint8_t *)saddr++); -+ -+ /* -+ * Finally, commit the reserved space in the destination buffer. -+ */ -+ dest->dtb_offset = offs + src->dtb_offset; -+ -+out: -+ /* -+ * If we're lucky enough to be the only active CPU on this speculation -+ * buffer, we can just set the state back to DTRACESPEC_INACTIVE. -+ */ -+ if (curr == DTRACESPEC_ACTIVE || -+ (curr == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) { -+ /* -+ * Will cause unused warning if DEBUG is not defined. 
-+ */ -+ uint32_t rval = -+ cmpxchg((uint32_t *)&spec->dtsp_state, -+ DTRACESPEC_COMMITTING, -+ DTRACESPEC_INACTIVE); -+ -+ ASSERT(rval == DTRACESPEC_COMMITTING); -+ rval = 0; /* Avoid warning about unused variable if !DEBUG */ -+ } -+ -+ src->dtb_offset = 0; -+ src->dtb_xamot_drops += src->dtb_drops; -+ src->dtb_drops = 0; -+} -+ -+/* -+ * This routine discards an active speculation. If the specified speculation -+ * is not in a valid state to perform a discard(), this routine will silently -+ * do nothing. The state of the specified speculation is transitioned -+ * according to the state transition diagram outlined in <sys/dtrace_impl.h> -+ */ -+void dtrace_speculation_discard(struct dtrace_state *state, processorid_t cpu, -+ dtrace_specid_t which) -+{ -+ struct dtrace_speculation *spec; -+ enum dtrace_speculation_state curr, new = 0; -+ struct dtrace_buffer *buf; -+ -+ if (which == 0) -+ return; -+ -+ if (which > state->dts_nspeculations) { -+ per_cpu_core(cpu)->cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; -+ return; -+ } -+ -+ spec = &state->dts_speculations[which - 1]; -+ buf = &spec->dtsp_buffer[cpu]; -+ -+ do { -+ curr = spec->dtsp_state; -+ -+ switch (curr) { -+ case DTRACESPEC_INACTIVE: -+ case DTRACESPEC_COMMITTINGMANY: -+ case DTRACESPEC_COMMITTING: -+ case DTRACESPEC_DISCARDING: -+ return; -+ -+ case DTRACESPEC_ACTIVE: -+ case DTRACESPEC_ACTIVEMANY: -+ new = DTRACESPEC_DISCARDING; -+ break; -+ -+ case DTRACESPEC_ACTIVEONE: -+ if (buf->dtb_offset != 0) -+ new = DTRACESPEC_INACTIVE; -+ else -+ new = DTRACESPEC_DISCARDING; -+ -+ break; -+ -+ default: -+ ASSERT(0); -+ } -+ } while (cmpxchg((uint32_t *)&spec->dtsp_state, curr, new) != curr); -+ -+ buf->dtb_offset = 0; -+ buf->dtb_drops = 0; -+} -+ -+/* -+ * Note: not called from probe context. This function is called -+ * asynchronously from cross call context to clean any speculations that are -+ * in the COMMITTINGMANY or DISCARDING states. 
These speculations may not be -+ * transitioned back to the INACTIVE state until all CPUs have cleaned the -+ * speculation. -+ */ -+void dtrace_speculation_clean_here(struct dtrace_state *state) -+{ -+ dtrace_icookie_t cookie; -+ processorid_t cpu = smp_processor_id(); -+ struct dtrace_buffer *dest = &state->dts_buffer[cpu]; -+ dtrace_specid_t i; -+ uint32_t re_entry; -+ -+ DTRACE_SYNC_ENTER_CRITICAL(cookie, re_entry); -+ -+ if (dest->dtb_tomax == NULL) { -+ DTRACE_SYNC_EXIT_CRITICAL(cookie, re_entry); -+ return; -+ } -+ -+ for (i = 0; i < state->dts_nspeculations; i++) { -+ struct dtrace_speculation *spec = &state->dts_speculations[i]; -+ struct dtrace_buffer *src = &spec->dtsp_buffer[cpu]; -+ -+ if (src->dtb_tomax == NULL) -+ continue; -+ -+ if (spec->dtsp_state == DTRACESPEC_DISCARDING) { -+ src->dtb_offset = 0; -+ continue; -+ } -+ -+ if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) -+ continue; -+ -+ if (src->dtb_offset == 0) -+ continue; -+ -+ dtrace_speculation_commit(state, cpu, i + 1); -+ } -+ -+ DTRACE_SYNC_EXIT_CRITICAL(cookie, re_entry); -+} -+ -+void dtrace_speculation_clean(struct dtrace_state *state) -+{ -+ int work = 0, rv; -+ dtrace_specid_t i; -+ -+ for (i = 0; i < state->dts_nspeculations; i++) { -+ struct dtrace_speculation *spec = &state->dts_speculations[i]; -+ -+ ASSERT(!spec->dtsp_cleaning); -+ -+ if (spec->dtsp_state != DTRACESPEC_DISCARDING && -+ spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) -+ continue; -+ -+ work++; -+ spec->dtsp_cleaning = 1; -+ } -+ -+ if (!work) -+ return; -+ -+ dtrace_xcall(DTRACE_CPUALL, -+ (dtrace_xcall_t)dtrace_speculation_clean_here, state); -+ -+ /* -+ * We now know that all CPUs have committed or discarded their -+ * speculation buffers, as appropriate. We can now set the state -+ * to inactive. 
-+ */ -+ for (i = 0; i < state->dts_nspeculations; i++) { -+ struct dtrace_speculation *spec = -+ &state->dts_speculations[i]; -+ enum dtrace_speculation_state curr, new; -+ -+ if (!spec->dtsp_cleaning) -+ continue; -+ -+ curr = spec->dtsp_state; -+ ASSERT(curr == DTRACESPEC_DISCARDING || -+ curr == DTRACESPEC_COMMITTINGMANY); -+ -+ new = DTRACESPEC_INACTIVE; -+ -+ rv = cmpxchg((uint32_t *)&spec->dtsp_state, curr, new); -+ ASSERT(rv == curr); -+ spec->dtsp_cleaning = 0; -+ } -+} -+ -+/* -+ * Called as part of a speculate() to get the speculative buffer associated -+ * with a given speculation. Returns NULL if the specified speculation is not -+ * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and -+ * the active CPU is not the specified CPU -- the speculation will be -+ * atomically transitioned into the ACTIVEMANY state. -+ */ -+struct dtrace_buffer *dtrace_speculation_buffer(struct dtrace_state *state, -+ processorid_t cpu, -+ dtrace_specid_t which) -+{ -+ struct dtrace_speculation *spec; -+ enum dtrace_speculation_state curr, new = 0; -+ struct dtrace_buffer *buf; -+ -+ if (which == 0) -+ return NULL; -+ -+ if (which > state->dts_nspeculations) { -+ per_cpu_core(cpu)->cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; -+ return NULL; -+ } -+ -+ spec = &state->dts_speculations[which - 1]; -+ buf = &spec->dtsp_buffer[cpu]; -+ -+ do { -+ curr = spec->dtsp_state; -+ -+ switch (curr) { -+ case DTRACESPEC_INACTIVE: -+ case DTRACESPEC_COMMITTINGMANY: -+ case DTRACESPEC_DISCARDING: -+ return NULL; -+ -+ case DTRACESPEC_COMMITTING: -+ ASSERT(buf->dtb_offset == 0); -+ return NULL; -+ -+ case DTRACESPEC_ACTIVEONE: -+ /* -+ * This speculation is currently active on one CPU. -+ * Check the offset in the buffer; if it's non-zero, -+ * that CPU must be us (and we leave the state alone). -+ * If it's zero, assume that we're starting on a new -+ * CPU -- and change the state to indicate that the -+ * speculation is active on more than one CPU. 
-+ */ -+ if (buf->dtb_offset != 0) -+ return buf; -+ -+ new = DTRACESPEC_ACTIVEMANY; -+ break; -+ -+ case DTRACESPEC_ACTIVEMANY: -+ return buf; -+ -+ case DTRACESPEC_ACTIVE: -+ new = DTRACESPEC_ACTIVEONE; -+ break; -+ -+ default: -+ ASSERT(0); -+ } -+ } while (cmpxchg((uint32_t *)&spec->dtsp_state, curr, new) != curr); -+ -+ ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY); -+ -+ return buf; -+} -diff --git a/dtrace/dtrace_state.c b/dtrace/dtrace_state.c -new file mode 100644 -index 000000000000..7fb696e8f1a9 ---- /dev/null -+++ b/dtrace/dtrace_state.c -@@ -0,0 +1,1108 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_state.c -+ * DESCRIPTION: DTrace - consumer state implementation -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/cyclic.h> -+#include <linux/fs.h> -+#include <linux/idr.h> -+#include <linux/slab.h> -+#include <linux/smp.h> -+#include <linux/vmalloc.h> -+#include <asm/cmpxchg.h> -+ -+#include "dtrace.h" -+ -+int dtrace_destructive_disallow = 0; -+dtrace_optval_t dtrace_nspec_default = 1; -+dtrace_optval_t dtrace_specsize_default = 32 * 1024; -+dtrace_optval_t dtrace_dstate_defsize = 1 * 1024 * 1024; -+size_t dtrace_strsize_default = 256; -+dtrace_optval_t dtrace_stackframes_default = 20; -+dtrace_optval_t dtrace_ustackframes_default = 100; -+dtrace_optval_t dtrace_cleanrate_default = 9900990; -+dtrace_optval_t dtrace_cleanrate_min = 20000; -+dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; -+dtrace_optval_t dtrace_aggrate_default = NANOSEC; -+dtrace_optval_t dtrace_switchrate_default = NANOSEC; -+dtrace_optval_t dtrace_statusrate_default = NANOSEC; -+dtrace_optval_t dtrace_statusrate_max = (uint64_t)10 * NANOSEC; -+dtrace_optval_t dtrace_jstackframes_default = 50; -+dtrace_optval_t dtrace_jstackstrsize_default = 512; -+ktime_t dtrace_deadman_interval = KTIME_INIT(1, 0); -+ktime_t dtrace_deadman_timeout = KTIME_INIT(10, 0); -+ktime_t dtrace_deadman_user = KTIME_INIT(30, 0); -+ -+ /* Sampling before counting */ -+uint64_t dtrace_sync_sample_count = 100; -+ -+dtrace_id_t dtrace_probeid_begin; -+dtrace_id_t dtrace_probeid_end; -+dtrace_id_t dtrace_probeid_error; -+ -+struct dtrace_dynvar dtrace_dynhash_sink; -+ -+#define DTRACE_DYNHASH_FREE 0 -+#define DTRACE_DYNHASH_SINK 1 -+#define DTRACE_DYNHASH_VALID 2 -+ -+#define DTRACE_DYNVAR_CHUNKSIZE 256 -+ -+static void dtrace_dynvar_clean(struct dtrace_dstate *dstate) -+{ -+ struct dtrace_dynvar *dirty; -+ struct dtrace_dstate_percpu *dcpu; -+ int i, work = 0; -+ -+ for (i = 0; i < NR_CPUS; i++) { -+ dcpu = &dstate->dtds_percpu[i]; -+ -+ ASSERT(dcpu->dtdsc_rinsing == NULL); -+ -+ /* -+ * If the dirty list is NULL, there is no dirty work to do. 
-+ */ -+ if (dcpu->dtdsc_dirty == NULL) -+ continue; -+ -+ /* -+ * If the clean list is non-NULL, then we're not going to do -+ * any work for this CPU -- it means that there has not been -+ * a dtrace_dynvar() allocation on this CPU (or from this CPU) -+ * since the last time we cleaned house. -+ */ -+ if (dcpu->dtdsc_clean != NULL) -+ continue; -+ -+ work = 1; -+ -+ /* -+ * Atomically move the dirty list aside. -+ */ -+ do { -+ dirty = dcpu->dtdsc_dirty; -+ -+ /* -+ * Before we zap the dirty list, set the rinsing list. -+ * (This allows for a potential assertion in -+ * dtrace_dynvar(): if a free dynamic variable appears -+ * on a hash chain, either the dirty list or the -+ * rinsing list for some CPU must be non-NULL.) -+ */ -+ dcpu->dtdsc_rinsing = dirty; -+ dtrace_membar_producer(); -+ } while (cmpxchg(&dcpu->dtdsc_dirty, dirty, NULL) != dirty); -+ } -+ -+ /* -+ * No work to do; return. -+ */ -+ if (!work) -+ return; -+ -+ dtrace_sync(); -+ -+ for (i = 0; i < NR_CPUS; i++) { -+ dcpu = &dstate->dtds_percpu[i]; -+ -+ if (dcpu->dtdsc_rinsing == NULL) -+ continue; -+ -+ /* -+ * We are now guaranteed that no hash chain contains a pointer -+ * into this dirty list; we can make it clean. -+ */ -+ ASSERT(dcpu->dtdsc_clean == NULL); -+ dcpu->dtdsc_clean = dcpu->dtdsc_rinsing; -+ dcpu->dtdsc_rinsing = NULL; -+ } -+ -+ /* -+ * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make -+ * sure that all CPUs have seen all of the dtdsc_clean pointers. -+ * This prevents a race whereby a CPU incorrectly decides that -+ * the state should be something other than DTRACE_DSTATE_CLEAN -+ * after dtrace_dynvar_clean() has completed. 
-+ */ -+ dtrace_sync(); -+ -+ dstate->dtds_state = DTRACE_DSTATE_CLEAN; -+} -+ -+int dtrace_dstate_init(struct dtrace_dstate *dstate, size_t size) -+{ -+ size_t hashsize, maxper, min, -+ chunksize = dstate->dtds_chunksize; -+ void *base, *percpu; -+ uintptr_t limit; -+ struct dtrace_dynvar *dvar, *next, *start; -+ int i; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); -+ -+ memset(dstate, 0, sizeof(struct dtrace_dstate)); -+ -+ dstate->dtds_chunksize = chunksize; -+ if (dstate->dtds_chunksize == 0) -+ dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; -+ -+ min = dstate->dtds_chunksize + sizeof(struct dtrace_dynhash); -+ if (size < min) -+ size = min; -+ -+ base = dtrace_vzalloc_try(size); -+ if (base == NULL) -+ return -ENOMEM; -+ percpu = kmem_cache_alloc(dtrace_state_cachep, GFP_KERNEL); -+ if (percpu == NULL) { -+ vfree(base); -+ return -ENOMEM; -+ } -+ -+ dstate->dtds_size = size; -+ dstate->dtds_base = base; -+ dstate->dtds_percpu = percpu; -+ memset(dstate->dtds_percpu, 0, -+ NR_CPUS * sizeof(struct dtrace_dstate_percpu)); -+ -+ hashsize = size / -+ (dstate->dtds_chunksize + sizeof(struct dtrace_dynhash)); -+ -+ if (hashsize != 1 && (hashsize & 1)) -+ hashsize--; -+ -+ dstate->dtds_hashsize = hashsize; -+ dstate->dtds_hash = dstate->dtds_base; -+ -+ /* -+ * Set all of our hash buckets to point to the single sink, and (if -+ * it hasn't already been set), set the sink's hash value to be the -+ * sink sentinel value. The sink is needed for dynamic variable -+ * lookups to know that they have iterated over an entire, valid hash -+ * chain. -+ */ -+ for (i = 0; i < hashsize; i++) -+ dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink; -+ -+ if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK) -+ dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK; -+ -+ /* -+ * Determine number of active CPUs. Divide free list evenly among -+ * active CPUs. 
-+ */ -+ start = (struct dtrace_dynvar *)((uintptr_t)base + -+ hashsize * sizeof(struct dtrace_dynhash)); -+ limit = (uintptr_t)base + size; -+ -+ maxper = (limit - (uintptr_t)start) / NR_CPUS; -+ maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; -+ -+ for (i = 0; i < NR_CPUS; i++) { -+ dstate->dtds_percpu[i].dtdsc_free = dvar = start; -+ -+ /* -+ * If we don't even have enough chunks to make it once through -+ * NCPUs, we're just going to allocate everything to the first -+ * CPU. And if we're on the last CPU, we're going to allocate -+ * whatever is left over. In either case, we set the limit to -+ * be the limit of the dynamic variable space. -+ */ -+ if (maxper == 0 || i == NR_CPUS - 1) { -+ limit = (uintptr_t)base + size; -+ start = NULL; -+ } else { -+ limit = (uintptr_t)start + maxper; -+ start = (struct dtrace_dynvar *)limit; -+ } -+ -+ ASSERT(limit <= (uintptr_t)base + size); -+ -+ for (;;) { -+ next = (struct dtrace_dynvar *)((uintptr_t)dvar + -+ dstate->dtds_chunksize); -+ -+ if ((uintptr_t)next + dstate->dtds_chunksize >= limit) -+ break; -+ -+ dvar->dtdv_next = next; -+ dvar = next; -+ } -+ -+ if (maxper == 0) -+ break; -+ } -+ -+ return 0; -+} -+ -+void dtrace_dstate_fini(struct dtrace_dstate *dstate) -+{ -+ ASSERT(MUTEX_HELD(&cpu_lock)); -+ -+ if (dstate->dtds_base == NULL) -+ return; -+ -+ vfree(dstate->dtds_base); -+ kmem_cache_free(dtrace_state_cachep, dstate->dtds_percpu); -+} -+ -+void dtrace_vstate_fini(struct dtrace_vstate *vstate) -+{ -+ /* -+ * If only there was a logical XOR operator... 
-+ */ -+ ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL)); -+ -+ if (vstate->dtvs_nglobals > 0) -+ vfree(vstate->dtvs_globals); -+ -+ if (vstate->dtvs_ntlocals > 0) -+ vfree(vstate->dtvs_tlocals); -+ -+ ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL)); -+ -+ if (vstate->dtvs_nlocals > 0) -+ vfree(vstate->dtvs_locals); -+} -+ -+static void dtrace_state_clean(struct dtrace_state *state) -+{ -+ dtrace_optval_t *opt = state->dts_options; -+ -+ if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE && -+ state->dts_activity != DTRACE_ACTIVITY_DRAINING) -+ return; -+ -+ dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars); -+ dtrace_speculation_clean(state); -+ -+ cyclic_reprogram(state->dts_cleaner, ns_to_ktime( -+ opt[DTRACEOPT_CLEANRATE])); -+} -+ -+static void dtrace_state_deadman(struct dtrace_state *state) -+{ -+ ktime_t now; -+ -+ dtrace_sync(); -+ -+ now = dtrace_gethrtime(); -+ if (state != dtrace_anon.dta_state && -+ ktime_ge(ktime_sub(now, state->dts_laststatus), -+ dtrace_deadman_user)) -+ return; -+ -+ /* -+ * We must be sure that dts_alive never appears to be less than the -+ * value upon entry to dtrace_state_deadman(), and because we lack a -+ * dtrace_cas64(), we cannot store to it atomically. We thus instead -+ * store KTIME_MAX to it, followed by a memory barrier, followed by -+ * the new value. This assures that dts_alive never appears to be -+ * less than its true value, regardless of the order in which the -+ * stores to the underlying storage are issued. 
-+ */ -+ state->dts_alive = ktime_set(KTIME_SEC_MAX, 0); -+ dtrace_membar_producer(); -+ state->dts_alive = now; -+} -+ -+struct dtrace_state *dtrace_state_create(struct file *file) -+{ -+ struct dtrace_state *state; -+ dtrace_optval_t *opt; -+ int bufsize = NR_CPUS * sizeof(struct dtrace_buffer), i; -+#ifdef FIXME -+ const struct cred *cr = file->f_cred; -+#endif -+ dtrace_aggid_t aggid; -+ -+ ASSERT(MUTEX_HELD(&cpu_lock)); -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ state = kzalloc(sizeof(struct dtrace_state), GFP_KERNEL); -+ if (state == NULL) -+ return NULL; -+ -+ state->dts_epid = DTRACE_EPIDNONE + 1; -+ state->dts_buffer = vzalloc(bufsize); -+ if (state->dts_buffer == NULL) { -+ kfree(state); -+ return NULL; -+ } -+ -+ state->dts_aggbuffer = vzalloc(bufsize); -+ if (state->dts_aggbuffer == NULL) { -+ vfree(state->dts_buffer); -+ kfree(state); -+ return NULL; -+ } -+ -+ idr_init(&state->dts_agg_idr); -+ state->dts_naggs = 0; -+ state->dts_cleaner = 0; -+ state->dts_deadman = 0; -+ state->dts_vstate.dtvs_state = state; -+ -+ /* -+ * Create a first entry in the aggregation IDR, so that ID 0 is used as -+ * that gets used as meaning 'none'. -+ */ -+ idr_preload(GFP_KERNEL); -+ aggid = idr_alloc_cyclic(&state->dts_agg_idr, NULL, 0, 0, GFP_NOWAIT); -+ idr_preload_end(); -+ -+ ASSERT(aggid == 0); -+ -+ for (i = 0; i < DTRACEOPT_MAX; i++) -+ state->dts_options[i] = DTRACEOPT_UNSET; -+ -+ /* -+ * Set the default options. 
-+ */ -+ opt = state->dts_options; -+ opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH; -+ opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO; -+ opt[DTRACEOPT_NSPEC] = dtrace_nspec_default; -+ opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default; -+ opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL; -+ opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default; -+ opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default; -+ opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default; -+ opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default; -+ opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default; -+ opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default; -+ opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default; -+ opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default; -+ opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default; -+ -+ state->dts_activity = DTRACE_ACTIVITY_INACTIVE; -+ -+#ifdef FIXME -+ /* -+ * Set probe visibility and destructiveness based on user credential -+ * information. For actual anonymous tracing or if all privileges are -+ * set, checks are bypassed. -+ */ -+ if (cr == NULL || -+ PRIV_POLICY_ONLY(cr, PRIV_ALL, FALSE)) { -+ state->dts_cred.dcr_visible = DTRACE_CRV_ALL; -+ state->dts_cred.dcr_action = DTRACE_CRA_ALL; -+ } else { -+ state->dts_cred.dcr_cred = get_cred(cr); -+ -+ /* -+ * CRA_PROC means "we have *some* privilege for dtrace" and -+ * it unlocks the use of variables like pid, etc. -+ */ -+ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, FALSE) || -+ PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, FALSE)) -+ state->dts_cred.dcr_action |= DTRACE_CRA_PROC; -+ -+ /* -+ * The DTRACE_USER privilege allows the use of syscall and -+ * profile providers. If the user also has PROC_OWNER, we -+ * extend the scope to include additional visibility and -+ * destructive power. 
-+ */ -+ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, FALSE)) { -+ if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, FALSE)) -+ state->dts_cred.dcr_visible |= -+ DTRACE_CRV_ALLPROC; -+ -+ state->dts_cred.dcr_action |= -+ DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; -+ } -+ -+ /* -+ * Holding the DTRACE_KERNEL privilege also implies that -+ * the user has the DTRACE_USER privilege from a visibility -+ * perspective. But without further privileges, some -+ * destructive actions are not available. -+ */ -+ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, FALSE)) { -+ /* -+ * Make all probes in all zones visible. However, -+ * this doesn't mean that all actions become available -+ * to all zones. -+ */ -+ state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL | -+ DTRACE_CRV_ALLPROC; -+ state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL | -+ DTRACE_CRA_PROC; -+ -+ /* -+ * Holding PROC_OWNER means that destructive actions -+ * are allowed. -+ */ -+ if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, FALSE)) -+ state->dts_cred.dcr_action |= -+ DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; -+ } -+ -+ /* -+ * Holding the DTRACE_PROC privilege gives control over the -+ * fasttrap and pid providers. We need to grant wider -+ * destructive privileges in the event that the user has -+ * PROC_OWNER . 
-+ */ -+ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, FALSE)) { -+ if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, FALSE)) -+ state->dts_cred.dcr_action |= -+ DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; -+ } -+ } -+#else -+ state->dts_cred.dcr_visible = DTRACE_CRV_ALLPROC | DTRACE_CRV_KERNEL; -+ state->dts_cred.dcr_action = DTRACE_CRA_ALL; -+#endif -+ -+ return state; -+} -+ -+static int dtrace_state_buffer(struct dtrace_state *state, -+ struct dtrace_buffer *buf, int which) -+{ -+ dtrace_optval_t *opt = state->dts_options, size; -+ processorid_t cpu = DTRACE_CPUALL; -+ int flags = 0, rval; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(MUTEX_HELD(&cpu_lock)); -+ ASSERT(which < DTRACEOPT_MAX); -+ ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE || -+ (state == dtrace_anon.dta_state && -+ state->dts_activity == DTRACE_ACTIVITY_ACTIVE)); -+ -+ if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0) -+ return 0; -+ -+ if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET) -+ cpu = opt[DTRACEOPT_CPU]; -+ -+ if (which == DTRACEOPT_SPECSIZE) -+ flags |= DTRACEBUF_NOSWITCH; -+ -+ if (which == DTRACEOPT_BUFSIZE) { -+ if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING) -+ flags |= DTRACEBUF_RING; -+ -+ if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL) -+ flags |= DTRACEBUF_FILL; -+ -+ if (state != dtrace_anon.dta_state || -+ state->dts_activity != DTRACE_ACTIVITY_ACTIVE) -+ flags |= DTRACEBUF_INACTIVE; -+ } -+ -+ for (size = opt[which]; size >= sizeof(uint64_t); size >>= 1) { -+ /* -+ * The size must be 8-byte aligned. If the size is not 8-byte -+ * aligned, drop it down by the difference. -+ */ -+ if (size & (sizeof(uint64_t) - 1)) -+ size -= size & (sizeof(uint64_t) - 1); -+ -+ if (size < state->dts_reserve) { -+ /* -+ * Buffers always must be large enough to accommodate -+ * their prereserved space. We return -E2BIG instead -+ * of ENOMEM in this case to allow for user-level -+ * software to differentiate the cases. 
-+ */ -+ return -E2BIG; -+ } -+ -+ rval = dtrace_buffer_alloc(buf, size, flags, cpu); -+ if (rval != -ENOMEM) { -+ opt[which] = size; -+ return rval; -+ } -+ -+ if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) -+ return rval; -+ } -+ -+ return -ENOMEM; -+} -+ -+static int dtrace_state_buffers(struct dtrace_state *state) -+{ -+ struct dtrace_speculation *spec = state->dts_speculations; -+ int rval, i; -+ -+ rval = dtrace_state_buffer(state, state->dts_buffer, DTRACEOPT_BUFSIZE); -+ if (rval != 0) -+ return rval; -+ -+ rval = dtrace_state_buffer(state, state->dts_aggbuffer, -+ DTRACEOPT_AGGSIZE); -+ if (rval != 0) -+ return rval; -+ -+ for (i = 0; i < state->dts_nspeculations; i++) { -+ rval = dtrace_state_buffer(state, spec[i].dtsp_buffer, -+ DTRACEOPT_SPECSIZE); -+ if (rval != 0) -+ return rval; -+ } -+ -+ return 0; -+} -+ -+static void dtrace_begin_probe(struct dtrace_state *state) -+{ -+ processorid_t cpuid = smp_processor_id(); -+ -+ ASSERT(state->dts_buffer[cpuid].dtb_flags & DTRACEBUF_INACTIVE); -+ state->dts_buffer[cpuid].dtb_flags &= ~DTRACEBUF_INACTIVE; -+ -+ dtrace_probe(dtrace_probeid_begin, (uint64_t)(uintptr_t)state, 0, 0, 0, -+ 0, 0, 0); -+ -+ /* -+ * We may have had an exit action from a BEGIN probe; only change our -+ * state to ACTIVE if we're still in WARMUP. -+ */ -+ ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP || -+ state->dts_activity == DTRACE_ACTIVITY_DRAINING); -+ -+ if (state->dts_activity == DTRACE_ACTIVITY_WARMUP) -+ state->dts_activity = DTRACE_ACTIVITY_ACTIVE; -+ -+ dtrace_membar_enter(); -+} -+ -+static void dtrace_state_prereserve(struct dtrace_state *state) -+{ -+ struct dtrace_ecb *ecb; -+ struct dtrace_probe *probe; -+ -+ state->dts_reserve = 0; -+ -+ if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL) -+ return; -+ -+ /* -+ * If our buffer policy is a "fill" buffer policy, we need to set the -+ * prereserved space to be the space required by the END probes. 
-+ */ -+ probe = dtrace_probe_lookup_id(dtrace_probeid_end); -+ ASSERT(probe != NULL); -+ -+ for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { -+ if (ecb->dte_state != state) -+ continue; -+ -+ state->dts_reserve += ecb->dte_needed + ecb->dte_alignment; -+ } -+} -+ -+int dtrace_state_go(struct dtrace_state *state, processorid_t *cpu) -+{ -+ dtrace_optval_t *opt = state->dts_options, sz, nspec; -+ struct dtrace_speculation *spec; -+ struct dtrace_buffer *buf; -+ struct cyc_handler hdlr; -+ struct cyc_time when; -+ processorid_t cpuid; -+ int rval = 0, i, bufsize = NR_CPUS * sizeof(struct dtrace_buffer); -+ -+ mutex_lock(&cpu_lock); -+ mutex_lock(&dtrace_lock); -+ -+ if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { -+ rval = -EBUSY; -+ goto out; -+ } -+ -+ /* -+ * Before we can perform any checks, we must prime all of the -+ * retained enablings that correspond to this state. -+ */ -+ dtrace_enabling_prime(state); -+ -+ if (state->dts_destructive && !state->dts_cred.dcr_destructive) { -+ rval = -EACCES; -+ goto out; -+ } -+ -+ dtrace_state_prereserve(state); -+ -+ /* -+ * If a cpu has been selected check if its value is valid and -+ * the cpu is online. -+ */ -+ cpuid = opt[DTRACEOPT_CPU]; -+ if (cpuid != DTRACE_CPUALL && -+ (cpuid < 0 || cpuid >= NR_CPUS || !cpu_online(cpuid))) { -+ rval = -ENXIO; -+ goto out; -+ } -+ -+ /* -+ * Now we want to do is try to allocate our speculations. -+ * We do not automatically resize the number of speculations; if -+ * this fails, we will fail the operation. 
-+ */ -+ nspec = opt[DTRACEOPT_NSPEC]; -+ ASSERT(nspec != DTRACEOPT_UNSET); -+ -+ if (nspec > INT_MAX) { -+ rval = -ENOMEM; -+ goto out; -+ } -+ -+ spec = vzalloc(nspec * sizeof(struct dtrace_speculation)); -+ if (spec == NULL) { -+ rval = -ENOMEM; -+ goto out; -+ } -+ -+ state->dts_speculations = spec; -+ state->dts_nspeculations = (int)nspec; -+ -+ for (i = 0; i < nspec; i++) { -+ buf = vzalloc(bufsize); -+ if (buf == NULL) { -+ rval = -ENOMEM; -+ goto err; -+ } -+ -+ spec[i].dtsp_buffer = buf; -+ } -+ -+ if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) { -+ if (dtrace_anon.dta_state == NULL) { -+ rval = -ENOENT; -+ goto out; -+ } -+ -+ if (state->dts_necbs != 0) { -+ rval = -EALREADY; -+ goto out; -+ } -+ -+ state->dts_anon = dtrace_anon_grab(); -+ ASSERT(state->dts_anon != NULL); -+ state = state->dts_anon; -+ -+ /* -+ * We want "grabanon" to be set in the grabbed state, so we'll -+ * copy that option value from the grabbing state into the -+ * grabbed state. -+ */ -+ state->dts_options[DTRACEOPT_GRABANON] = -+ opt[DTRACEOPT_GRABANON]; -+ -+ *cpu = dtrace_anon.dta_beganon; -+ -+ /* -+ * If the anonymous state is active (as it almost certainly -+ * is if the anonymous enabling ultimately matched anything), -+ * we don't allow any further option processing -- but we -+ * don't return failure. -+ */ -+ if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) -+ goto out; -+ } -+ -+ if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET && -+ opt[DTRACEOPT_AGGSIZE] != 0) { -+ if (state->dts_naggs == 0) { -+ /* -+ * We're not going to create an aggregation buffer -+ * because we don't have any ECBs that contain -+ * aggregations -- set this option to 0. -+ */ -+ opt[DTRACEOPT_AGGSIZE] = 0; -+ } else { -+ /* -+ * If we have an aggregation buffer, we must also have -+ * a buffer to use as scratch. 
-+ */ -+ if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || -+ opt[DTRACEOPT_BUFSIZE] < state->dts_needed) -+ opt[DTRACEOPT_BUFSIZE] = state->dts_needed; -+ } -+ } -+ -+ if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET && -+ opt[DTRACEOPT_SPECSIZE] != 0) { -+ /* -+ * We are not going to create speculation buffers if we do not -+ * have any ECBs that actually speculate. -+ */ -+ if (!state->dts_speculates) -+ opt[DTRACEOPT_SPECSIZE] = 0; -+ } -+ -+ /* -+ * The bare minimum size for any buffer that we're actually going to -+ * do anything to is sizeof (uint64_t). -+ */ -+ sz = sizeof(uint64_t); -+ -+ if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) || -+ (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) || -+ (state->dts_naggs != 0 && opt[DTRACEOPT_AGGSIZE] < sz)) { -+ /* -+ * A buffer size has been explicitly set to 0 (or to a size -+ * that will be adjusted to 0) and we need the space -- we -+ * need to return failure. We return -ENOSPC to differentiate -+ * it from failing to allocate a buffer due to failure to meet -+ * the reserve (for which we return -E2BIG). 
-+ */ -+ rval = -ENOSPC; -+ goto out; -+ } -+ -+ rval = dtrace_state_buffers(state); -+ if (rval != 0) -+ goto err; -+ -+ sz = opt[DTRACEOPT_DYNVARSIZE]; -+ if (sz == DTRACEOPT_UNSET) -+ sz = dtrace_dstate_defsize; -+ -+ do { -+ rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz); -+ -+ if (rval == 0) -+ break; -+ -+ if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) -+ goto err; -+ } while (sz >>= 1); -+ -+ opt[DTRACEOPT_DYNVARSIZE] = sz; -+ -+ if (rval != 0) -+ goto err; -+ -+ if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max) -+ opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max; -+ -+ if (opt[DTRACEOPT_CLEANRATE] == 0) -+ opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; -+ -+ if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min) -+ opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min; -+ -+ if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) -+ opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; -+ -+ hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; -+ hdlr.cyh_arg = (uintptr_t)state; -+ hdlr.cyh_level = CY_LOW_LEVEL; -+ -+ when.cyt_when = ktime_set(0, 0); -+ when.cyt_interval = CY_INTERVAL_INF; -+ -+ state->dts_cleaner = cyclic_add(&hdlr, &when); -+ cyclic_reprogram(state->dts_cleaner, ns_to_ktime( -+ opt[DTRACEOPT_CLEANRATE])); -+ -+ hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman; -+ hdlr.cyh_arg = (uintptr_t)state; -+ hdlr.cyh_level = CY_LOW_LEVEL; -+ -+ when.cyt_when = ktime_set(0, 0); -+ when.cyt_interval = dtrace_deadman_interval; -+ -+ state->dts_alive = state->dts_laststatus = dtrace_gethrtime(); -+ state->dts_deadman = cyclic_add(&hdlr, &when); -+ -+ state->dts_activity = DTRACE_ACTIVITY_WARMUP; -+ -+ /* -+ * Issue xcall even when the BEGIN probe fires on current CPU. The -+ * underlying implementation of SMP will turn it into direct function -+ * call. It is not allowed to turn off interrupts so we need to pick -+ * a cpu first and then xcall it. This way a begin probe will always -+ * fire on the expected cpu. 
-+ */ -+ *cpu = (cpuid == DTRACE_CPUALL) ? smp_processor_id() : cpuid; -+ dtrace_xcall(*cpu, (dtrace_xcall_t)dtrace_begin_probe, state); -+ -+ /* -+ * Regardless of whether or not now we're in ACTIVE or DRAINING, we -+ * want each CPU to transition its principal buffer out of the -+ * INACTIVE state. Doing this assures that no CPU will suddenly begin -+ * processing an ECB halfway down a probe's ECB chain; all CPUs will -+ * atomically transition from processing none of a state's ECBs to -+ * processing all of them. -+ */ -+ dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_buffer_activate, -+ state); -+ goto out; -+ -+err: -+ dtrace_buffer_free(state->dts_buffer); -+ dtrace_buffer_free(state->dts_aggbuffer); -+ -+ nspec = state->dts_nspeculations; -+ if (nspec == 0) { -+ ASSERT(state->dts_speculations == NULL); -+ goto out; -+ } -+ -+ spec = state->dts_speculations; -+ ASSERT(spec != NULL); -+ -+ for (i = 0; i < state->dts_nspeculations; i++) { -+ buf = spec[i].dtsp_buffer; -+ if (buf == NULL) -+ break; -+ -+ dtrace_buffer_free(buf); -+ vfree(buf); -+ } -+ -+ vfree(spec); -+ state->dts_nspeculations = 0; -+ state->dts_speculations = NULL; -+ -+out: -+ mutex_unlock(&dtrace_lock); -+ mutex_unlock(&cpu_lock); -+ -+ return rval; -+} -+ -+static void dtrace_end_probe(struct dtrace_state *state) -+{ -+ dtrace_probe(dtrace_probeid_end, (uint64_t)(uintptr_t)state, 0, 0, 0, -+ 0, 0, 0); -+ -+ state->dts_activity = DTRACE_ACTIVITY_STOPPED; -+ -+ dtrace_membar_enter(); -+} -+ -+int dtrace_state_stop(struct dtrace_state *state, processorid_t *cpu) -+{ -+ processorid_t cpuid = state->dts_options[DTRACEOPT_CPU]; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE && -+ state->dts_activity != DTRACE_ACTIVITY_DRAINING) -+ return -EINVAL; -+ -+ /* -+ * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync -+ * to be sure that every CPU has seen it. See below for the details -+ * on why this is done. 
-+ */ -+ state->dts_activity = DTRACE_ACTIVITY_DRAINING; -+ dtrace_sync(); -+ -+ /* -+ * By this point, it is impossible for any CPU to be still processing -+ * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to -+ * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any -+ * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe() -+ * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN -+ * iff we're in the END probe. -+ */ -+ state->dts_activity = DTRACE_ACTIVITY_COOLDOWN; -+ dtrace_sync(); -+ ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN); -+ -+ /* -+ * Finally, we can release the reserve and call the END probe. We -+ * disable interrupts across calling the END probe to allow us to -+ * return the CPU on which we actually called the END probe. This -+ * allows user-land to be sure that this CPU's principal buffer is -+ * processed last. -+ */ -+ state->dts_reserve = 0; -+ -+ /* -+ * Same as for BEGIN probe in dtrace_state_go(). The END probe must -+ * also fire on the enabled cpu. -+ */ -+ *cpu = (cpuid == DTRACE_CPUALL) ? 
smp_processor_id() : cpuid; -+ dtrace_xcall(*cpu, (dtrace_xcall_t)dtrace_end_probe, state); -+ -+ dtrace_sync(); -+ return 0; -+} -+ -+int dtrace_state_option(struct dtrace_state *state, dtrace_optid_t option, -+ dtrace_optval_t val) -+{ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ -+ if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) -+ return -EBUSY; -+ -+ if (option >= DTRACEOPT_MAX) -+ return -EINVAL; -+ -+ if (option != DTRACEOPT_CPU && val < 0) -+ return -EINVAL; -+ -+ switch (option) { -+ case DTRACEOPT_DESTRUCTIVE: -+ if (dtrace_destructive_disallow) -+ return -EACCES; -+ -+ state->dts_cred.dcr_destructive = 1; -+ break; -+ -+ case DTRACEOPT_BUFSIZE: -+ case DTRACEOPT_DYNVARSIZE: -+ case DTRACEOPT_AGGSIZE: -+ case DTRACEOPT_SPECSIZE: -+ case DTRACEOPT_STRSIZE: -+ if (val < 0) -+ return -EINVAL; -+ -+ /* -+ * If this is an otherwise negative value, set it to the -+ * highest multiple of 128m less than LONG_MAX. Technically, -+ * we're adjusting the size without regard to the buffer -+ * resizing policy, but in fact, this has no effect -- if we -+ * set the buffer size to ~LONG_MAX and the buffer policy is -+ * ultimately set to be "manual", the buffer allocation is -+ * guaranteed to fail, if only because the allocation requires -+ * two buffers. (We set the the size to the highest multiple -+ * of 128m because it ensures that the size will remain a -+ * multiple of a megabyte when repeatedly halved -- all the -+ * way down to 15m.) 
-+ */ -+ if (val >= LONG_MAX) -+ val = LONG_MAX - (1 << 27) + 1; -+ } -+ -+ state->dts_options[option] = val; -+ -+ return 0; -+} -+ -+void dtrace_state_destroy(struct dtrace_state *state) -+{ -+ struct dtrace_ecb *ecb; -+ struct dtrace_vstate *vstate = &state->dts_vstate; -+ int i; -+ struct dtrace_speculation *spec = state->dts_speculations; -+ int nspec = state->dts_nspeculations; -+ uint32_t match; -+ -+ ASSERT(MUTEX_HELD(&dtrace_lock)); -+ ASSERT(MUTEX_HELD(&cpu_lock)); -+ -+ /* -+ * First, retract any retained enablings for this state. -+ */ -+ dtrace_enabling_retract(state); -+ ASSERT(state->dts_nretained == 0); -+ -+ if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE || -+ state->dts_activity == DTRACE_ACTIVITY_DRAINING) { -+ /* -+ * We have managed to come into dtrace_state_destroy() on a -+ * hot enabling -- almost certainly because of a disorderly -+ * shutdown of a consumer. (That is, a consumer that is -+ * exiting without having called dtrace_stop().) In this case, -+ * we're going to set our activity to be KILLED, and then -+ * issue a sync to be sure that everyone is out of probe -+ * context before we start blowing away ECBs. -+ */ -+ state->dts_activity = DTRACE_ACTIVITY_KILLED; -+ dtrace_sync(); -+ } -+ -+ /* -+ * Release the credential hold we took in dtrace_state_create(). -+ */ -+ if (state->dts_cred.dcr_cred != NULL) -+ put_cred(state->dts_cred.dcr_cred); -+ -+ /* -+ * Now we can safely disable and destroy any enabled probes. Because -+ * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress -+ * (especially if they're all enabled), we take two passes through the -+ * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and -+ * in the second we disable whatever is left over. 
-+ */ -+ for (match = DTRACE_PRIV_KERNEL; ; match = 0) { -+ for (i = 0; i < state->dts_necbs; i++) { -+ ecb = state->dts_ecbs[i]; -+ if (ecb == NULL) -+ continue; -+ -+ if (match && ecb->dte_probe != NULL) { -+ struct dtrace_probe *probe = -+ ecb->dte_probe; -+ struct dtrace_provider *prov = -+ probe->dtpr_provider; -+ -+ if (!(prov->dtpv_priv.dtpp_flags & match)) -+ continue; -+ } -+ -+ dtrace_ecb_disable(ecb); -+ dtrace_ecb_destroy(ecb); -+ } -+ -+ if (!match) -+ break; -+ } -+ -+ /* -+ * Before we free the buffers, perform one more sync to assure that -+ * every CPU is out of probe context. -+ */ -+ dtrace_sync(); -+ -+ dtrace_buffer_free(state->dts_buffer); -+ dtrace_buffer_free(state->dts_aggbuffer); -+ -+ for (i = 0; i < nspec; i++) -+ dtrace_buffer_free(spec[i].dtsp_buffer); -+ -+ if (state->dts_cleaner != CYCLIC_NONE) -+ cyclic_remove(state->dts_cleaner); -+ -+ if (state->dts_deadman != CYCLIC_NONE) -+ cyclic_remove(state->dts_deadman); -+ -+ dtrace_dstate_fini(&vstate->dtvs_dynvars); -+ dtrace_vstate_fini(vstate); -+ vfree(state->dts_ecbs); -+ -+ /* -+ * If there were aggregations allocated, they should have been cleaned -+ * up by now, so we can get rid of the idr. -+ */ -+ idr_destroy(&state->dts_agg_idr); -+ -+ vfree(state->dts_buffer); -+ vfree(state->dts_aggbuffer); -+ -+ for (i = 0; i < nspec; i++) -+ vfree(spec[i].dtsp_buffer); -+ -+ vfree(spec); -+ -+ dtrace_format_destroy(state); -+ -+ kfree(state); -+} -diff --git a/dtrace/dtrace_util.c b/dtrace/dtrace_util.c -new file mode 100644 -index 000000000000..b7fdaaadcb6b ---- /dev/null -+++ b/dtrace/dtrace_util.c -@@ -0,0 +1,283 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_util.c -+ * DESCRIPTION: DTrace - utility functions -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/dtrace_cpu.h> -+#include <linux/module.h> -+#include <linux/vmalloc.h> -+#include <asm/pgtable.h> -+ -+#include "dtrace.h" -+ -+int dtrace_isglob(const char *s) -+{ -+ char c; -+ -+ while ((c = *s++) != '\0') { -+ if (c == '[' || c == '?' || c == '*' || c == '\\') -+ return 1; -+ } -+ -+ return 0; -+} -+EXPORT_SYMBOL(dtrace_isglob); -+ -+int dtrace_gmatch(const char *s, const char *p) -+{ -+ const char *olds = s; -+ char sc; -+ char pc; -+ -+ sc = *s++; -+ pc = *p++; -+ -+ if (!pc) -+ return !sc; -+ -+ switch (pc) { -+ case '[': { -+ int ok = 0; -+ char lc = '\0'; -+ int inv = 0; -+ -+ if (!sc) -+ return 0; -+ -+ if (*p == '!') { -+ inv = 1; -+ p++; -+ } -+ -+ pc = *p++; -+ do { -+ if (pc == '-' && lc && *p != ']') { -+ pc = *p++; -+ if (pc == '\\') -+ pc = *p++; -+ -+ if (inv) { -+ if (sc < lc || sc > pc) -+ ok++; -+ else -+ return 0; -+ } else { -+ if (lc <= sc && sc <= pc) -+ ok++; -+ } -+ } else if (pc == '\\') { -+ pc = *p++; -+ } -+ -+ lc = sc; -+ -+ if (inv) { -+ if (sc != lc) -+ ok++; -+ else -+ return 0; -+ } else { -+ if (sc == lc) -+ ok++; -+ } -+ -+ pc = *p++; -+ } while (pc != ']'); -+ -+ return ok ? dtrace_gmatch(s, p) : 0; -+ } -+ case '\\': -+ pc = *p++; -+ if (!pc) -+ return 0; -+ -+ /* fall-through */ -+ default: -+ if (pc != sc) -+ return 0; -+ -+ /* fall-through */ -+ case '?': -+ return sc ? 
dtrace_gmatch(s, p) : 0; -+ case '*': -+ while (*p == '*') -+ p++; -+ -+ if (!*p) -+ return 1; -+ -+ s = olds; -+ while (*s) { -+ if (dtrace_gmatch(s, p)) -+ return 1; -+ -+ s++; -+ } -+ -+ return 0; -+ } -+} -+EXPORT_SYMBOL(dtrace_gmatch); -+ -+int dtrace_badattr(const struct dtrace_attribute *a) -+{ -+ return a->dtat_name > DTRACE_STABILITY_MAX || -+ a->dtat_data > DTRACE_STABILITY_MAX || -+ a->dtat_class > DTRACE_CLASS_MAX; -+} -+ -+/* -+ * Allocate a chunk of virtual memory in kernel space, and zero it out. This -+ * allocation might fail (which will report a backtrace in the kernel log, yet -+ * it is harmless). -+ */ -+void *dtrace_vzalloc_try(unsigned long size) -+{ -+ return __vmalloc(size, -+ GFP_NOWAIT | __GFP_FS | __GFP_IO | __GFP_NOMEMALLOC | -+ __GFP_NORETRY | __GFP_NOWARN | __GFP_ZERO, -+ PAGE_KERNEL); -+} -+EXPORT_SYMBOL(dtrace_vzalloc_try); -+ -+/* -+ * Return a duplicate copy of a string. If the specified string is NULL, this -+ * function returs a zero-length string. -+ */ -+char *dtrace_strdup(const char *str) -+{ -+ return kstrdup(str ? str : "", GFP_KERNEL); -+} -+ -+/* -+ * Compare two strings using safe loads. -+ */ -+int dtrace_strncmp(char *s1, char *s2, size_t limit) -+{ -+ uint8_t c1, c2; -+ volatile uint16_t *flags; -+ -+ if (s1 == s2 || limit == 0) -+ return 0; -+ -+ flags = (volatile uint16_t *)&this_cpu_core->cpuc_dtrace_flags; -+ -+ do { -+ if (s1 == NULL) -+ c1 = '\0'; -+ else -+ c1 = dtrace_load8((uintptr_t)s1++); -+ -+ if (s2 == NULL) -+ c2 = '\0'; -+ else -+ c2 = dtrace_load8((uintptr_t)s2++); -+ -+ if (c1 != c2) -+ return (c1 - c2); -+ } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT)); -+ -+ return 0; -+} -+ -+/* -+ * Compute strlen(s) for a string using safe memory accesses. The additional -+ * len parameter is used to specify a maximum length to ensure completion. 
-+ */ -+size_t dtrace_strlen(const char *s, size_t lim) -+{ -+ uint_t len; -+ -+ for (len = 0; len != lim; len++) { -+ if (dtrace_load8((uintptr_t)s++) == '\0') -+ break; -+ } -+ -+ return len; -+} -+ -+#define DTRACE_ISALPHA(c) (((c) >= 'a' && (c) <= 'z') || \ -+ ((c) >= 'A' && (c) <= 'Z')) -+int dtrace_badname(const char *s) -+{ -+ char c; -+ -+ if (s == NULL) -+ return 0; -+ -+ c = *s++; -+ if (c == '\0') -+ return 0; -+ -+ if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.') -+ return 1; -+ -+ while ((c = *s++) != '\0') { -+ if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') && -+ c != '-' && c != '_' && c != '.' && c != '`') -+ return 1; -+ } -+ -+ return 0; -+} -+ -+void dtrace_cred2priv(const struct cred *cr, uint32_t *privp, kuid_t *uidp) -+{ -+#ifdef FIXME -+/* -+ * This should probably be rewritten based on capabilities in the struct cred. -+ */ -+ uint32_t priv; -+ -+ if (cr == NULL) -+ priv = DTRACE_PRIV_ALL; -+ else { -+ const struct cred *lcr = get_cred(cr); -+ -+ if (PRIV_POLICY_ONLY(lcr, PRIV_ALL, FALSE)) -+ priv = DTRACE_PRIV_ALL; -+ else { -+ *uidp = lcr->uid; -+ priv = 0; -+ -+ if (PRIV_POLICY_ONLY(lcr, PRIV_DTRACE_KERNEL, FALSE)) -+ priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER; -+ else if (PRIV_POLICY_ONLY(lcr, PRIV_DTRACE_USER, -+ FALSE)) -+ priv |= DTRACE_PRIV_USER; -+ -+ if (PRIV_POLICY_ONLY(lcr, PRIV_DTRACE_PROC, FALSE)) -+ priv |= DTRACE_PRIV_PROC; -+ if (PRIV_POLICY_ONLY(lcr, PRIV_PROC_OWNER, FALSE)) -+ priv |= DTRACE_PRIV_OWNER; -+ } -+ -+ put_cred(cr); -+ } -+ -+ *privp = priv; -+#else -+ *privp = DTRACE_PRIV_ALL; -+ -+ if (cr != NULL) { -+ const struct cred *lcr = get_cred(cr); -+ -+ *uidp = lcr->uid; -+ put_cred(cr); -+ } -+#endif -+} -+ --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0006-dtrace-systrace-provider-core-components.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0006-dtrace-systrace-provider-core-components.patch deleted file mode 100644 index 93966fe3717c..000000000000 
--- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0006-dtrace-systrace-provider-core-components.patch +++ /dev/null @@ -1,327 +0,0 @@ -From 02b2a30f8c5af76595de98a3db4c4d79ea87e0d8 Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 16:55:11 +0000 -Subject: [PATCH 06/20] dtrace: systrace provider core components - -This implements the core (linked-in) components of the DTrace systrace -provider, which intercepts system call invocations. As previously, -the arch-dependent pieces needed for x86 are also provided. - -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - arch/x86/entry/syscall_32.c | 4 + - arch/x86/entry/syscall_64.c | 4 + - arch/x86/include/asm/dtrace_syscall.h | 3 + - arch/x86/include/asm/dtrace_syscall_types.h | 11 +++ - arch/x86/include/asm/syscall.h | 8 ++ - arch/x86/kernel/dtrace_syscall.c | 97 +++++++++++++++++++++ - arch/x86/kernel/dtrace_syscall_stubs.S | 0 - include/linux/dtrace_syscall.h | 60 +++++++++++++ - kernel/dtrace/Kconfig | 9 ++ - kernel/dtrace/Makefile | 1 + - 10 files changed, 197 insertions(+) - create mode 100644 arch/x86/include/asm/dtrace_syscall.h - create mode 100644 arch/x86/include/asm/dtrace_syscall_types.h - create mode 100644 arch/x86/kernel/dtrace_syscall.c - create mode 100644 arch/x86/kernel/dtrace_syscall_stubs.S - create mode 100644 include/linux/dtrace_syscall.h - -diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c -index 7d17b3addbbb..877a12180a17 100644 ---- a/arch/x86/entry/syscall_32.c -+++ b/arch/x86/entry/syscall_32.c -@@ -22,7 +22,11 @@ extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned lon - - #define __SYSCALL_I386(nr, sym, qual) 
[nr] = sym, - -+#if IS_ENABLED(CONFIG_DT_SYSTRACE) -+__visible sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = { -+#else - __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = { -+#endif /* CONFIG_DT_SYSTRACE || CONFIG_DT_SYSTRACE_MODULE */ - /* - * Smells like a compiler bug -- it doesn't work - * when the & below is removed. -diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c -index adf619a856e8..7359edbf1a15 100644 ---- a/arch/x86/entry/syscall_64.c -+++ b/arch/x86/entry/syscall_64.c -@@ -24,7 +24,11 @@ SYSCALL_DEFINE0(ni_syscall) - #define __SYSCALL_64(nr, sym, qual) [nr] = sym, - #define __SYSCALL_X32(nr, sym, qual) - -+#if IS_ENABLED(CONFIG_DT_SYSTRACE) -+asmlinkage sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { -+#else - asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { -+#endif /* CONFIG_DT_SYSTRACE || CONFIG_DT_SYSTRACE_MODULE */ - /* - * Smells like a compiler bug -- it doesn't work - * when the & below is removed. -diff --git a/arch/x86/include/asm/dtrace_syscall.h b/arch/x86/include/asm/dtrace_syscall.h -new file mode 100644 -index 000000000000..402826562478 ---- /dev/null -+++ b/arch/x86/include/asm/dtrace_syscall.h -@@ -0,0 +1,3 @@ -+/* -+ * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -diff --git a/arch/x86/include/asm/dtrace_syscall_types.h b/arch/x86/include/asm/dtrace_syscall_types.h -new file mode 100644 -index 000000000000..2b3ee563ad14 ---- /dev/null -+++ b/arch/x86/include/asm/dtrace_syscall_types.h -@@ -0,0 +1,11 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ */ -+ -+#include <linux/types.h> -+#include <linux/dtrace_types.h> -+ -+typedef asmlinkage long (*dt_sys_call_t)(const struct pt_regs *regs); -+ -+#define DTRACE_SYSCALL_WRAP_PREFIX "__x64_" -diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h -index 8db3fdb6102e..120260898c79 100644 ---- a/arch/x86/include/asm/syscall.h -+++ b/arch/x86/include/asm/syscall.h -@@ -24,7 +24,11 @@ typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, - unsigned long, unsigned long, - unsigned long, unsigned long); - #endif /* CONFIG_X86_64 */ -+#if IS_ENABLED(CONFIG_DT_SYSTRACE) -+extern sys_call_ptr_t sys_call_table[]; -+#else - extern const sys_call_ptr_t sys_call_table[]; -+#endif - - #if defined(CONFIG_X86_32) - #define ia32_sys_call_table sys_call_table -@@ -33,8 +37,12 @@ extern const sys_call_ptr_t sys_call_table[]; - #endif - - #if defined(CONFIG_IA32_EMULATION) -+#if IS_ENABLED(CONFIG_DT_SYSTRACE) -+extern sys_call_ptr_t ia32_sys_call_table[]; -+#else - extern const sys_call_ptr_t ia32_sys_call_table[]; - #endif -+#endif - - #ifdef CONFIG_X86_X32_ABI - extern const sys_call_ptr_t x32_sys_call_table[]; -diff --git a/arch/x86/kernel/dtrace_syscall.c b/arch/x86/kernel/dtrace_syscall.c -new file mode 100644 -index 000000000000..42153b0261f0 ---- /dev/null -+++ b/arch/x86/kernel/dtrace_syscall.c -@@ -0,0 +1,97 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_syscall.c -+ * DESCRIPTION: Dynamic Tracing: system call tracing support (arch-specific) -+ * -+ * Copyright (C) 2010-2018 Oracle Corporation -+ */ -+ -+#include <linux/dtrace_cpu.h> -+#include <linux/dtrace_os.h> -+#include <linux/dtrace_syscall.h> -+#include <linux/fs.h> -+#include <linux/module.h> -+#include <linux/namei.h> -+#include <linux/sched.h> -+#include <asm/insn.h> -+#include <asm/stacktrace.h> -+#include <asm/syscalls.h> -+ -+/* -+ * SYSTEM CALL TRACING SUPPORT -+ */ -+void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, -+ 
uintptr_t, uintptr_t, uintptr_t); -+ -+void systrace_stub(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, -+ uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, -+ uintptr_t arg5, uintptr_t arg6) -+{ -+} -+ -+asmlinkage long systrace_syscall(const struct pt_regs *regs); -+ -+asmlinkage long dtrace_stub_ptregs(uintptr_t, uintptr_t, uintptr_t, uintptr_t, -+ uintptr_t, uintptr_t, uintptr_t); -+ -+static struct systrace_info systrace_info = -+{ -+ &systrace_probe, -+ systrace_stub, -+ systrace_syscall, -+ {}, -+ { -+#define __SYSCALL_64(nr, sym, compat) [nr] { __stringify(sym), }, -+#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) -+#define __SYSCALL_X32(nt, sym, compat) -+#include <asm/syscalls_64.h> -+ } -+}; -+ -+asmlinkage long systrace_syscall(const struct pt_regs *regs) -+{ -+ long rc = 0; -+ unsigned long sysnum; -+ dtrace_id_t id; -+ struct dtrace_syscalls *sc; -+ -+ sysnum = syscall_get_nr(current, (struct pt_regs *) regs); -+ sc = &systrace_info.sysent[sysnum]; -+ -+ /* -+ * Note: 64-bit syscall-specific. -+ */ -+ id = sc->stsy_entry; -+ if (id != DTRACE_IDNONE) -+ (*systrace_probe)(id, regs->di, regs->si, regs->dx, -+ regs->r10, regs->r8, regs->r9, 0); -+ -+ /* -+ * FIXME: Add stop functionality for DTrace. 
-+ */ -+ -+ if (sc->stsy_underlying != NULL) -+ rc = (*sc->stsy_underlying)(regs); -+ -+ id = sc->stsy_return; -+ if (id != DTRACE_IDNONE) -+ (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc, -+ (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0, 0); -+ -+ return rc; -+} -+ -+struct systrace_info *dtrace_syscalls_init(void) -+{ -+ int i; -+ -+ for (i = 0; i < NR_syscalls; i++) { -+ systrace_info.sysent[i].stsy_tblent = -+ (dt_sys_call_t *)&sys_call_table[i]; -+ systrace_info.sysent[i].stsy_underlying = -+ (dt_sys_call_t)sys_call_table[i]; -+ } -+ -+ return &systrace_info; -+} -+EXPORT_SYMBOL(dtrace_syscalls_init); -diff --git a/arch/x86/kernel/dtrace_syscall_stubs.S b/arch/x86/kernel/dtrace_syscall_stubs.S -new file mode 100644 -index 000000000000..e69de29bb2d1 -diff --git a/include/linux/dtrace_syscall.h b/include/linux/dtrace_syscall.h -new file mode 100644 -index 000000000000..7f9e351f3783 ---- /dev/null -+++ b/include/linux/dtrace_syscall.h -@@ -0,0 +1,60 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ */ -+ -+#ifndef _LINUX_DTRACE_SYSCALL_H_ -+#define _LINUX_DTRACE_SYSCALL_H_ -+ -+#include <linux/types.h> -+#include <linux/dtrace_os.h> -+#include <asm/syscall.h> -+ -+#define DTRACE_SYSCALL_STUB(t, n) SCE_##t, -+enum dtrace_sce_id { -+ SCE_NONE = 0, -+#include <asm/dtrace_syscall.h> -+ SCE_nr_stubs -+}; -+#undef DTRACE_SYSCALL_STUB -+ -+#define DTRACE_SYSCALL_STUB(t, n) \ -+ asmlinkage long dtrace_stub_##n(uintptr_t, uintptr_t, uintptr_t, \ -+ uintptr_t, uintptr_t, uintptr_t, \ -+ uintptr_t); -+#include <asm/dtrace_syscall.h> -+#undef DTRACE_SYSCALL_STUB -+ -+#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER -+typedef asmlinkage long (*dt_sys_call_t)(uintptr_t, uintptr_t, uintptr_t, -+ uintptr_t, uintptr_t, uintptr_t, -+ uintptr_t); -+ -+#define DTRACE_SYSCALL_WRAP_PREFIX "" -+#else -+#include <asm/dtrace_syscall_types.h> -+#endif -+ -+struct dtrace_syscalls { -+ const char *name; -+ dtrace_id_t stsy_entry; -+ dtrace_id_t stsy_return; -+ dt_sys_call_t stsy_underlying; -+ dt_sys_call_t *stsy_tblent; -+}; -+ -+typedef void (*dtrace_systrace_probe_t)(dtrace_id_t, uintptr_t, uintptr_t, -+ uintptr_t, uintptr_t, uintptr_t, -+ uintptr_t, uintptr_t); -+ -+struct systrace_info { -+ dtrace_systrace_probe_t *probep; -+ dtrace_systrace_probe_t stub; -+ dt_sys_call_t syscall; -+ dt_sys_call_t stubs[SCE_nr_stubs]; -+ struct dtrace_syscalls sysent[NR_syscalls]; -+}; -+ -+extern struct systrace_info *dtrace_syscalls_init(void); -+ -+#endif /* _LINUX_DTRACE_SYSCALL_H_ */ -diff --git a/kernel/dtrace/Kconfig b/kernel/dtrace/Kconfig -index 854e4411343f..d04ca0ab7ac9 100644 ---- a/kernel/dtrace/Kconfig -+++ b/kernel/dtrace/Kconfig -@@ -23,6 +23,15 @@ config DT_CORE - - if DT_CORE - -+config DT_SYSTRACE -+ tristate "System Call Tracing" -+ default m -+ select FTRACE -+ select FTRACE_SYSCALLS -+ help -+ Provides DTrace probes at the entry and exit of all system calls, -+ in the syscall provider. 
-+ - config DT_DT_TEST - tristate "DTrace Test Probe" - default m -diff --git a/kernel/dtrace/Makefile b/kernel/dtrace/Makefile -index 872785327c3d..68fc3861e5d1 100644 ---- a/kernel/dtrace/Makefile -+++ b/kernel/dtrace/Makefile -@@ -3,6 +3,7 @@ - # - - DT_CORE_ARCH_OBJS = $(addprefix ../../arch/$(SRCARCH)/kernel/, \ -+ dtrace_syscall.o dtrace_syscall_stubs.o \ - dtrace_util.o) - - ifdef CONFIG_DT_CORE --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0007-dtrace-systrace-provider.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0007-dtrace-systrace-provider.patch deleted file mode 100644 index 4a7e6937a3af..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0007-dtrace-systrace-provider.patch +++ /dev/null @@ -1,374 +0,0 @@ -From 02d8ef998efdbbf220bd710602c29eb690eeef61 Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 17:58:09 +0000 -Subject: [PATCH 07/20] dtrace: systrace provider - -This implements the DTrace systrace provider, which intercepts system -call invocations. 
- -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - dtrace/Makefile | 2 + - dtrace/systrace.h | 33 +++++++ - dtrace/systrace_dev.c | 224 ++++++++++++++++++++++++++++++++++++++++++ - dtrace/systrace_mod.c | 52 ++++++++++ - 4 files changed, 311 insertions(+) - create mode 100644 dtrace/systrace.h - create mode 100644 dtrace/systrace_dev.c - create mode 100644 dtrace/systrace_mod.c - -diff --git a/dtrace/Makefile b/dtrace/Makefile -index 36a4b97b922c..b91bc69d3802 100644 ---- a/dtrace/Makefile -+++ b/dtrace/Makefile -@@ -3,6 +3,7 @@ - # - - obj-$(CONFIG_DT_CORE) += dtrace.o -+obj-$(CONFIG_DT_SYSTRACE) += systrace.o - obj-$(CONFIG_DT_DT_TEST) += dt_test.o - - dtrace-y := dtrace_mod.o dtrace_dev.o \ -@@ -14,6 +15,7 @@ dtrace-y := dtrace_mod.o dtrace_dev.o \ - dtrace_probe.o dtrace_probe_ctx.o \ - dtrace_ptofapi.o dtrace_predicate.o \ - dtrace_spec.o dtrace_state.o dtrace_util.o -+systrace-y := systrace_mod.o systrace_dev.o - dt_test-y := dt_test_mod.o dt_test_dev.o - - -include arch/$(SRCARCH)/dtrace/Makefile.arch -diff --git a/dtrace/systrace.h b/dtrace/systrace.h -new file mode 100644 -index 000000000000..832bc613b966 ---- /dev/null -+++ b/dtrace/systrace.h -@@ -0,0 +1,33 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - syscall tracing provider -+ * -+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#ifndef _SYSTRACE_H_ -+#define _SYSTRACE_H_ -+ -+#include "dtrace.h" -+ -+extern void systrace_provide(void *, const struct dtrace_probedesc *); -+extern int systrace_enable(void *arg, dtrace_id_t, void *); -+extern void systrace_disable(void *arg, dtrace_id_t, void *); -+extern void systrace_destroy(void *, dtrace_id_t, void *); -+ -+extern dtrace_provider_id_t syscall_id; -+ -+extern int syscall_dev_init(void); -+extern void syscall_dev_exit(void); -+ -+#endif /* _SYSTRACE_H_ */ -diff --git a/dtrace/systrace_dev.c b/dtrace/systrace_dev.c -new file mode 100644 -index 000000000000..2ff3ba4329a5 ---- /dev/null -+++ b/dtrace/systrace_dev.c -@@ -0,0 +1,224 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: systrace_dev.c -+ * DESCRIPTION: DTrace - systrace provider device driver -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/dtrace_syscall.h> -+#include <linux/fs.h> -+#include <linux/miscdevice.h> -+#include <asm/unistd.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "systrace.h" -+ -+#define SYSTRACE_ARTIFICIAL_FRAMES 1 -+ -+#define SYSTRACE_SHIFT 16 -+#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id)) -+#define SYSTRACE_RETURN(id) (id) -+#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1)) -+#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT) -+ -+#if ((1 << SYSTRACE_SHIFT) <= NR_syscalls) -+# error 1 << SYSTRACE_SHIFT must exceed number of system calls -+#endif -+ -+static struct systrace_info *systrace_info; -+ -+void systrace_provide(void *arg, const struct dtrace_probedesc *desc) -+{ -+ int failed_count = 0; -+ int i; -+ -+ ASSERT(systrace_info != NULL); -+ -+ if (desc != NULL) -+ return; -+ -+ for (i = 0; i < NR_syscalls; i++) { -+ const char *nm = systrace_info->sysent[i].name; -+ dtrace_id_t id; -+ int sz; -+ size_t wrap_len; -+ -+ if (nm == NULL) -+ continue; -+ -+ if (systrace_info->sysent[i].stsy_underlying == NULL) -+ continue; -+ -+ wrap_len = strlen(DTRACE_SYSCALL_WRAP_PREFIX); -+ sz = strlen(nm); -+ if (sz > wrap_len && -+ memcmp(nm, DTRACE_SYSCALL_WRAP_PREFIX, wrap_len) == 0) { -+ nm += wrap_len; -+ sz -= wrap_len; -+ } -+ if (sz > 4 && memcmp(nm, "sys_", 4) == 0) -+ nm += 4; -+ else if (sz > 5 && memcmp(nm, "stub_", 5) == 0) -+ nm += 5; -+ -+ id = dtrace_probe_lookup(syscall_id, dtrace_kmod->name, nm, -+ "entry"); -+ if (id == DTRACE_IDNONE) { -+ id = dtrace_probe_create(syscall_id, dtrace_kmod->name, -+ nm, "entry", -+ SYSTRACE_ARTIFICIAL_FRAMES, -+ (void *)((uintptr_t)SYSTRACE_ENTRY(i))); -+ if (id == DTRACE_IDNONE) -+ failed_count++; -+ -+ systrace_info->sysent[i].stsy_entry = DTRACE_IDNONE; -+ } -+ -+ id = dtrace_probe_lookup(syscall_id, dtrace_kmod->name, nm, -+ "return"); -+ if (id == DTRACE_IDNONE) { -+ id = dtrace_probe_create(syscall_id, dtrace_kmod->name, -+ nm, "return", -+ 
SYSTRACE_ARTIFICIAL_FRAMES, -+ (void *)((uintptr_t)SYSTRACE_RETURN(i))); -+ if (id == DTRACE_IDNONE) -+ failed_count++; -+ -+ systrace_info->sysent[i].stsy_return = DTRACE_IDNONE; -+ } -+ } -+ -+ if (failed_count > 0) -+ pr_warn("systrace: Failed to provide %d probes (out of memory)\n", failed_count); -+} -+ -+static dt_sys_call_t get_intercept(int sysnum) -+{ -+ switch (sysnum) { -+ default: -+ return systrace_info->syscall; -+#define DTRACE_SYSCALL_STUB(t, n) \ -+ case __NR_##n: \ -+ return systrace_info->stubs[SCE_##t]; -+#include <asm/dtrace_syscall.h> -+#undef DTRACE_SYSCALL_STUB -+ } -+} -+ -+int systrace_enable(void *arg, dtrace_id_t id, void *parg) -+{ -+ int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); -+ struct dtrace_syscalls *sc = &systrace_info->sysent[sysnum]; -+ int enabled = sc->stsy_entry != DTRACE_IDNONE || -+ sc->stsy_return != DTRACE_IDNONE; -+ dt_sys_call_t intercept = get_intercept(sysnum); -+ -+ if (!enabled) { -+ if (cmpxchg(sc->stsy_tblent, sc->stsy_underlying, -+ intercept) != sc->stsy_underlying) -+ return 1; -+ } else -+ ASSERT(*sc->stsy_tblent == intercept); -+ -+ if (SYSTRACE_ISENTRY((uintptr_t)parg)) -+ sc->stsy_entry = id; -+ else -+ sc->stsy_return = id; -+ -+ return 0; -+} -+ -+void systrace_disable(void *arg, dtrace_id_t id, void *parg) -+{ -+ int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); -+ struct dtrace_syscalls *sc = &systrace_info->sysent[sysnum]; -+ int enabled = -+ (sc->stsy_entry != DTRACE_IDNONE ? 1 : 0) + -+ (sc->stsy_return != DTRACE_IDNONE ? 1 : 0); -+ dt_sys_call_t intercept = get_intercept(sysnum); -+ -+ /* -+ * Every syscall can have 2 probes associated with it. We need to keep -+ * the interceptor in place until the last probe is getting disabled. 
-+ */ -+ if (enabled == 1) -+ (void)cmpxchg(sc->stsy_tblent, intercept, sc->stsy_underlying); -+ -+ if (SYSTRACE_ISENTRY((uintptr_t)parg)) -+ sc->stsy_entry = DTRACE_IDNONE; -+ else -+ sc->stsy_return = DTRACE_IDNONE; -+} -+ -+void systrace_destroy(void *arg, dtrace_id_t id, void *parg) -+{ -+ int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); -+ -+ /* -+ * Nothing to be done here - just ensure our probe has been disabled. -+ */ -+ if (SYSTRACE_ISENTRY((uintptr_t)parg)) -+ ASSERT(systrace_info->sysent[sysnum].stsy_entry == -+ DTRACE_IDNONE); -+ else -+ ASSERT(systrace_info->sysent[sysnum].stsy_return == -+ DTRACE_IDNONE); -+} -+ -+static int systrace_open(struct inode *inode, struct file *file) -+{ -+ return -EAGAIN; -+} -+ -+static int systrace_close(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static const struct file_operations systrace_fops = { -+ .owner = THIS_MODULE, -+ .open = systrace_open, -+ .release = systrace_close, -+}; -+ -+static struct miscdevice systrace_dev = { -+ .minor = DT_DEV_SYSTRACE_MINOR, -+ .name = "systrace", -+ .nodename = "dtrace/provider/systrace", -+ .fops = &systrace_fops, -+}; -+ -+int syscall_dev_init(void) -+{ -+ int ret = 0; -+ -+ systrace_info = dtrace_syscalls_init(); -+ -+ ret = misc_register(&systrace_dev); -+ if (ret) -+ pr_err("%s: Can't register misc device %d\n", -+ systrace_dev.name, systrace_dev.minor); -+ -+ *(systrace_info->probep) = (dtrace_systrace_probe_t)dtrace_probe; -+ -+ return ret; -+} -+ -+void syscall_dev_exit(void) -+{ -+ *(systrace_info->probep) = systrace_info->stub; -+ -+ misc_deregister(&systrace_dev); -+} -diff --git a/dtrace/systrace_mod.c b/dtrace/systrace_mod.c -new file mode 100644 -index 000000000000..d286f7d9d47a ---- /dev/null -+++ b/dtrace/systrace_mod.c -@@ -0,0 +1,52 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: systrace_mod.c -+ * DESCRIPTION: DTrace - systrace provider kernel module -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. 
All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/module.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "systrace.h" -+ -+MODULE_AUTHOR("Kris Van Hees (kris.van.hees@oracle.com)"); -+MODULE_DESCRIPTION("System Call Tracing"); -+MODULE_VERSION("v0.1"); -+MODULE_LICENSE("GPL"); -+ -+static const struct dtrace_pattr syscall_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pops syscall_pops = { -+ .dtps_provide = systrace_provide, -+ .dtps_provide_module = NULL, -+ .dtps_destroy_module = NULL, -+ .dtps_enable = systrace_enable, -+ .dtps_disable = systrace_disable, -+ .dtps_suspend = NULL, -+ .dtps_resume = NULL, -+ .dtps_getargdesc = NULL, -+ .dtps_getargval = NULL, -+ .dtps_usermode = NULL, -+ .dtps_destroy = systrace_destroy -+}; -+ -+DT_PROVIDER_MODULE(syscall, DTRACE_PRIV_USER) --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0008-dtrace-sdt-provider-core-components.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0008-dtrace-sdt-provider-core-components.patch deleted file mode 100644 index 
08454652f1c2..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0008-dtrace-sdt-provider-core-components.patch +++ /dev/null @@ -1,3113 +0,0 @@ -From 5fe6e712052e4a5bc2d01c345be6545f80c8b2cb Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 22:03:10 +0000 -Subject: [PATCH 08/20] dtrace: sdt provider core components - -This implements the core (linked-in) machinery needed for SDT -tracepoints: - - - generate empty stub function calls __dtrace_probe_* for each probe - point and perf-event probe point, and record their section-relative - offset in tables in special symbols in the output; calls to - is-enabling probes (conditionals of the form - if (DTRACE_FOO_ENABLED(probe-name))) are translated as well - - similarly record the names and types of arguments to probes in - special sections - - parse both of these at load time, and substitute in nops over the top - of the stub functions, remembering their locations: is-enabled probes - get 0-returns patched over the top - - on probe enabling, patch invalid-operation traps over the top of - those stub functions; handle these by calling the probe, then return - as if the trap had never happened - -The provider module itself is added in the next commit. 
- -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - .gitignore | 6 + - Makefile | 1 + - arch/x86/dtrace/include/dtrace/sdt_arch.h | 28 ++ - arch/x86/include/asm/dtrace_arch.h | 8 +- - arch/x86/include/asm/dtrace_sdt_arch.h | 11 + - arch/x86/include/asm/dtrace_util.h | 13 + - arch/x86/include/asm/spinlock.h | 1 + - arch/x86/include/asm/text-patching.h | 1 + - arch/x86/kernel/alternative.c | 2 +- - arch/x86/kernel/dtrace_sdt.c | 75 +++ - arch/x86/kernel/dtrace_util.c | 214 +++++++- - arch/x86/kernel/vmlinux.lds.S | 3 +- - include/asm-generic/vmlinux.lds.h | 16 + - include/linux/dtrace_os.h | 3 + - include/linux/dtrace_sdt.h | 32 ++ - include/linux/module.h | 5 + - include/linux/sdt.h | 191 +++++++ - include/linux/sdt_internal.h | 276 ++++++++++ - include/linux/tracepoint.h | 8 +- - kernel/dtrace/Kconfig | 16 + - kernel/dtrace/Makefile | 3 +- - kernel/dtrace/dtrace_os.c | 83 +++ - kernel/dtrace/dtrace_sdt_core.c | 364 ++++++++++++++ - kernel/module.c | 14 + - scripts/.gitignore | 1 + - scripts/Makefile | 7 + - scripts/Makefile.modfinal | 49 +- - scripts/dtrace_sdt.sh | 588 ++++++++++++++++++++++ - scripts/kmodsdt.c | 410 +++++++++++++++ - scripts/link-vmlinux.sh | 69 ++- - scripts/mod/modpost.c | 19 +- - 31 files changed, 2497 insertions(+), 20 deletions(-) - create mode 100644 arch/x86/dtrace/include/dtrace/sdt_arch.h - create mode 100644 arch/x86/include/asm/dtrace_sdt_arch.h - create mode 100644 arch/x86/kernel/dtrace_sdt.c - create mode 100644 include/linux/dtrace_sdt.h - create mode 100644 include/linux/sdt.h - create mode 100644 include/linux/sdt_internal.h - create mode 100644 kernel/dtrace/dtrace_sdt_core.c - create mode 100755 scripts/dtrace_sdt.sh - create mode 100644 
scripts/kmodsdt.c - -diff --git a/.gitignore b/.gitignore -index 33bd38f6cdb0..4f7a1e69b813 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -149,3 +149,9 @@ x509.genkey - - # Clang's compilation database file - /compile_commands.json -+ -+# -+# Generated DTrace SDT files -+# -+*.sdtinfo.c -+*.sdtstub.S -diff --git a/Makefile b/Makefile -index dc05d9af9d1b..2b3a60d2d18b 100644 ---- a/Makefile -+++ b/Makefile -@@ -1770,6 +1770,7 @@ clean: $(clean-dirs) - -o -name '*.dtb' -o -name '*.dtb.S' -o -name '*.dt.yaml' \ - -o -name '*.dwo' -o -name '*.lst' \ - -o -name '*.su' -o -name '*.mod' \ -+ -o -name '*.sdtinfo.c' -o -name '*.sdtstub.S' \ - -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \ - -o -name '*.lex.c' -o -name '*.tab.[ch]' \ - -o -name '*.asn1.[ch]' \ -diff --git a/arch/x86/dtrace/include/dtrace/sdt_arch.h b/arch/x86/dtrace/include/dtrace/sdt_arch.h -new file mode 100644 -index 000000000000..d8616b44079b ---- /dev/null -+++ b/arch/x86/dtrace/include/dtrace/sdt_arch.h -@@ -0,0 +1,28 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - SDT Implementation defines -+ * -+ * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _X86_64_SDT_ARCH_H -+#define _X86_64_SDT_ARCH_H -+ -+#define SDT_AFRAMES 4 -+ -+#endif /* _X86_64_SDT_ARCH_H */ -diff --git a/arch/x86/include/asm/dtrace_arch.h b/arch/x86/include/asm/dtrace_arch.h -index 74e27f08a873..88beb8e4db0b 100644 ---- a/arch/x86/include/asm/dtrace_arch.h -+++ b/arch/x86/include/asm/dtrace_arch.h -@@ -12,16 +12,22 @@ - - typedef uint8_t asm_instr_t; - -+#define ASM_CALL_SIZE 5 -+ - typedef int (*prov_exit_f)(void); - - /* - * Structure to hold DTrace specific information about modules (including the - * core kernel module). Note that each module (and the main kernel) already -- * has one field that relates to probing: -+ * has three fields that relate to probing: -+ * - sdt_probes: description of SDT probes in the module -+ * - sdt_probec: number of SDT probes in the module - * - pdata: pointer to a dtrace_module struct (for DTrace) - */ - struct dtrace_module { - int enabled_cnt; -+ size_t sdt_probe_cnt; -+ size_t fbt_probe_cnt; - prov_exit_f prov_exit; /* Called with module_mutex held */ - }; - -diff --git a/arch/x86/include/asm/dtrace_sdt_arch.h b/arch/x86/include/asm/dtrace_sdt_arch.h -new file mode 100644 -index 000000000000..59f57cb489ab ---- /dev/null -+++ b/arch/x86/include/asm/dtrace_sdt_arch.h -@@ -0,0 +1,11 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+/* Copyright (C) 2016 Oracle, Inc. 
*/ -+ -+#ifndef _X86_DTRACE_SDT_ARCH_H -+#define _X86_DTRACE_SDT_ARCH_H -+ -+#define __DTRACE_SDT_ISENABLED_PROTO void -+#define __DTRACE_SDT_ISENABLED_ARGS -+ -+#endif /* _X86_DTRACE_SDT_ARCH_H */ -diff --git a/arch/x86/include/asm/dtrace_util.h b/arch/x86/include/asm/dtrace_util.h -index 4d9843bbc95b..ce28bf42a59d 100644 ---- a/arch/x86/include/asm/dtrace_util.h -+++ b/arch/x86/include/asm/dtrace_util.h -@@ -6,11 +6,24 @@ - #ifndef _X86_DTRACE_UTIL_H - #define _X86_DTRACE_UTIL_H - -+#define DTRACE_INVOP_NOPS 0x0f /* 5-byte NOP sequence */ -+#define DTRACE_INVOP_MOV_RSP_RBP 0x48 /* mov %rsp, %rbp = 48 89 e5 */ -+#define DTRACE_INVOP_PUSH_BP 0x55 /* push %rbp = 55 */ -+#define DTRACE_INVOP_NOP 0x90 /* nop = 90 */ -+#define DTRACE_INVOP_LEAVE 0xc9 /* leave = c9 */ -+#define DTRACE_INVOP_RET 0xc3 /* ret = c3 */ -+ - #ifndef __ASSEMBLY__ - - #include <asm/dtrace_arch.h> - #include <asm/ptrace.h> - -+extern int dtrace_invop_add(uint8_t (*func)(struct pt_regs *)); -+extern void dtrace_invop_remove(uint8_t (*func)(struct pt_regs *)); -+ -+extern void dtrace_invop_enable(asm_instr_t *, asm_instr_t); -+extern void dtrace_invop_disable(asm_instr_t *, asm_instr_t); -+ - #endif - - #endif /* _X86_DTRACE_UTIL_H */ -diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h -index 5b6bc7016c22..79e017f90c60 100644 ---- a/arch/x86/include/asm/spinlock.h -+++ b/arch/x86/include/asm/spinlock.h -@@ -9,6 +9,7 @@ - #include <linux/compiler.h> - #include <asm/paravirt.h> - #include <asm/bitops.h> -+#include <linux/sdt.h> - - /* - * Your basic SMP spinlocks, allowing only a single CPU anywhere -diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h -index 67315fa3956a..14ce162a91e6 100644 ---- a/arch/x86/include/asm/text-patching.h -+++ b/arch/x86/include/asm/text-patching.h -@@ -25,6 +25,7 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, - */ - #define POKE_MAX_OPCODE_SIZE 5 - -+extern void add_nops(void 
*insns, unsigned int len); - extern void text_poke_early(void *addr, const void *opcode, size_t len); - - /* -diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c -index 15ac0d5f4b40..c36f31259194 100644 ---- a/arch/x86/kernel/alternative.c -+++ b/arch/x86/kernel/alternative.c -@@ -254,7 +254,7 @@ void __init arch_init_ideal_nops(void) - } - - /* Use this to add nops to a buffer, then text_poke the whole buffer. */ --static void __init_or_module add_nops(void *insns, unsigned int len) -+void __init_or_module add_nops(void *insns, unsigned int len) - { - while (len > 0) { - unsigned int noplen = len; -diff --git a/arch/x86/kernel/dtrace_sdt.c b/arch/x86/kernel/dtrace_sdt.c -new file mode 100644 -index 000000000000..bcc97d3adb2e ---- /dev/null -+++ b/arch/x86/kernel/dtrace_sdt.c -@@ -0,0 +1,75 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_sdt.c -+ * DESCRIPTION: Dynamic Tracing: SDT registration code (arch-specific) -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/memory.h> -+#include <linux/module.h> -+#include <linux/dtrace_os.h> -+#include <linux/sdt.h> -+#include <linux/slab.h> -+#include <linux/vmalloc.h> -+#include <asm/nmi.h> -+#include <asm/nops.h> -+#include <asm/dtrace_arch.h> -+#include <asm/text-patching.h> -+ -+static uint8_t nops[ASM_CALL_SIZE]; -+static uint8_t movs[ASM_CALL_SIZE]; -+ -+#define DT_OP_REX_RAX 0x48 -+#define DT_OP_XOR_EAX_0 0x33 -+#define DT_OP_XOR_EAX_1 0xc0 -+ -+/* This code is based on apply_alternatives and text_poke_early. It needs to -+ * run before SMP is initialized in order to avoid SMP problems with patching -+ * code that might be accessed on another CPU. 
-+ */ -+void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs, -+ int *is_enabled, int cnt) -+{ -+ int i; -+ asm_instr_t *addr; -+ unsigned long flags; -+ -+ stop_nmi(); -+ local_irq_save(flags); -+ -+ for (i = 0; i < cnt; i++) { -+ addr = addrs[i]; -+ if (likely(!is_enabled[i])) -+ memcpy(addr, nops, sizeof(nops)); -+ else -+ memcpy(addr, movs, sizeof(movs)); -+ } -+ -+ sync_core(); -+ local_irq_restore(flags); -+ restart_nmi(); -+} -+ -+void __init dtrace_sdt_init_arch(void) -+{ -+ /* -+ * A little unusual, but potentially necessary. While we could use a -+ * single NOP sequence of length ASM_CALL_SIZE, we need to consider the -+ * fact that when a SDT probe point is enabled, a single invalid opcode -+ * is written on the first byte of this NOP sequence. By using a -+ * sequence of a 1-byte NOP, followed by a (ASM_CALL_SIZE - 1) byte NOP -+ * sequence, we play it pretty safe. -+ */ -+ add_nops(nops, 1); -+ add_nops(nops + 1, ASM_CALL_SIZE - 1); -+ -+ /* -+ * Is-enabled probe points contain an "xor %rax, %rax" when disabled. -+ */ -+ movs[0] = DT_OP_REX_RAX; -+ movs[1] = DT_OP_XOR_EAX_0; -+ movs[2] = DT_OP_XOR_EAX_1; -+ add_nops(movs + 3, ASM_CALL_SIZE - 3); -+} -diff --git a/arch/x86/kernel/dtrace_util.c b/arch/x86/kernel/dtrace_util.c -index 64280fb98bbb..6531327509bb 100644 ---- a/arch/x86/kernel/dtrace_util.c -+++ b/arch/x86/kernel/dtrace_util.c -@@ -61,6 +61,13 @@ void dtrace_handle_badaddr(struct pt_regs *regs) - dtrace_skip_instruction(regs); - } - -+struct dtrace_invop_hdlr { -+ uint8_t (*dtih_func)(struct pt_regs *); -+ struct dtrace_invop_hdlr *dtih_next; -+}; -+ -+static struct dtrace_invop_hdlr *dtrace_invop_hdlrs; -+ - /* - * Trap notification handler. 
- */ -@@ -68,6 +75,7 @@ int dtrace_die_notifier(struct notifier_block *nb, unsigned long val, - void *args) - { - struct die_args *dargs = args; -+ int orig_trapnr = 0; - - switch (val) { - case DIE_PAGE_FAULT: { -@@ -79,12 +87,141 @@ int dtrace_die_notifier(struct notifier_block *nb, unsigned long val, - return NOTIFY_OK | NOTIFY_STOP_MASK; - } - case DIE_GPF: { -- if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT)) -+ /* -+ * This gets messy... For one, some versions of Xen deliver -+ * the invalid opcode generated by the LOCK prefix (0xf0) as a -+ * GP fault rather than a UD fault. So, we need to figure out -+ * whether the GP we're processing here is one of those -+ * misreported faults. -+ * -+ * But, it is possible that the instruction that caused the -+ * fault (0xf0) gets overwritten by a different CPU with the -+ * original valid opcode before we get to look at it here, -+ * which makes it kind of hard to recognize. -+ * -+ * So... we're going to assume that a GP fault that gets -+ * triggered for the LOCK prefix opcode (0xf0) *or* for an -+ * opcode that can get overwritten with the LOCK prefix for -+ * probing is actually a UD fault. -+ * -+ * If we are wrong, the handlers will simply see a fault that -+ * isn't theirs, and return without consuming it. And in that -+ * case, the kernel will report a UD fault that may have been -+ * a real GP fault... Sorry. -+ */ -+ asm_instr_t opc = *(asm_instr_t *)dargs->regs->ip; -+ -+ if (opc != 0xf0 && opc != 0x55 && opc != 0xc3) { -+ if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT)) -+ return NOTIFY_DONE; -+ -+ dtrace_handle_badaddr(dargs->regs); -+ -+ return NOTIFY_OK | NOTIFY_STOP_MASK; -+ } -+ -+ /* -+ * ... and instead treat them as the SDT probe point traps that -+ * they are. 
-+ */ -+ orig_trapnr = dargs->trapnr; -+ dargs->trapnr = 6; -+ } -+ /* fallthrough */ -+ case DIE_TRAP: { -+ struct dtrace_invop_hdlr *hdlr; -+ int rval = 0; -+ -+ if (dargs->trapnr != 6) - return NOTIFY_DONE; - -- dtrace_handle_badaddr(dargs->regs); -+ for (hdlr = dtrace_invop_hdlrs; hdlr != NULL; -+ hdlr = hdlr->dtih_next) { -+ rval = hdlr->dtih_func(dargs->regs); -+ if (rval != 0) -+ break; -+ } - -- return NOTIFY_OK | NOTIFY_STOP_MASK; -+ switch (rval) { -+ case DTRACE_INVOP_NOPS: -+ /* -+ * SDT probe points are encoded as either: -+ * - a 1-byte NOP followed by a multi-byte NOP -+ * - a multi-byte code sequence (to set AX to 0), -+ * followed by a multi-byte NOP -+ * In both cases, the total length of the probe point -+ * instruction is ASM_CALL_SITE bytes, so we can safely -+ * skip that number of bytes here. -+ */ -+ dargs->regs->ip += ASM_CALL_SIZE; -+ return NOTIFY_OK | NOTIFY_STOP_MASK; -+ case DTRACE_INVOP_MOV_RSP_RBP: -+ case DTRACE_INVOP_NOP: -+ case DTRACE_INVOP_PUSH_BP: -+ case DTRACE_INVOP_RET: -+ return notifier_from_errno(-rval); -+ default: -+ /* -+ * This must not have been a trap triggered from a -+ * probe point. Let someone else deal with it... -+ * -+ * If we got here because of a GPF that we thought -+ * was a UD (due to a bug in some versions of Xen), -+ * undo our change to dargs->trapnr. -+ */ -+ if (unlikely(orig_trapnr)) -+ dargs->trapnr = orig_trapnr; -+ -+ return NOTIFY_DONE; -+ } -+ } -+ case DIE_INT3: { -+ struct dtrace_invop_hdlr *hdlr; -+ int rval = 0; -+ -+ /* -+ * Let's assume that this is a DTrace probe firing, so we need -+ * to adjust the IP (to be consistent with #UD processing) so -+ * that it reflects the address of the #BP rather than the -+ * following intruction. -+ * -+ * If it turns out that this was not DTrace related, we'll have -+ * to reverse this adjustment. 
-+ */ -+ dargs->regs->ip--; -+ for (hdlr = dtrace_invop_hdlrs; hdlr != NULL; -+ hdlr = hdlr->dtih_next) { -+ rval = hdlr->dtih_func(dargs->regs); -+ if (rval != 0) -+ break; -+ } -+ -+ switch (rval) { -+ case DTRACE_INVOP_NOPS: -+ /* -+ * SDT probe points are encoded as either: -+ * - a 1-byte NOP followed by a multi-byte NOP -+ * - a multi-byte code sequence (to set AX to 0), -+ * followed by a multi-byte NOP -+ * In both cases, the total length of the probe point -+ * instruction is ASM_CALL_SITE bytes, so we can safely -+ * skip that number of bytes here. -+ */ -+ dargs->regs->ip += ASM_CALL_SIZE; -+ return NOTIFY_OK | NOTIFY_STOP_MASK; -+ case DTRACE_INVOP_MOV_RSP_RBP: -+ case DTRACE_INVOP_NOP: -+ case DTRACE_INVOP_PUSH_BP: -+ case DTRACE_INVOP_RET: -+ return notifier_from_errno(-rval); -+ default: -+ /* -+ * This must not have been a trap triggered from a -+ * probe point. Re-adjust the instruction pointer -+ * and let someone else deal with it... -+ */ -+ dargs->regs->ip++; -+ } - } - /* fallthrough */ - default: -@@ -92,6 +229,77 @@ int dtrace_die_notifier(struct notifier_block *nb, unsigned long val, - } - } - -+/* -+ * Add an INVOP trap handler. -+ */ -+int dtrace_invop_add(uint8_t (*func)(struct pt_regs *)) -+{ -+ struct dtrace_invop_hdlr *hdlr; -+ -+ hdlr = kmalloc(sizeof(struct dtrace_invop_hdlr), GFP_KERNEL); -+ if (hdlr == NULL) { -+ pr_warn("Failed to add invop handler: out of memory\n"); -+ return -ENOMEM; -+ } -+ -+ hdlr->dtih_func = func; -+ hdlr->dtih_next = dtrace_invop_hdlrs; -+ dtrace_invop_hdlrs = hdlr; -+ -+ return 0; -+} -+EXPORT_SYMBOL(dtrace_invop_add); -+ -+/* -+ * Remove an INVOP trap handler. 
-+ */ -+void dtrace_invop_remove(uint8_t (*func)(struct pt_regs *)) -+{ -+ struct dtrace_invop_hdlr *hdlr = dtrace_invop_hdlrs, *prev = NULL; -+ -+ for (;;) { -+ if (hdlr == NULL) -+ return; -+ -+ if (hdlr->dtih_func == func) -+ break; -+ -+ prev = hdlr; -+ hdlr = hdlr->dtih_next; -+ } -+ -+ if (prev == NULL) -+ dtrace_invop_hdlrs = hdlr->dtih_next; -+ else -+ prev->dtih_next = hdlr->dtih_next; -+ -+ kfree(hdlr); -+} -+EXPORT_SYMBOL(dtrace_invop_remove); -+ -+/* -+ * Enable an INVOP-based probe, i.e. ensure that an INVOP trap is triggered at -+ * the specified address. -+ */ -+void dtrace_invop_enable(asm_instr_t *addr, asm_instr_t opcode) -+{ -+ mutex_lock(&text_mutex); -+ text_poke(addr, ((unsigned char []){opcode}), 1); -+ mutex_unlock(&text_mutex); -+} -+EXPORT_SYMBOL(dtrace_invop_enable); -+ -+/* -+ * Disable an INVOP-based probe. -+ */ -+void dtrace_invop_disable(asm_instr_t *addr, asm_instr_t opcode) -+{ -+ mutex_lock(&text_mutex); -+ text_poke(addr, ((unsigned char []){opcode}), 1); -+ mutex_unlock(&text_mutex); -+} -+EXPORT_SYMBOL(dtrace_invop_disable); -+ - static inline int dtrace_bad_address(void *addr) - { - unsigned long dummy; -diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S -index e3296aa028fe..dee52f39799c 100644 ---- a/arch/x86/kernel/vmlinux.lds.S -+++ b/arch/x86/kernel/vmlinux.lds.S -@@ -437,7 +437,8 @@ INIT_PER_CPU(irq_stack_backing_store); - /* - * Build-time check on the image size: - */ --. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), -+. = ASSERT(((_end < _text) ? 
(_end < KERNEL_IMAGE_SIZE) -+ : (_end - _text <= KERNEL_IMAGE_SIZE)), - "kernel image bigger than KERNEL_IMAGE_SIZE"); - - #ifdef CONFIG_SMP -diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h -index e00f41aa8ec4..900cc08c41f1 100644 ---- a/include/asm-generic/vmlinux.lds.h -+++ b/include/asm-generic/vmlinux.lds.h -@@ -194,6 +194,20 @@ - #define ERROR_INJECT_WHITELIST() - #endif - -+#ifdef CONFIG_DTRACE -+#define DTRACE_SDT_NAMES() . = ALIGN(8); \ -+ __start_dtrace_sdt_names = .; \ -+ KEEP(*(_dtrace_sdt_names)) \ -+ __stop_dtrace_sdt_names = .; -+#define DTRACE_SDT_ARGS() . = ALIGN(8); \ -+ __start_dtrace_sdt_args = .; \ -+ KEEP(*(_dtrace_sdt_args)) \ -+ __stop_dtrace_sdt_args = .; -+#else -+#define DTRACE_SDT_NAMES() -+#define DTRACE_SDT_ARGS() -+#endif -+ - #ifdef CONFIG_EVENT_TRACING - #define FTRACE_EVENTS() . = ALIGN(8); \ - __start_ftrace_events = .; \ -@@ -652,6 +666,8 @@ - FTRACE_EVENTS() \ - TRACE_SYSCALLS() \ - KPROBE_BLACKLIST() \ -+ DTRACE_SDT_NAMES() \ -+ DTRACE_SDT_ARGS() \ - ERROR_INJECT_WHITELIST() \ - MEM_DISCARD(init.rodata) \ - CLK_OF_TABLES() \ -diff --git a/include/linux/dtrace_os.h b/include/linux/dtrace_os.h -index 5bcd77e08a14..f2921ce039a7 100644 ---- a/include/linux/dtrace_os.h -+++ b/include/linux/dtrace_os.h -@@ -25,6 +25,9 @@ extern void __init dtrace_os_init(void); - extern void __init dtrace_psinfo_os_init(void); - extern void __init dtrace_task_os_init(void); - -+extern void *dtrace_alloc_text(struct module *, unsigned long); -+extern void dtrace_free_text(void *); -+ - extern void dtrace_mod_pdata_alloc(struct module *); - extern void dtrace_mod_pdata_free(struct module *); - extern int dtrace_destroy_prov(struct module *); -diff --git a/include/linux/dtrace_sdt.h b/include/linux/dtrace_sdt.h -new file mode 100644 -index 000000000000..3a4d608bc3fa ---- /dev/null -+++ b/include/linux/dtrace_sdt.h -@@ -0,0 +1,32 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2011, 2018, Oracle 
and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _DTRACE_SDT_H_ -+#define _DTRACE_SDT_H_ -+ -+#ifdef CONFIG_DTRACE -+#include <linux/module.h> -+#include <asm/dtrace_arch.h> -+ -+extern void __init dtrace_sdt_init(void); -+extern void __init dtrace_sdt_register(struct module *); -+extern void dtrace_sdt_register_module(struct module *, -+ void *sdt_names_addr, size_t, -+ void *sdt_args_addr, size_t); -+extern void dtrace_sdt_exit(void); -+ -+/* -+ * Functions to be defined in arch/<arch>/kernel/dtrace_sdt.c -+ */ -+extern void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **, int *, int); -+ -+#ifdef CONFIG_X86_64 -+extern void __init dtrace_sdt_init_arch(void); -+#else -+#define dtrace_sdt_init_arch() -+#endif /* CONFIG_X86_64 */ -+ -+#endif /* CONFIG_DTRACE */ -+#endif /* _DTRACE_SDT_H_ */ -diff --git a/include/linux/module.h b/include/linux/module.h -index 1411e8cc803c..7012e81d4909 100644 ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -29,6 +29,8 @@ - #include <linux/percpu.h> - #include <asm/module.h> - -+#include <linux/sdt.h> -+ - /* Not Yet Implemented */ - #define MODULE_SUPPORTED_DEVICE(name) - -@@ -499,8 +501,11 @@ struct module { - #endif - - #ifdef CONFIG_DTRACE -+ struct sdt_probedesc *sdt_probes; -+ unsigned int sdt_probec; - void *pdata; - #endif -+ - #ifdef CONFIG_MODULE_UNLOAD - /* What modules depend on me? */ - struct list_head source_list; -diff --git a/include/linux/sdt.h b/include/linux/sdt.h -new file mode 100644 -index 000000000000..8efb63cff1ac ---- /dev/null -+++ b/include/linux/sdt.h -@@ -0,0 +1,191 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _LINUX_SDT_H_ -+#define _LINUX_SDT_H_ -+ -+#include <linux/sdt_internal.h> -+ -+#if IS_ENABLED(CONFIG_DT_SDT) -+ -+#include <asm/dtrace_sdt_arch.h> -+#include <linux/stringify.h> -+ -+#define DTRACE_PROBE(name, ...) 
{ \ -+ extern int __dtrace_probe_##name(__DTRACE_TYPE_APPLY_DEFAULT(__DTRACE_UINTPTR_EACH, void, ## __VA_ARGS__)); \ -+ (void)__dtrace_probe_##name(__DTRACE_ARG_APPLY(__DTRACE_UINTCAST_EACH, ## __VA_ARGS__)); \ -+ asm volatile(".pushsection _dtrace_sdt_names, \"a\", @progbits\n" \ -+ ".ascii \"" __stringify(name) "\"\n" \ -+ ".byte 0\n" \ -+ ".popsection\n" \ -+ ".pushsection _dtrace_sdt_args, \"a\", @progbits\n" \ -+ __DTRACE_TYPE_APPLY_NOCOMMA(__DTRACE_TYPE_EACH, ## __VA_ARGS__) \ -+ ".byte 0\n" \ -+ ".popsection\n"); \ -+} -+ -+#define DTRACE_PROBE_ENABLED(name) unlikely(({ \ -+ extern int __dtrace_isenabled_##name(__DTRACE_SDT_ISENABLED_PROTO); \ -+ __dtrace_isenabled_##name(__DTRACE_SDT_ISENABLED_ARGS); \ -+})) -+ -+#ifdef CONFIG_DT_SDT_PERF -+ -+#define __DTRACE_UINTPTR_CAST_EACH(x) ({ \ -+ union { \ -+ typeof((x)) __val; \ -+ unsigned char __c; \ -+ unsigned short __s; \ -+ unsigned int __i; \ -+ unsigned long __l; \ -+ unsigned long long __ll; } __u = { .__val = (x) }; \ -+ __builtin_choose_expr(sizeof(__u.__val) == sizeof(__u.__c), __u.__c, \ -+ __builtin_choose_expr(sizeof(__u.__val) == sizeof(__u.__s), __u.__s, \ -+ __builtin_choose_expr(sizeof(__u.__val) == sizeof(__u.__i), __u.__i, \ -+ __builtin_choose_expr(sizeof(__u.__val) == sizeof(__u.__l), __u.__l, \ -+ __builtin_choose_expr(sizeof(__u.__val) == sizeof(__u.__ll), __u.__ll, \ -+ (uintptr_t)&(__u.__val)))))); }) -+ -+#define DTRACE_PROBE_TRACEPOINT(name, ...) { \ -+ extern void __dtrace_probe___perf_##name(__DTRACE_APPLY(__DTRACE_UINTPTR_EACH, ## __VA_ARGS__)); \ -+ __dtrace_probe___perf_##name(__DTRACE_APPLY(__DTRACE_UINTPTR_CAST_EACH, ## __VA_ARGS__)); \ -+} -+ -+#define DTRACE_PROTO_TRACEPOINT(name, ...) 
{ \ -+ asm volatile(".pushsection _dtrace_sdt_names, \"a\", @progbits\n"\ -+ ".ascii \"" __stringify(__perf_##name) "\"\n" \ -+ ".byte 0\n" \ -+ ".popsection\n" \ -+ ".pushsection _dtrace_sdt_args, \"a\", @progbits\n" \ -+ ".ascii \"" __stringify(__VA_ARGS__) "\"\n" \ -+ ".byte 0\n" \ -+ ".popsection\n"); \ -+} -+#else -+ -+#define DTRACE_PROBE_TRACEPOINT(name, ...) -+#define DTRACE_PROTO_TRACEPOINT(name, ...) -+ -+#endif -+ -+#else /* ! IS_ENABLED(CONFIG_DT_SDT) */ -+ -+/* -+ * This apparently redundant call serves to validate the DTRACE_PROBE has the -+ * right number of args even when dtrace is turned off. -+ */ -+#define DTRACE_PROBE(name, ...) \ -+ __DTRACE_DOUBLE_APPLY_NOCOMMA(__DTRACE_NONE, __DTRACE_NONE, ## __VA_ARGS__) \ -+ do { } while (0) -+#define DTRACE_PROBE_ENABLED(name) 0 -+#define DTRACE_PROBE_TRACEPOINT(name, ...) -+#define DTRACE_PROTO_TRACEPOINT(name, ...) -+ -+#endif /* IS_ENABLED(CONFIG_DT_SDT) */ -+ -+#ifdef CONFIG_DTRACE -+ -+struct sdt_probedesc { -+ char *sdpd_name; /* probe name */ -+ char *sdpd_func; /* probe function */ -+#ifndef __GENKSYMS__ -+ const char *sdpd_args; /* arg string */ -+#endif -+ unsigned long sdpd_offset; /* offset of call in text */ -+ struct sdt_probedesc *sdpd_next; /* next static probe */ -+}; -+ -+#endif /* CONFIG_DTRACE */ -+ -+#define DTRACE_SCHED(name, ...) \ -+ DTRACE_PROBE(__sched_##name, ## __VA_ARGS__); -+ -+#define DTRACE_PROC(name, ...) \ -+ DTRACE_PROBE(__proc_##name, ## __VA_ARGS__); -+ -+#define DTRACE_IO(name, ...) \ -+ DTRACE_PROBE(__io_##name, ## __VA_ARGS__); -+ -+#define DTRACE_IO_ENABLED(name) \ -+ DTRACE_PROBE_ENABLED(__io_##name) -+ -+#define DTRACE_ISCSI(name, ...) \ -+ DTRACE_PROBE(__iscsi_##name, ## __VA_ARGS__); -+ -+#define DTRACE_NFSV3(name, ...) \ -+ DTRACE_PROBE(__nfsv3_##name, ## __VA_ARGS__); -+ -+#define DTRACE_NFSV4(name, ...) \ -+ DTRACE_PROBE(__nfsv4_##name, ## __VA_ARGS__); -+ -+#define DTRACE_SMB(name, ...) 
\ -+ DTRACE_PROBE(__smb_##name, ## __VA_ARGS__); -+ -+/* -+ * These definitions are used at probe points to specify the traffic direction; -+ * this helps simplify argument translation. -+ */ -+#define DTRACE_NET_PROBE_OUTBOUND 0x0 -+#define DTRACE_NET_PROBE_INBOUND 0x1 -+ -+#define DTRACE_IP(name, ...) \ -+ DTRACE_PROBE(__ip_##name, ## __VA_ARGS__); -+ -+/* -+ * Default DTRACE_TCP() and DTRACE_UDP() provider definitions specify the -+ * probe point within an is-enabled predicate. This is to avoid the overhead -+ * incurred during argument dereferencing (e.g. calls to ip_hdr(skb)), along -+ * with any conditional evaluation (which would require branching) when the -+ * probe is disabled. -+ * -+ * Because some TCP probe points require additional argument preparation, -+ * we also define the is-enabled predicate directly as -+ * DTRACE_TCP_ENABLED(probename) along with a probe point which does not -+ * the probe in an is-enabled predicate; this allows us to handle cases such -+ * as this: -+ * -+ * if (DTRACE_TCP_ENABLED(state__change)) { -+ * ...argument preparation... -+ * DTRACE_TCP_NOCHECK(state__change, ...); -+ * } -+ */ -+ -+#define DTRACE_TCP(name, ...) \ -+ if (DTRACE_PROBE_ENABLED(__tcp_##name)) \ -+ DTRACE_PROBE(__tcp_##name, ## __VA_ARGS__) -+#define DTRACE_TCP_ENABLED(name) \ -+ DTRACE_PROBE_ENABLED(__tcp_##name) -+#define DTRACE_TCP_NOCHECK(name, ...) \ -+ DTRACE_PROBE(__tcp_##name, ## __VA_ARGS__); -+ -+#define DTRACE_UDP(name, ...) \ -+ if (DTRACE_PROBE_ENABLED(__udp_##name)) \ -+ DTRACE_PROBE(__udp_##name, ## __VA_ARGS__); -+ -+#define DTRACE_SYSEVENT(name, ...) \ -+ DTRACE_PROBE(__sysevent_##name, ## __VA_ARGS__); -+ -+#define DTRACE_XPV(name, ...) \ -+ DTRACE_PROBE(__xpv_##name, ## __VA_ARGS__); -+ -+#define DTRACE_FC(name, ...) \ -+ DTRACE_PROBE(__fc_##name, ## __VA_ARGS__); -+ -+#define DTRACE_SRP(name, ...) 
\ -+ DTRACE_PROBE(__srp_##name, ## __VA_ARGS__); -+ -+#define DTRACE_LOCKSTAT_ENABLED(name) \ -+ DTRACE_PROBE_ENABLED(__lockstat_##name) -+ -+#define DTRACE_LOCKSTAT(name, ...) \ -+ DTRACE_PROBE(__lockstat_##name, ## __VA_ARGS__) -+ -+#define DTRACE_LOCKSTAT_RW_WRITER 0 -+#define DTRACE_LOCKSTAT_RW_READER 1 -+ -+/* Needed for lockstat probes where we cannot include ktime.h */ -+extern u64 dtrace_gethrtime_ns(void); -+ -+#endif /* _LINUX_SDT_H_ */ -diff --git a/include/linux/sdt_internal.h b/include/linux/sdt_internal.h -new file mode 100644 -index 000000000000..b544f8e619cf ---- /dev/null -+++ b/include/linux/sdt_internal.h -@@ -0,0 +1,276 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Hide away all the terrible macro magic. -+ * -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _LINUX_SDT_INTERNAL_H_ -+#define _LINUX_SDT_INTERNAL_H_ -+ -+#include <linux/types.h> -+ -+/* -+ * This counts the number of args. -+ */ -+#define __DTRACE_NARGS_SEQ(dummy, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, \ -+ _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, \ -+ _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, \ -+ _31, _32, _33, _34, _35, _36, N, ...) N -+#define __DTRACE_NARGS(...) \ -+ __DTRACE_NARGS_SEQ(dummy, ##__VA_ARGS__, 36, 35, 34, 33, 32, 31, \ -+ 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, \ -+ 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, \ -+ 8, 7, 6, 5, 4, 3, 2, 1, 0) -+ -+/* -+ * This will let macros expand before concatting them. -+ */ -+#define __DTRACE_PRIMITIVE_CAT(x, y) x ## y -+#define __DTRACE_CAT(x, y) __DTRACE_PRIMITIVE_CAT(x, y) -+ -+#define __DTRACE_COMMA , -+#define __DTRACE_NO_COMMA -+#define __DTRACE_NONE(x) -+ -+/* -+ * This will call two macros on each argument-pair passed in (the first two args -+ * are the names of the macros to call). Its TYPE and NAME variants will throw -+ * away the name and type arguments, respectively. 
__DTRACE_*_APPLY_NOCOMMA -+ * are like DTRACE_*_APPLY, but also omit the comma between arguments in the -+ * expansion of the macro. DTRACE_TYPE_APPLY_DEFAULT lets you specify a default -+ * if no variadic args are provided. -+ */ -+#define __DTRACE_DOUBLE_APPLY(type_macro, arg_macro, ...) \ -+ __DTRACE_CAT(__DTRACE_DOUBLE_APPLY_, \ -+ __DTRACE_NARGS(__VA_ARGS__))(type_macro, \ -+ arg_macro, __DTRACE_COMMA, \ -+ __DTRACE_COMMA, , ## __VA_ARGS__) -+#define __DTRACE_DOUBLE_APPLY_NOCOMMA(type_macro, arg_macro, ...) \ -+ __DTRACE_CAT(__DTRACE_DOUBLE_APPLY_, \ -+ __DTRACE_NARGS(__VA_ARGS__))(type_macro, \ -+ arg_macro, __DTRACE_NO_COMMA, \ -+ __DTRACE_NO_COMMA, , ## __VA_ARGS__) -+#define __DTRACE_TYPE_APPLY(type_macro, ...) \ -+ __DTRACE_CAT(__DTRACE_DOUBLE_APPLY_, \ -+ __DTRACE_NARGS(__VA_ARGS__))(type_macro, \ -+ __DTRACE_NONE, __DTRACE_NO_COMMA, \ -+ __DTRACE_COMMA, , ## __VA_ARGS__) -+#define __DTRACE_TYPE_APPLY_NOCOMMA(type_macro, ...) \ -+ __DTRACE_CAT(__DTRACE_DOUBLE_APPLY_, \ -+ __DTRACE_NARGS(__VA_ARGS__))(type_macro, \ -+ __DTRACE_NONE, __DTRACE_NO_COMMA, \ -+ __DTRACE_NO_COMMA, , ## __VA_ARGS__) -+#define __DTRACE_TYPE_APPLY_DEFAULT(type_macro, def, ...) \ -+ __DTRACE_CAT(__DTRACE_DOUBLE_APPLY_, \ -+ __DTRACE_NARGS(__VA_ARGS__))(type_macro, \ -+ __DTRACE_NONE, __DTRACE_NO_COMMA, \ -+ __DTRACE_COMMA, def, ## __VA_ARGS__) -+#define __DTRACE_ARG_APPLY(arg_macro, ...) 
\ -+ __DTRACE_CAT(__DTRACE_DOUBLE_APPLY_, \ -+ __DTRACE_NARGS(__VA_ARGS__))(__DTRACE_NONE, \ -+ arg_macro, __DTRACE_NO_COMMA, \ -+ __DTRACE_COMMA, , ## __VA_ARGS__) -+#define __DTRACE_DOUBLE_APPLY_0(t, a, comma_t, comma_a, def) def -+#define __DTRACE_DOUBLE_APPLY_2(t, a, comma_t, comma_a, def, type1, arg1) \ -+ t(type1) comma_t a(arg1) -+#define __DTRACE_DOUBLE_APPLY_4(t, a, comma_t, comma_a, def, type1, arg1, \ -+ type2, arg2) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) -+#define __DTRACE_DOUBLE_APPLY_6(t, a, comma_t, comma_a, def, type1, \ -+ arg1, type2, arg2, type3, arg3) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) -+#define __DTRACE_DOUBLE_APPLY_8(t, a, comma_t, comma_a, def, type1, arg1, \ -+ type2, arg2, type3, arg3, type4, arg4) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) -+#define __DTRACE_DOUBLE_APPLY_10(t, a, comma_t, comma_a, def, type1, arg1,\ -+ type2, arg2, type3, arg3, type4, arg4, \ -+ type5, arg5) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) -+#define __DTRACE_DOUBLE_APPLY_12(t, a, comma_t, comma_a, def, type1, \ -+ arg1, type2, arg2, type3, arg3, type4, \ -+ arg4, type5, arg5, type6, arg6) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) -+#define __DTRACE_DOUBLE_APPLY_14(t, a, comma_t, comma_a, def, type1, \ -+ arg1, type2, arg2, type3, arg3, type4, \ -+ arg4, type5, arg5, type6, arg6, type7, \ -+ arg7) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) 
comma_a \ -+ t(type7) comma_t a(arg7) -+#define __DTRACE_DOUBLE_APPLY_16(t, a, comma_t, comma_a, def, type1, \ -+ arg1, type2, arg2, type3, arg3, type4, \ -+ arg4, type5, arg5, type6, arg6, type7, \ -+ arg7, type8, arg8) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) comma_a \ -+ t(type7) comma_t a(arg7) comma_a t(type8) comma_t a(arg8) -+#define __DTRACE_DOUBLE_APPLY_18(t, a, comma_t, comma_a, def, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6, type7, arg7, type8, arg8, type9, arg9) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) comma_a \ -+ t(type7) comma_t a(arg7) comma_a t(type8) comma_t a(arg8) comma_a \ -+ t(type9) comma_t a(arg9) -+#define __DTRACE_DOUBLE_APPLY_20(t, a, comma_t, comma_a, def, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6, type7, arg7, type8, arg8, type9, arg9, typea, arga) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) comma_a \ -+ t(type7) comma_t a(arg7) comma_a t(type8) comma_t a(arg8) comma_a \ -+ t(type9) comma_t a(arg9) comma_a t(typea) comma_t a(arga) -+#define __DTRACE_DOUBLE_APPLY_22(t, a, comma_t, comma_a, def, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6, type7, arg7, type8, arg8, type9, arg9, typea, arga, typeb, argb) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) comma_a \ -+ t(type7) comma_t a(arg7) comma_a t(type8) 
comma_t a(arg8) comma_a \ -+ t(type9) comma_t a(arg9) comma_a t(typea) comma_t a(arga) comma_a \ -+ t(typeb) comma_t a(argb) -+#define __DTRACE_DOUBLE_APPLY_24(t, a, comma_t, comma_a, def, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6, type7, arg7, type8, arg8, type9, arg9, typea, arga, typeb, argb, typec, argc) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) comma_a \ -+ t(type7) comma_t a(arg7) comma_a t(type8) comma_t a(arg8) comma_a \ -+ t(type9) comma_t a(arg9) comma_a t(typea) comma_t a(arga) comma_a \ -+ t(typeb) comma_t a(argb) comma_a t(typec) comma_t a(argc) -+#define __DTRACE_DOUBLE_APPLY_26(t, a, comma_t, comma_a, def, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6, type7, arg7, type8, arg8, type9, arg9, typea, arga, typeb, argb, typec, argc, typed, argd) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) comma_a \ -+ t(type7) comma_t a(arg7) comma_a t(type8) comma_t a(arg8) comma_a \ -+ t(type9) comma_t a(arg9) comma_a t(typea) comma_t a(arga) comma_a \ -+ t(typeb) comma_t a(argb) comma_a t(typec) comma_t a(argc) comma_a \ -+ t(typed) comma_t a(argd) -+#define __DTRACE_DOUBLE_APPLY_28(t, a, comma_t, comma_a, def, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6, type7, arg7, type8, arg8, type9, arg9, typea, arga, typeb, argb, typec, argc, typed, argd, typee, arge) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) comma_a \ -+ t(type7) comma_t a(arg7) comma_a t(type8) comma_t a(arg8) comma_a \ -+ t(type9) 
comma_t a(arg9) comma_a t(typea) comma_t a(arga) comma_a \ -+ t(typeb) comma_t a(argb) comma_a t(typec) comma_t a(argc) comma_a \ -+ t(typed) comma_t a(argd) comma_a t(typee) comma_t a(arge) -+#define __DTRACE_DOUBLE_APPLY_30(t, a, comma_t, comma_a, def, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6, type7, arg7, type8, arg8, type9, arg9, typea, arga, typeb, argb, typec, argc, typed, argd, typee, arge, typef, argf) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) comma_a \ -+ t(type7) comma_t a(arg7) comma_a t(type8) comma_t a(arg8) comma_a \ -+ t(type9) comma_t a(arg9) comma_a t(typea) comma_t a(arga) comma_a \ -+ t(typeb) comma_t a(argb) comma_a t(typec) comma_t a(argc) comma_a \ -+ t(typed) comma_t a(argd) comma_a t(typee) comma_t a(arge) comma_a \ -+ t(typef) comma_t a(argf) -+#define __DTRACE_DOUBLE_APPLY_32(t, a, comma_t, comma_a, def, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6, type7, arg7, type8, arg8, type9, arg9, typea, arga, typeb, argb, typec, argc, typed, argd, typee, arge, typef, argf, typeg, argg) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) comma_a \ -+ t(type7) comma_t a(arg7) comma_a t(type8) comma_t a(arg8) comma_a \ -+ t(type9) comma_t a(arg9) comma_a t(typea) comma_t a(arga) comma_a \ -+ t(typeb) comma_t a(argb) comma_a t(typec) comma_t a(argc) comma_a \ -+ t(typed) comma_t a(argd) comma_a t(typee) comma_t a(arge) comma_a \ -+ t(typef) comma_t a(argf) comma_a t(typeg) comma_t a(argg) -+#define __DTRACE_DOUBLE_APPLY_34(t, a, comma_t, comma_a, def, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6, type7, arg7, type8, arg8, type9, arg9, typea, 
arga, typeb, argb, typec, argc, typed, argd, typee, arge, typef, argf, typeg, argg, typeh, argh) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) comma_a \ -+ t(type7) comma_t a(arg7) comma_a t(type8) comma_t a(arg8) comma_a \ -+ t(type9) comma_t a(arg9) comma_a t(typea) comma_t a(arga) comma_a \ -+ t(typeb) comma_t a(argb) comma_a t(typec) comma_t a(argc) comma_a \ -+ t(typed) comma_t a(argd) comma_a t(typee) comma_t a(arge) comma_a \ -+ t(typef) comma_t a(argf) comma_a t(typeg) comma_t a(argg) comma_a \ -+ t(typeh) comma_t a(argh) -+#define __DTRACE_DOUBLE_APPLY_36(t, a, comma_t, comma_a, def, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6, type7, arg7, type8, arg8, type9, arg9, typea, arga, typeb, argb, typec, argc, typed, argd, typee, arge, typef, argf, typeg, argg, typeh, argh, typei, argi) \ -+ t(type1) comma_t a(arg1) comma_a t(type2) comma_t a(arg2) comma_a \ -+ t(type3) comma_t a(arg3) comma_a t(type4) comma_t a(arg4) comma_a \ -+ t(type5) comma_t a(arg5) comma_a t(type6) comma_t a(arg6) comma_a \ -+ t(type7) comma_t a(arg7) comma_a t(type8) comma_t a(arg8) comma_a \ -+ t(type9) comma_t a(arg9) comma_a t(typea) comma_t a(arga) comma_a \ -+ t(typeb) comma_t a(argb) comma_a t(typec) comma_t a(argc) comma_a \ -+ t(typed) comma_t a(argd) comma_a t(typee) comma_t a(arge) comma_a \ -+ t(typef) comma_t a(argf) comma_a t(typeg) comma_t a(argg) comma_a \ -+ t(typeh) comma_t a(argh) comma_a t(typei) comma_t a(argi) -+ -+#define __DTRACE_DOUBLE_APPLY_ERROR Error: type specified without arg. 
-+#define __DTRACE_DOUBLE_APPLY_1 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_3 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_5 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_7 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_9 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_11 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_13 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_15 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_17 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_19 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_21 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_23 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_25 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_27 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_29 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_31 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_33 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_35 __DTRACE_DOUBLE_APPLY_ERROR -+#define __DTRACE_DOUBLE_APPLY_37 __DTRACE_DOUBLE_APPLY_ERROR -+ -+#define __DTRACE_UINTPTR_EACH(x) uintptr_t -+ -+#define __DTRACE_UINTCAST_EACH(x) (uintptr_t)(x) -+#define __DTRACE_TYPE_EACH(x) ".ascii \"" __stringify(x) ",\"\n" -+ -+/* -+ * Convert everything to the appropriate integral type, unless too large to fit -+ * into any of them, in which case its address is taken instead. -+ */ -+ -+/* -+ * This will call a macro on each argument passed in, with optional default for -+ * zero args. -+ */ -+#define __DTRACE_APPLY(macro, ...) __DTRACE_CAT(__DTRACE_APPLY_, __DTRACE_NARGS(__VA_ARGS__))(macro, , ## __VA_ARGS__) -+#define __DTRACE_APPLY_DEFAULT(macro, def, ...) 
__DTRACE_CAT(__DTRACE_APPLY_, __DTRACE_NARGS(__VA_ARGS__))(macro, def, ## __VA_ARGS__) -+#define __DTRACE_APPLY_0(m, def) def -+#define __DTRACE_APPLY_1(m, def, x1) m(x1) -+#define __DTRACE_APPLY_2(m, def, x1, x2) m(x1), m(x2) -+#define __DTRACE_APPLY_3(m, def, x1, x2, x3) m(x1), m(x2), m(x3) -+#define __DTRACE_APPLY_4(m, def, x1, x2, x3, x4) m(x1), m(x2), m(x3), m(x4) -+#define __DTRACE_APPLY_5(m, def, x1, x2, x3, x4, x5) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5) -+#define __DTRACE_APPLY_6(m, def, x1, x2, x3, x4, x5, x6) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6) -+#define __DTRACE_APPLY_7(m, def, x1, x2, x3, x4, x5, x6, x7) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7) -+#define __DTRACE_APPLY_8(m, def, x1, x2, x3, x4, x5, x6, x7, x8) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7), m(x8) -+#define __DTRACE_APPLY_9(m, def, x1, x2, x3, x4, x5, x6, x7, x8, x9) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7), m(x8), m(x9) -+#define __DTRACE_APPLY_10(m, def, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7), m(x8), m(x9), m(xa) -+#define __DTRACE_APPLY_11(m, def, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7), m(x8), m(x9), m(xa), \ -+ m(xb) -+#define __DTRACE_APPLY_12(m, def, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7), m(x8), m(x9), m(xa), \ -+ m(xb), m(xc) -+#define __DTRACE_APPLY_13(m, def, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7), m(x8), m(x9), m(xa), \ -+ m(xb), m(xc), m(xd) -+#define __DTRACE_APPLY_14(m, def, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd, xe) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7), m(x8), m(x9), m(xa), \ -+ m(xb), m(xc), m(xd), m(xe) -+#define __DTRACE_APPLY_15(m, def, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd, xe, xf) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7), m(x8), m(x9), 
m(xa), \ -+ m(xb), m(xc), m(xd), m(xe), m(xf) -+#define __DTRACE_APPLY_16(m, def, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd, xe, xf, xg) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7), m(x8), m(x9), m(xa), \ -+ m(xb), m(xc), m(xd), m(xe), m(xf), m(xg) -+#define __DTRACE_APPLY_17(m, def, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd, xe, xf, xg, xh) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7), m(x8), m(x9), m(xa), \ -+ m(xb), m(xc), m(xd), m(xe), m(xf), m(xg), m(xh) -+#define __DTRACE_APPLY_18(m, def, x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc, xd, xe, xf, xg, xh, xi) \ -+ m(x1), m(x2), m(x3), m(x4), m(x5), m(x6), m(x7), m(x8), m(x9), m(xa), \ -+ m(xb), m(xc), m(xd), m(xe), m(xf), m(xg), m(xh), m(xi) -+ -+#endif /* _LINUX_SDT_INTERNAL_H */ -diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h -index 1fb11daa5c53..7fda999da560 100644 ---- a/include/linux/tracepoint.h -+++ b/include/linux/tracepoint.h -@@ -19,6 +19,7 @@ - #include <linux/cpumask.h> - #include <linux/rcupdate.h> - #include <linux/tracepoint-defs.h> -+#include <linux/sdt.h> - - struct module; - struct tracepoint; -@@ -232,6 +233,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) - extern struct tracepoint __tracepoint_##name; \ - static inline void trace_##name(proto) \ - { \ -+ DTRACE_PROBE_TRACEPOINT(name, args); \ -+ DTRACE_PROTO_TRACEPOINT(name, proto); \ - if (static_key_false(&__tracepoint_##name.key)) \ - __DO_TRACE(&__tracepoint_##name, \ - TP_PROTO(data_proto), \ -@@ -298,7 +301,10 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) - #else /* !TRACEPOINTS_ENABLED */ - #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ - static inline void trace_##name(proto) \ -- { } \ -+ { \ -+ DTRACE_PROBE_TRACEPOINT(name, args); \ -+ DTRACE_PROTO_TRACEPOINT(name, proto); \ -+ } \ - static inline void trace_##name##_rcuidle(proto) \ - { } \ - static inline int \ -diff --git 
a/kernel/dtrace/Kconfig b/kernel/dtrace/Kconfig -index d04ca0ab7ac9..c1ec55d8750e 100644 ---- a/kernel/dtrace/Kconfig -+++ b/kernel/dtrace/Kconfig -@@ -23,6 +23,22 @@ config DT_CORE - - if DT_CORE - -+config DT_SDT -+ tristate "Statically Defined Tracing" -+ default m -+ select KALLSYMS -+ help -+ Statically defined tracepoints in the kernel. -+ -+config DT_SDT_PERF -+ bool "DTrace perf-events Probes" -+ default y -+ depends on DT_SDT -+ select TRACEPOINTS -+ help -+ Provides the perf provider, containing a DTrace probe for each -+ perf-events tracepoint in the system. -+ - config DT_SYSTRACE - tristate "System Call Tracing" - default m -diff --git a/kernel/dtrace/Makefile b/kernel/dtrace/Makefile -index 68fc3861e5d1..06329cbe52cb 100644 ---- a/kernel/dtrace/Makefile -+++ b/kernel/dtrace/Makefile -@@ -4,10 +4,11 @@ - - DT_CORE_ARCH_OBJS = $(addprefix ../../arch/$(SRCARCH)/kernel/, \ - dtrace_syscall.o dtrace_syscall_stubs.o \ -- dtrace_util.o) -+ dtrace_sdt.o dtrace_util.o) - - ifdef CONFIG_DT_CORE - obj-y += cyclic.o dtrace_os.o dtrace_cpu.o \ -+ dtrace_sdt_core.o \ - dtrace_task.o dtrace_psinfo.o \ - $(DT_CORE_ARCH_OBJS) - endif -diff --git a/kernel/dtrace/dtrace_os.c b/kernel/dtrace/dtrace_os.c -index 177b93e3177e..1b13a92fb20e 100644 ---- a/kernel/dtrace/dtrace_os.c -+++ b/kernel/dtrace/dtrace_os.c -@@ -19,6 +19,7 @@ - #include <linux/binfmts.h> - #include <linux/dtrace_cpu.h> - #include <linux/dtrace_os.h> -+#include <linux/dtrace_sdt.h> - #include <linux/fs.h> - #include <linux/hardirq.h> - #include <linux/interrupt.h> -@@ -66,6 +67,46 @@ void __init dtrace_os_init(void) - if (dtrace_pdata_cachep == NULL) - pr_debug("Can't allocate kmem cache for pdata\n"); - -+ /* -+ * A little bit of magic... -+ * We create a dummy module to represent the core Linux kernel. The -+ * only data we're interested in is the name, the SDT probe points data -+ * (to be filled in by dtrace_sdt_register()), and the probe data. 
-+ * DTrace uses an architecture-specific structure (hidden from us here) -+ * to hold some data. -+ */ -+ dtrace_kmod = kmalloc(sizeof(struct module), GFP_KERNEL | __GFP_ZERO); -+ if (dtrace_kmod == NULL) { -+ pr_warn("%s: cannot allocate kernel pseudo-module\n", -+ __func__); -+ return; -+ } -+ -+ strlcpy(dtrace_kmod->name, "vmlinux", MODULE_NAME_LEN); -+ -+ /* -+ * Some sizing info is required for kernel module. We are going to use -+ * modules VA range for trampoline anyway so lets pretend a kernel has -+ * no init section and VA range (0, MODULES_VADDR) is occupied by -+ * kernel itself -+ */ -+#ifdef CONFIG_X86_64 -+ dtrace_kmod->core_layout.base = (void *)__START_KERNEL_map; -+ dtrace_kmod->core_layout.size = KERNEL_IMAGE_SIZE; -+#elif defined(CONFIG_SPARC64) -+ /* Hardcoded see pgtable_64.h */ -+ dtrace_kmod->core_layout.base = (void *)0x4000000; -+ dtrace_kmod->core_layout.size = 0x2000000; -+#endif -+ -+ dtrace_kmod->state = MODULE_STATE_LIVE; -+ atomic_inc(&dtrace_kmod->refcnt); -+ -+ dtrace_mod_pdata_alloc(dtrace_kmod); -+ -+ INIT_LIST_HEAD(&dtrace_kmod->source_list); -+ INIT_LIST_HEAD(&dtrace_kmod->target_list); -+ - /* - * We need to set up a psinfo structure for PID 0 (swapper). - */ -@@ -73,7 +114,49 @@ void __init dtrace_os_init(void) - dtrace_psinfo_os_init(); - dtrace_task_init(&init_task); - dtrace_psinfo_alloc(&init_task); -+ -+ dtrace_sdt_init(); -+ dtrace_sdt_register(dtrace_kmod); -+} -+ -+#define MIN(a, b) (((a) < (b)) ? (a) : (b)) -+#define MAX(a, b) (((a) > (b)) ? 
(a) : (b)) -+#define TRAMP_RANGE 0x80000000 -+ -+void *dtrace_alloc_text(struct module *mp, unsigned long size) -+{ -+ unsigned long mp_start, mp_end; -+ unsigned long va_start, va_end; -+ void *trampoline; -+ -+ /* module range */ -+ mp_start = (unsigned long) mp->core_layout.base; -+ mp_end = mp_start + mp->core_layout.size; -+ -+ if (mp->init_layout.size) { -+ mp_start = MIN(mp_start, (unsigned long)mp->init_layout.base); -+ mp_end = MAX(mp_end, (unsigned long)mp->init_layout.base + -+ mp->init_layout.size); -+ } -+ -+ /* get trampoline range */ -+ va_end = MIN(mp_start + TRAMP_RANGE, MODULES_END); -+ va_start = (mp_end < TRAMP_RANGE) ? 0 : mp_end - TRAMP_RANGE; -+ va_start = MAX(va_start, MODULES_VADDR); -+ -+ trampoline = __vmalloc_node_range(size, 1, va_start, va_end, -+ GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, -+ __builtin_return_address(0)); -+ -+ return trampoline; -+} -+EXPORT_SYMBOL(dtrace_alloc_text); -+ -+void dtrace_free_text(void *ptr) -+{ -+ return vfree(ptr); - } -+EXPORT_SYMBOL(dtrace_free_text); - - /* - * MODULE SUPPORT FUNCTIONS -diff --git a/kernel/dtrace/dtrace_sdt_core.c b/kernel/dtrace/dtrace_sdt_core.c -new file mode 100644 -index 000000000000..90b86726f195 ---- /dev/null -+++ b/kernel/dtrace/dtrace_sdt_core.c -@@ -0,0 +1,364 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_sdt_core.c -+ * DESCRIPTION: DTrace - SDT probes -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/kernel.h> -+#include <linux/memory.h> -+#include <linux/module.h> -+#include <linux/dtrace_os.h> -+#include <linux/dtrace_sdt.h> -+#include <linux/jhash.h> -+#include <linux/sdt.h> -+#include <linux/slab.h> -+#include <linux/string.h> -+#include <linux/vmalloc.h> -+#include <asm-generic/bitsperlong.h> -+#include <asm-generic/sections.h> -+ -+const char *sdt_prefix = "__dtrace_probe_"; -+int dtrace_nosdt; -+ -+/* -+ * Compiled-in SDT probe data. -+ */ -+extern const unsigned long dtrace_sdt_probes[]; -+extern const char dtrace_sdt_strings[]; -+extern const unsigned long dtrace_sdt_nprobes; -+ -+/* -+ * Markers of core-kernel sdt_args and sdt_names sections. -+ */ -+extern const char __start_dtrace_sdt_args[]; -+extern const char __stop_dtrace_sdt_args[]; -+extern const char __start_dtrace_sdt_names[]; -+extern const char __stop_dtrace_sdt_names[]; -+ -+static int sdt_probe_set(struct sdt_probedesc *sdp, const char *name, -+ const char *func, uintptr_t addr, asm_instr_t **paddr, -+ struct sdt_probedesc *prv) -+{ -+ sdp->sdpd_name = kstrdup(name, GFP_KERNEL); -+ if (sdp->sdpd_name == NULL) { -+ kfree(sdp); -+ return 1; -+ } -+ -+ sdp->sdpd_func = kstrdup(func, GFP_KERNEL); -+ if (sdp->sdpd_func == NULL) { -+ kfree(sdp->sdpd_name); -+ kfree(sdp); -+ return 1; -+ } -+ -+ sdp->sdpd_args = NULL; -+ sdp->sdpd_offset = addr; -+ sdp->sdpd_next = NULL; -+ -+ *paddr = (asm_instr_t *)addr; -+ -+ if (prv && strcmp(prv->sdpd_name, sdp->sdpd_name) == 0 -+ && strcmp(prv->sdpd_func, sdp->sdpd_func) == 0) -+ prv->sdpd_next = sdp; -+ -+ return 0; -+} -+ -+/* -+ * Transfer the SDT args section into the sdpd_args field left NULL above. -+ * -+ * The memory pointed to by args_start must have a lifetime at least as long as -+ * that pointed to by sdpd. 
-+ */ -+void dtrace_sdt_stash_args(const char *module_name, -+ struct sdt_probedesc *sdpd, size_t nprobes, -+ const char *names_start, size_t names_len, -+ const char *args_start, size_t args_len) -+{ -+ struct probe_name_hashent_t { -+ const char *pnhe_name; -+ const char *pnhe_args; -+ } *args_by_name; -+ int i; -+ const char *namep, *argp; -+ size_t hashsize; -+ -+ /* -+ * We need to find the probes (and there may be many) in the sdpd -+ * corresponding to the probe with that name in the argtype section. -+ * -+ * Build a hashtable mapping from probe name -> args string, ignoring -+ * duplicate probe names except to check (in debugging mode) that they -+ * have the same args string as the first. Then cycle over the sdpd -+ * looking up each probe in turn and pointing to the same place. -+ * -+ * We don't know how many entries there are in the table, but we do know -+ * there cannot be more than nprobes (and are probably less). -+ */ -+ -+ hashsize = nprobes * 4; /* arbitrary expansion factor */ -+ args_by_name = vzalloc(hashsize * sizeof(struct probe_name_hashent_t)); -+ if (args_by_name == NULL) { -+ pr_warn("%s: cannot allocate hash for sdt args population\n", -+ __func__); -+ return; -+ } -+ -+ namep = names_start; -+ argp = args_start; -+ while ((namep < names_start + names_len) && -+ (argp < args_start + args_len)) { -+ -+ size_t l = strlen(namep); -+ u32 h = jhash(namep, l, 0) % hashsize; -+ -+ while (args_by_name[h].pnhe_name != NULL && -+ strcmp(args_by_name[h].pnhe_name, namep) != 0) { -+ h++; -+ h %= hashsize; -+ } -+ -+ if (args_by_name[h].pnhe_name == NULL) { -+ args_by_name[h].pnhe_name = namep; -+ args_by_name[h].pnhe_args = argp; -+ } -+#if defined(CONFIG_DT_DEBUG) -+ else if (strcmp(args_by_name[h].pnhe_name, namep) != 0) -+ pr_warn("%s: multiple distinct arg strings for probe " -+ "%s found: %s versus %s", -+ module_name, namep, -+ args_by_name[h].pnhe_args, -+ argp); -+#endif -+ namep += l + 1; -+ argp += strlen(argp) + 1; -+ } -+ -+#if 
defined(CONFIG_DT_DEBUG) -+ if ((namep < names_start + names_len) || (argp < args_start + args_len)) -+ pr_warn("%s: Not all SDT names or args consumed: %zi " -+ "bytes of names and %zi of args left over. " -+ "Some arg types will be mis-assigned.\n", module_name, -+ namep - (names_start + names_len), -+ argp - (args_start + args_len)); -+#endif -+ -+ for (i = 0; i < nprobes; i++) { -+ size_t l = strlen(sdpd[i].sdpd_name); -+ u32 h = jhash(sdpd[i].sdpd_name, l, 0) % hashsize; -+ -+ /* -+ * Is-enabled probes have no arg string. -+ */ -+ if (sdpd[i].sdpd_name[0] == '?') -+ continue; -+ -+ while (args_by_name[h].pnhe_name != NULL && -+ strcmp(sdpd[i].sdpd_name, -+ args_by_name[h].pnhe_name) != 0) { -+ h++; -+ h %= hashsize; -+ } -+ -+ if (args_by_name[h].pnhe_name == NULL) { -+ /* -+ * No arg string. Peculiar: report in debugging mode. -+ */ -+#if defined(CONFIG_DT_DEBUG) -+ pr_warn("%s: probe %s has no arg string.\n", -+ module_name, sdpd[i].sdpd_name); -+#endif -+ continue; -+ } -+ -+ sdpd[i].sdpd_args = args_by_name[h].pnhe_args; -+ } -+ vfree(args_by_name); -+} -+ -+/* -+ * Register the SDT probes for the core kernel, i.e. SDT probes that reside in -+ * vmlinux. For SDT probes in kernel modules, we use dtrace_mod_notifier(). -+ */ -+void __init dtrace_sdt_register(struct module *mp) -+{ -+ int i, cnt; -+ struct sdt_probedesc *sdps; -+ asm_instr_t **addrs; -+ int *is_enabled; -+ void *args; -+ size_t args_len; -+ -+ if (mp == NULL) { -+ pr_warn("%s: no module provided - nothing registered\n", -+ __func__); -+ return; -+ } -+ -+ /* -+ * Just in case we run into failures further on... -+ */ -+ mp->sdt_probes = NULL; -+ mp->sdt_probec = 0; -+ -+ if (dtrace_sdt_nprobes == 0 || dtrace_nosdt) -+ return; -+ -+ /* -+ * Allocate the array of SDT probe descriptions to be registered in the -+ * vmlinux pseudo-module. 
-+ */ -+ sdps = (struct sdt_probedesc *)vmalloc(dtrace_sdt_nprobes * -+ sizeof(struct sdt_probedesc)); -+ if (sdps == NULL) { -+ pr_warn("%s: cannot allocate SDT probe array\n", __func__); -+ return; -+ } -+ -+ /* -+ * Create a list of addresses (SDT probe locations) that need to be -+ * patched with a NOP instruction (or instruction sequence), and another -+ * array indicating whether each probe needs patching with an -+ * arch-dependent false return instead. -+ */ -+ addrs = (asm_instr_t **)vmalloc(dtrace_sdt_nprobes * -+ sizeof(asm_instr_t *)); -+ is_enabled = (int *)vmalloc(dtrace_sdt_nprobes * sizeof(int)); -+ if ((addrs == NULL) || (is_enabled == NULL)) { -+ pr_warn("%s: cannot allocate SDT probe address/is-enabled " -+ "lists\n", __func__); -+ vfree(sdps); -+ vfree(addrs); -+ vfree(is_enabled); -+ return; -+ } -+ -+ for (i = cnt = 0; i < dtrace_sdt_nprobes; i++) { -+ uintptr_t addr, poff, foff; -+ const char *fname = &dtrace_sdt_strings[foff]; -+ const char *pname; -+ -+ addr = dtrace_sdt_probes[i * 3]; /* address */ -+ poff = dtrace_sdt_probes[i * 3 + 1]; /* probe name offset */ -+ foff = dtrace_sdt_probes[i * 3 + 2]; /* func name offset */ -+ pname = &dtrace_sdt_strings[poff]; -+ fname = &dtrace_sdt_strings[foff]; -+ -+ is_enabled[cnt] = (pname[0] == '?'); -+ -+ if (sdt_probe_set(&sdps[cnt], pname, fname, addr, &addrs[cnt], -+ cnt > 0 ? &sdps[cnt - 1] : NULL)) -+ pr_warn("%s: failed to add SDT probe %s for %s\n", -+ __func__, pname, fname); -+ else -+ cnt++; -+ } -+ -+ mp->sdt_probes = sdps; -+ mp->sdt_probec = cnt; -+ -+ dtrace_sdt_nop_multi(addrs, is_enabled, cnt); -+ -+ /* -+ * Allocate space for the array of arg types, and copy it in from the -+ * (discardable) kernel section. We will need to keep it. (The -+ * identically-ordered array of probe names is not needed after -+ * initialization.) 
-+ */ -+ args_len = __stop_dtrace_sdt_args - __start_dtrace_sdt_args; -+ args = vmalloc(args_len); -+ if (args == NULL) { -+ pr_warn("%s: cannot allocate table of SDT arg types\n", -+ __func__); -+ goto end; -+ } -+ -+ memcpy(args, __start_dtrace_sdt_args, args_len); -+ -+ dtrace_sdt_stash_args("vmlinux", sdps, cnt, -+ __start_dtrace_sdt_names, -+ (__stop_dtrace_sdt_names - __start_dtrace_sdt_names), -+ args, args_len); -+ -+end: -+ vfree(addrs); -+ vfree(is_enabled); -+} -+ -+static int __init nosdt(char *str) -+{ -+ dtrace_nosdt = 1; -+ -+ return 0; -+} -+ -+early_param("nosdt", nosdt); -+ -+void dtrace_sdt_register_module(struct module *mp, -+ void *sdt_names_addr, size_t sdt_names_len, -+ void *sdt_args_addr, size_t sdt_args_len) -+{ -+ int i, cnt; -+ struct sdt_probedesc *sdp; -+ asm_instr_t **addrs; -+ int *is_enabled; -+ -+ if (mp->sdt_probec == 0 || mp->sdt_probes == NULL) -+ return; -+ -+ /* -+ * Create a list of addresses (SDT probe locations) that need to be -+ * patched with a NOP instruction (or instruction sequence). 
-+ */ -+ addrs = (asm_instr_t **)vmalloc(mp->sdt_probec * -+ sizeof(asm_instr_t *)); -+ is_enabled = (int *)vmalloc(mp->sdt_probec * sizeof(int)); -+ if ((addrs == NULL) || (is_enabled == NULL)) { -+ pr_warn("%s: cannot allocate SDT probe address list (%s)\n", -+ __func__, mp->name); -+ vfree(addrs); -+ vfree(is_enabled); -+ return; -+ } -+ -+ for (i = cnt = 0, sdp = mp->sdt_probes; i < mp->sdt_probec; -+ i++, sdp++) { -+ addrs[cnt] = (asm_instr_t *)sdp->sdpd_offset; -+ is_enabled[cnt++] = (sdp->sdpd_name[0] == '?'); -+ } -+ -+ dtrace_sdt_nop_multi(addrs, is_enabled, cnt); -+ -+ dtrace_sdt_stash_args(mp->name, mp->sdt_probes, mp->sdt_probec, -+ sdt_names_addr, sdt_names_len, -+ sdt_args_addr, sdt_args_len); -+ -+ vfree(addrs); -+ vfree(is_enabled); -+} -+ -+void __init dtrace_sdt_init(void) -+{ -+ dtrace_sdt_init_arch(); -+} -+ -+#if IS_ENABLED(CONFIG_DT_DT_PERF) -+void dtrace_sdt_perf(void) -+{ -+ DTRACE_PROBE(measure); -+} -+EXPORT_SYMBOL(dtrace_sdt_perf); -+#endif -diff --git a/kernel/module.c b/kernel/module.c -index b59b513f712c..c16f852bc1ad 100644 ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -36,6 +36,7 @@ - #include <linux/string.h> - #include <linux/mutex.h> - #include <linux/rculist.h> -+#include <linux/sdt.h> - #include <linux/uaccess.h> - #include <asm/cacheflush.h> - #include <linux/set_memory.h> -@@ -43,6 +44,7 @@ - #include <linux/license.h> - #include <asm/sections.h> - #include <linux/dtrace_os.h> -+#include <linux/dtrace_sdt.h> - #include <linux/tracepoint.h> - #include <linux/ftrace.h> - #include <linux/livepatch.h> -@@ -3690,6 +3692,18 @@ static int complete_formation(struct module *mod, struct load_info *info) - { - int err; - -+#ifdef CONFIG_DTRACE -+ void *sdt_args, *sdt_names; -+ unsigned int sdt_args_len, sdt_names_len; -+ -+ sdt_names = section_objs(info, "_dtrace_sdt_names", 1, -+ &sdt_names_len); -+ sdt_args = section_objs(info, "_dtrace_sdt_args", 1, -+ &sdt_args_len); -+ dtrace_sdt_register_module(mod, sdt_names, sdt_names_len, -+ 
sdt_args, sdt_args_len); -+#endif -+ - mutex_lock(&module_mutex); - - /* Find duplicate symbols (must be called under lock). */ -diff --git a/scripts/.gitignore b/scripts/.gitignore -index ef45f96cd7a5..b3523b847e2a 100644 ---- a/scripts/.gitignore -+++ b/scripts/.gitignore -@@ -10,3 +10,4 @@ asn1_compiler - extract-cert - sign-file - insert-sys-cert -+kmodsdt -diff --git a/scripts/Makefile b/scripts/Makefile -index f585a1225c4e..921ab1f7825d 100644 ---- a/scripts/Makefile -+++ b/scripts/Makefile -@@ -4,6 +4,7 @@ - # the kernel for the build process. - # --------------------------------------------------------------------------- - # kallsyms: Find all symbols in vmlinux -+# kmodsdt: Post-process module .o files for SDT probes - - HOST_EXTRACFLAGS += -I$(srctree)/tools/include - -@@ -15,6 +16,7 @@ always-$(CONFIG_ASN1) += asn1_compiler - always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file - always-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert - always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert -+always-$(CONFIG_DTRACE) += kmodsdt - - kallsyms-objs := kallsyms.o - -@@ -40,6 +42,11 @@ HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED - HOSTLDLIBS_sorttable = -lpthread - endif - -+ifeq ($(CONFIG_DTRACE),y) -+HOSTCFLAGS_kmodsdt.o := -I$(srctree)/include/generated -+HOSTLDLIBS_kmodsdt := -lelf -+endif -+ - hostprogs := $(always-y) $(always-m) - - # The following programs are only built on demand -diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal -index abe9dcc72bc0..2b791905e987 100644 ---- a/scripts/Makefile.modfinal -+++ b/scripts/Makefile.modfinal -@@ -2,7 +2,7 @@ - # =========================================================================== - # Module final link and CTF generation - # =========================================================================== --# 1) compile all <module>.mod.c files -+# 1) compute SDT offsets, generate SDT stubs, and compile all .mod.c files - # 2) for external modules, generate CTF for the module (there is 
an extra, - # externally-invoked target that does this for the entire kernel but does - # not invoke the rst of the module-building process) -@@ -32,11 +32,49 @@ modname = $(notdir $(@:.mod.o=)) - part-of-module = y - - quiet_cmd_cc_o_c = CC [M] $@ -- cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< -+ cmd_cc_o_c = $(CC) $(c_flags) -I$(dir $@) -c -o $@ $< -+ -+quiet_cmd_as_o_S = AS $@ -+ cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< -+ -+ifdef CONFIG_DTRACE -+ -+sdtgen = $(srctree)/scripts/dtrace_sdt.sh -+ -+quiet_cmd_sdtinfo = SDTINF $@ -+ cmd_sdtinfo = $(sdtgen) sdtinfo $@ $< kmod -+ -+quiet_cmd_sdtstub = SDTSTB $@ -+ cmd_sdtstub = $(sdtgen) sdtstub $@ $< -+ -+# We depend on the .mod.c file to ensure that modpost runs before sdtinfo. -+$(modules:.ko=.sdtinfo.c): %.sdtinfo.c: %.o %.mod.c -+ $(call cmd,sdtinfo) -+ -+# We depend on the sdtinfo file because info generation rewrites the .o, -+# while sdtstubs reads it. -+$(modules:.ko=.sdtstub.S) : %.sdtstub.S: %.o %.sdtinfo.c -+ $(call cmd,sdtstub) -+ -+%.mod.o: %.mod.c %.sdtinfo.c FORCE -+ $(call if_changed_dep,cc_o_c) -+ -+$(modules:.ko=.sdtstub.o): %.sdtstub.o: %.sdtstub.S -+ $(call if_changed,as_o_S) -+ -+module-sdt-modular-prereq = %.sdtstub.o -+sdtinfo-prereq = $(modules:.ko=.sdtinfo.c) -+ -+else - - %.mod.o: %.mod.c FORCE - $(call if_changed_dep,cc_o_c) - -+module-sdt-modular-prereq = -+sdtinfo-prereq = -+ -+endif -+ - # Generate CTF for the entire kernel, or for the module alone if this is a - # build of an external module. 
- -@@ -95,7 +133,7 @@ $(1) $(wordlist 1,1024,$(2)) - $(if $(word 1025,$(2)),$(call xargs,$(1),$(wordlist 1025,$(words $(2)),$(2)))) - endef - --$(ctf-filelist-raw): $(ctf-builtins-prereq) $(ctf-modules) -+$(ctf-filelist-raw): $(ctf-builtins-prereq) $(ctf-modules) | $(sdtinfo-prereq) - @rm -f $(ctf-filelist-raw); - @if [[ -n "$(ctf-dir-mk)" ]]; then \ - mkdir -p "$(ctf-dir-mk)"; \ -@@ -123,7 +161,8 @@ vmlinux.ctfa: $(ctf-filelist) - else - - # The CTF depends on the output CTF file list, and that depends --# on the .o files for the modules -+# on the .o files for the modules, and on the sdtinfo files, if any -+# (for the same reason that the sdtstub does). - $(ctf-stamp): $(ctf-filelist) - $(call if_changed,ctf) - @shopt -s nullglob; \ -@@ -171,7 +210,7 @@ quiet_cmd_ld_ko_o = LD [M] $@ - $(OBJCOPY) $(module-ctf-flags) $@.tmp $@ && rm -f $@.tmp ; \ - $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) - --$(modules): %.ko: %.o %.mod.o $(KBUILD_LDS_MODULE) $(module-ctfs-modular-prereq) FORCE -+$(modules): %.ko: %.o %.mod.o $(KBUILD_LDS_MODULE) $(module-sdt-modular-prereq) $(module-ctfs-modular-prereq) FORCE - $(call cmd_touch_ctf) - +$(call if_changed,ld_ko_o) - -diff --git a/scripts/dtrace_sdt.sh b/scripts/dtrace_sdt.sh -new file mode 100755 -index 000000000000..c55cdfee17a1 ---- /dev/null -+++ b/scripts/dtrace_sdt.sh -@@ -0,0 +1,588 @@ -+#!/bin/sh -+# SPDX-License-Identifier: GPL-2.0 -+ -+LANG=C -+ -+# -+# Syntax: -+# dtrace_sdt.sh sdtstub <S-file> <o-file>+ -+# This is used to generate DTrace SDT probe stubs based on one -+# or more object file(s). The stubs are written to <S-file>. -+# dtrace_sdt.sh sdtinfo <c-file> <o-file> kmod -+# This is used to generate DTrace SDT probe definitions for a -+# kmod .o file. The output is written to <c-file>. -+# dtrace_sdt.sh sdtinfo <S-file> <l-file> -+# This is used to generate DTrace SDT probe definitions for a -+# linked kernel image file <l-file>. The output is written to -+# <S-file>. 
-+# -+ -+opr="$1" -+shift -+if [ -z "$opr" ]; then -+ echo "ERROR: Missing operation" > /dev/stderr -+ exit 1 -+fi -+ -+tfn="$1" -+shift -+if [ -z "$tfn" ]; then -+ echo "ERROR: Missing target filename" > /dev/stderr -+ exit 1 -+fi -+ -+ofn="$1" -+tok="$2" -+ -+if [ -z "$ofn" ]; then -+ echo "ERROR: Missing object file argument" > /dev/stderr -+ exit 1 -+fi -+ -+if [ "$opr" = "sdtstub" ]; then -+ ${NM} -u $* | grep -E '__dtrace_(probe|isenabled)_' | sort | uniq | \ -+ ${AWK} -v arch=${ARCH} \ -+ '{ -+ printf("\t.globl %s\n\t.type %s,@function\n%s:\n", -+ $2, $2, $2); -+ count++; -+ } -+ -+ END { -+ if (count) { -+ if (arch == "x86" || arch == "x86_64") { -+ print "\txor %eax,%eax"; -+ print "\tretq"; -+ } else if (arch == "sparc" || arch == "sparc64") { -+ print "\tretl"; -+ print "\tnop"; -+ } else if (arch == "arm" || arch == "arm64") { -+ print "\tmov w0, #0x0"; -+ print "\tret"; -+ } -+ } -+ }' > $tfn -+ exit $? -+fi -+ -+if [ "$opr" != "sdtinfo" ]; then -+ echo "ERROR: Invalid operation, should be sdtstub or sdtinfo" > /dev/stderr -+ exit 1 -+fi -+ -+if [ "$tok" = "kmod" ]; then -+ # Pre-process the object file to handle any local functions that contain -+ # SDT probes. -+ scripts/kmodsdt ${ofn} -+ -+ # Output all function symbols in the symbol table of the object file. -+ # Subsequently, output all relocation records for DTrace SDT probes. The -+ # probes are identified by either a __dtrace_probe_ or __dtrace_isenabled_ -+ # prefix. -+ # -+ # We sort the output primarily based on the section, using the value (or -+ # offset) as secondary sort criterion The overall result is that the -+ # output will be structured as a list of functions, and for any functions -+ # that contain DTrace SDT probes, relocation records will follow the -+ # function entry they are associated with. 
-+ # -+ # Relocations are reported by objdump per section, with a header line -+ # documenting the specific section being reported: -+ # RELOCATION RECORDS FOR [<section>]: -+ # This is followed by a column header line, and a list of relocations. -+ # The relocations are listed with 3 tokens per line: -+ # <offset> <type> <value> -+ # -+ # Three different types can show up in the output (all with 4 tokens): -+ # <section> <offset> F <value> -+ # Function within a section at a specific offset. -+ # (See STAGE 3a below.) -+ # <section> <offset> G <value> -+ # Global alias for a local function within a section at a specific -+ # offset. A function can only have one alias, and there cannot be -+ # an alias without its respective function. -+ # (See STAGE 3a below.) -+ # <section> <offset> R <value> -+ # Relocation within a section at a specific offset. -+ # (See STAGE 3b below.) -+ # -+ ${OBJDUMP} -tr ${ofn} | \ -+ awk '/^RELOC/ { -+ sect = substr($4, 2, length($4) - 3); -+ if (sect ~ /^\.(exit|init|meminit)\.text/) -+ sect = 0; -+ -+ next; -+ } -+ -+ sect && /__dtrace_probe_/ { -+ $3 = substr($3, 16); -+ sub(/[\-+].*$/, "", $3); -+ print sect " " $1 " R " $3; -+ next; -+ } -+ -+ sect && /__dtrace_isenabled_/ { -+ $3 = substr($3, 20); -+ sub(/[\-+].*$/, "", $3); -+ print sect " " $1 " R ?" 
$3; -+ next; -+ } -+ -+ /file format/ { -+ next; -+ } -+ -+ / F / { -+ if ($4 ~ /^\.(exit|init|meminit)\.text/) -+ next; -+ -+ if ($6 == ".hidden") -+ print $4 " " $1 " G " $7; -+ else -+ print $4 " " $1 " F " $6; -+ }' | \ -+ sort -k1,2 | \ -+ awk -v arch=${ARCH} \ -+ 'function subl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) { -+ tmp = $0; -+ if (length(v0) > 8) { -+ d = length(v0); -+ v0h = strtonum("0x"substr(v0, 1, d - 8)); -+ v0l = strtonum("0x"substr(v0, d - 8 + 1)); -+ d = length(v1); -+ v1h = strtonum("0x"substr(v1, 1, d - 8)); -+ v1l = strtonum("0x"substr(v1, d - 8 + 1)); -+ -+ if (v0l >= v1l) { -+ if (v0h >= v1h) { -+ d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); -+ } else { -+ printf "#error Invalid addresses: %s - %s\n", v0, v1 \ -+ >"/dev/stderr"; -+ errc++; -+ } -+ } else { -+ if (v0h > v1h) { -+ v0h--; -+ v0l += 4294967296; -+ d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); -+ } else { -+ printf "#error Invalid addresses: %s - %s\n", v0, v1 \ -+ >"/dev/stderr"; -+ errc++; -+ } -+ } -+ } else { -+ v0 = strtonum("0x"v0); -+ v1 = strtonum("0x"v1); -+ d = sprintf("%016x", v0 - v1); -+ } -+ $0 = tmp; -+ -+ return d; -+ } -+ -+ BEGIN { -+ print "#include <linux/sdt.h>"; -+ -+ probec = 0; -+ } -+ -+ # -+ # Process a symbol table definition for a function in the object -+ # file ($ofn). As we pass through the symbol table, we record the -+ # function name, address, and symbol table index or alias. This -+ # information is needed for any potential DTrace probes that may exist -+ # in the function. They will be listed in relocation records -+ # subsequent to this function definition (and are processed in the -+ # next action block). -+ # -+ NF == 4 && $3 == "F" { -+ fname = $4; -+ sub(/\..*$/, "", fname); -+ alias = $4; -+ faddr = $2; -+ sub(/^0+/, "", faddr); -+ -+ next; -+ } -+ -+ NF == 4 && $3 == "G" { -+ alias = $4; -+ -+ next; -+ } -+ -+ # -+ # Process a relocation record associated with the preceding function. 
-+ # -+ # For kernel modules: -+ # Convert the section offset into an offset in the function where the -+ # DTrace probe is located, i.e. an offset from the start of the -+ # function. This will be resolved in an absolute address at runtime -+ # when the module is loaded. -+ # -+ NF == 4 && $3 == "R" { -+ sub(/^0+/, "", $2); -+ -+ addr = subl($2, faddr); -+ -+ if (arch == "x86" || arch == "x86_64") -+ addr = subl(addr, 1); -+ -+ protom[alias] = 1; -+ probev[probec] = sprintf(" {\042%s\042, \042%s\042 /* %s */, 0 /* sdt_args string */, (uintptr_t)%s+0x%s },", $4, fname, $1, alias, addr); -+ probec++; -+ -+ next; -+ } -+ -+ END { -+ if (probec > 0) { -+ for (alias in protom) -+ printf "extern void %s(void);\n", alias; -+ print "\nstatic struct sdt_probedesc\t_sdt_probes[] = {"; -+ for (i = 0; i < probec; i++) -+ print probev[i]; -+ print "};\n"; -+ } else -+ print "#define _sdt_probes\tNULL"; -+ -+ print "#define _sdt_probec\t" probec; -+ -+ exit(errc == 0 ? 0 : 1); -+ }' > $tfn -+else -+ # For a linked kernel (with relocation data), the scope of the DTrace SDT -+ # probe discovery can be limited to CODE sections that are not included in -+ # the init or exit code sections. -+ # -+ # First the sections records are parsed to order to determine the base -+ # address for each relevant section. -+ # -+ # Subsequently, all function symbols that are located in the sections we -+ # care about are read from the symbol table of the linked kernel object. -+ # Each symbol is reported in the output stream with its section name, -+ # address, a token identifying it as a function (or alias), and its name. -+ # -+ # Finally, each relocation record from relevant sections that relates to -+ # SDT probes are written to the output stream with its section name, -+ # address, a token # identifying it as a relocation, and its name. Probes -+ # are identified in the relocation records as symbols with either a -+ # __dtrace_probe_ or __dtrace_isenabled_ prefix. 
-+ # -+ # We sort the output based on the section name and address, ensuring that -+ # the output will be a list of functions, and each function record will be -+ # followed immediately by any DTrace SDT probe records that are used in -+ # that function. -+ # -+ # Three different record types can show up in the output (4 tokens each): -+ # <section> <address> F <name> -+ # Named function at a specific address. -+ # <section> <address> G <name> -+ # Global alias for a local function at a specific offset. A -+ # function can only have one alias, and there cannot be an alias -+ # without its respective function. -+ # <section> <address> R <value> -+ # Relocation within a section at a specific address -+ # -+ ${OBJDUMP} -htr ${ofn} | \ -+ awk 'function addl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) { -+ tmp = $0; -+ if (length(v0) > 8 || length(v1) > 8) { -+ d = length(v0); -+ v0h = strtonum("0x"substr(v0, 1, d - 8)); -+ v0l = strtonum("0x"substr(v0, d - 8 + 1)); -+ d = length(v1); -+ v1h = strtonum("0x"substr(v1, 1, d - 8)); -+ v1l = strtonum("0x"substr(v1, d - 8 + 1)); -+ -+ v0l += v1l; -+ v0h += v1h; -+ d = sprintf("%x", v0l); -+ if (length(d) > 8) { -+ v0h++; -+ v0l -= 4294967296; -+ } -+ d = sprintf("%x", v0h); -+ if (length(d) <= 8) { -+ d = sprintf("%08x%08x", v0h, v0l); -+ } else { -+ printf "#error Invalid addresses: %s + %s\n", v0, v1 \ -+ >"/dev/stderr"; -+ errc++; -+ } -+ } else { -+ v0 = strtonum("0x"v0); -+ v1 = strtonum("0x"v1); -+ d = sprintf("%016x", v0 + v1); -+ } -+ $0 = tmp; -+ -+ return d; -+ } -+ -+ function subl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) { -+ tmp = $0; -+ if (length(v0) > 8) { -+ d = length(v0); -+ v0h = strtonum("0x"substr(v0, 1, d - 8)); -+ v0l = strtonum("0x"substr(v0, d - 8 + 1)); -+ d = length(v1); -+ v1h = strtonum("0x"substr(v1, 1, d - 8)); -+ v1l = strtonum("0x"substr(v1, d - 8 + 1)); -+ -+ if (v0l >= v1l) { -+ if (v0h >= v1h) { -+ d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); -+ } else { -+ printf "#error Invalid addresses: %s - 
%s\n", v0, v1 \ -+ >"/dev/stderr"; -+ errc++; -+ } -+ } else { -+ if (v0h > v1h) { -+ v0h--; -+ v0l += 4294967296; -+ d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); -+ } else { -+ printf "#error Invalid addresses: %s - %s\n", v0, v1 \ -+ >"/dev/stderr"; -+ errc++; -+ } -+ } -+ } else { -+ v0 = strtonum("0x"v0); -+ v1 = strtonum("0x"v1); -+ d = sprintf("%016x", v0 - v1); -+ } -+ $0 = tmp; -+ -+ return d; -+ } -+ -+ NF == 7 && $2 !~ /^\.(exit|init|meminit)\.text/ { -+ snam = $2; -+ addr = $4; -+ -+ getline; -+ if (/CODE/) -+ base[snam] = addr; -+ -+ next; -+ } -+ -+ NF == 5 && $2 == "g" && $NF == "_stext" { -+ print ". " $1 " B _stext"; -+ next; -+ } -+ -+ /^RELOC/ { -+ snam = substr($4, 2, length($4) - 3); -+ if (snam in base) -+ in_reloc = 1; -+ else -+ in_reloc = 0; -+ next; -+ } -+ -+ in_reloc && /__dtrace_probe_/ { -+ $3 = substr($3, 16); -+ sub(/[\-+].*$/, "", $3); -+ print snam " " addl(base[snam], $1) " R " $3; -+ next; -+ } -+ -+ in_reloc && /__dtrace_isenabled_/ { -+ $3 = substr($3, 20); -+ sub(/[\-+].*$/, "", $3); -+ print snam " " addl(base[snam], $1) " R ?" 
$3; -+ next; -+ } -+ -+ / F / { -+ if (!($4 in base)) -+ next; -+ -+ if ($6 == ".hidden") -+ print $4 " " $1 " G " $7; -+ else -+ print $4 " " $1 " F " $6; -+ }' | \ -+ sort -k2 | \ -+ awk -v arch=${ARCH} \ -+ 'function subl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) { -+ tmp = $0; -+ if (length(v0) > 8) { -+ d = length(v0); -+ v0h = strtonum("0x"substr(v0, 1, d - 8)); -+ v0l = strtonum("0x"substr(v0, d - 8 + 1)); -+ d = length(v1); -+ v1h = strtonum("0x"substr(v1, 1, d - 8)); -+ v1l = strtonum("0x"substr(v1, d - 8 + 1)); -+ -+ if (v0l >= v1l) { -+ if (v0h >= v1h) { -+ d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); -+ } else { -+ printf "#error Invalid addresses: %x vs %x", v0, v1 \ -+ >"/dev/stderr"; -+ errc++; -+ } -+ } else { -+ printf "#error Invalid addresses: %x vs %x", v0, v1 \ -+ >"/dev/stderr"; -+ errc++; -+ } -+ } else { -+ v0 = strtonum("0x"v0); -+ v1 = strtonum("0x"v1); -+ d = sprintf("%016x", v0 - v1); -+ } -+ $0 = tmp; -+ -+ return d; -+ } -+ -+ function map_string(str, off) { -+ if (str in strmap) -+ off = strmap[str]; -+ else { -+ off = strsz; -+ strmap[str] = strsz; -+ strv[strc++] = str; -+ strsz += length(str) + 1; -+ } -+ -+ return off; -+ } -+ -+ BEGIN { -+ print "#include <asm/types.h>"; -+ print "#if BITS_PER_LONG == 64"; -+ print "# define PTR .quad"; -+ if (arch == "aarch64") -+ print "# define ALGN .align 3"; -+ else -+ print "# define ALGN .align 8"; -+ print "#else"; -+ print "# define PTR .long"; -+ if (arch == "aarch64") -+ print "# define ALGN .align 2"; -+ else -+ print "# define ALGN .align 4"; -+ print "#endif"; -+ -+ print "\t.section .rodata, \042a\042"; -+ print ""; -+ -+ print ".globl dtrace_sdt_probes"; -+ print "\tALGN"; -+ print "dtrace_sdt_probes:"; -+ -+ probec = 0; -+ stroff = 0; -+ strc = 0; -+ } -+ -+ -+ # -+ # Record the _stext address so probe locations can be expressed -+ # relative to that address. -+ # -+ NF == 4 && $1 == "." 
&& $4 == "_stext" { -+ stext = $2; -+ next; -+ } -+ -+ # -+ # Process a symbol table definition for a function in the .text -+ # section of the kernel image. We record the function name and -+ # the address, and pre-populate the alias name with the function -+ # name. -+ # -+ # We also compare the address of the current symbol to the last -+ # recorded address, and if they are the same, we do not increment -+ # the function count. -+ # -+ NF == 4 && $3 == "F" { -+ faddr = $2; -+ fname = $4; -+ sub(/\..*$/, "", fname); -+ alias = $4; -+ -+ if ($2 != prev) -+ funcc++; -+ prev = $2; -+ -+ next; -+ } -+ -+ # -+ # When we encounter an alias symbol, we record the name. -+ # -+ NF == 4 && $3 == "G" { -+ alias = $4; -+ -+ next; -+ } -+ -+ # -+ # Process a relocation record associated with the preceding function. -+ # -+ # The address was resolved earlier, so we can simply generate the -+ # numeric information for the SDT probe information record. The -+ # text information (probe name and function name) are stored. This -+ # allows us to weed out duplicates, and it is necessary because the -+ # data blob with all the strings will be written to output later. -+ # -+ NF == 4 && $3 == "R" { -+ sub(/^0+/, "", $2); -+ -+ addr = subl($2, stext); -+ -+ # -+ # On x86, relocations point to the 2nd byte of a call instruction -+ # so we need to adjust the address. 
-+ # -+ if (arch == "x86" || arch == "x86_64") -+ addr = subl(addr, 1); -+ -+ print "/*"; -+ print " * " $1 " " faddr " F " fname; -+ print " * " $0; -+ print " */"; -+ printf "\tPTR\t_stext + 0x%s\n", addr; -+ printf "\tPTR\t%d\n", map_string($4); -+ printf "\tPTR\t%d\n", map_string(fname); -+ -+ probec++; -+ -+ next; -+ } -+ -+ END { -+ print ""; -+ print ".globl dtrace_sdt_strings"; -+ print "\tALGN"; -+ print "dtrace_sdt_strings:"; -+ -+ -+ for (i = 0; i < strc; i++) -+ printf "\t.asciz\t\042%s\042\n", strv[i]; -+ -+ print ""; -+ print ".globl dtrace_sdt_nprobes"; -+ print ".globl dtrace_fbt_nfuncs"; -+ print "\tALGN"; -+ print "dtrace_sdt_nprobes:"; -+ printf "\tPTR\t%d\n", probec; -+ print "dtrace_fbt_nfuncs:"; -+ printf "\tPTR\t%d\n", funcc; -+ -+ exit(errc == 0 ? 0 : 1); -+ }' > $tfn -+fi -+ -+exit $? -diff --git a/scripts/kmodsdt.c b/scripts/kmodsdt.c -new file mode 100644 -index 000000000000..1e35794467d7 ---- /dev/null -+++ b/scripts/kmodsdt.c -@@ -0,0 +1,410 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright 2016 Oracle, Inc. All rights reserved. -+ * Use is subject to license terms. 
-+ */ -+ -+#include "../include/generated/autoconf.h" -+ -+#define ELF_TARGET_ALL -+#include <elf.h> -+#include <gelf.h> -+ -+#include <sys/types.h> -+ -+#include <unistd.h> -+#include <string.h> -+#include <limits.h> -+#include <stddef.h> -+#include <stdlib.h> -+#include <stdio.h> -+#include <fcntl.h> -+#include <errno.h> -+#include <assert.h> -+ -+typedef struct symtbl { -+ struct symtbl *next; -+ void *strtab; -+ void *symtab; -+} symtbl_t; -+ -+static int -+dt_elf_symtab_lookup(Elf_Data *data_sym, int nsym, uintptr_t addr, uint32_t shn, -+ GElf_Sym *sym) -+{ -+ int i, ret = -1; -+ GElf_Sym s; -+ -+ for (i = 0; i < nsym && gelf_getsym(data_sym, i, sym) != NULL; i++) { -+ if (GELF_ST_TYPE(sym->st_info) == STT_FUNC && -+ shn == sym->st_shndx && -+ sym->st_value <= addr && -+ addr < sym->st_value + sym->st_size) { -+ if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL) -+ return i; -+ -+ ret = i; -+ s = *sym; -+ } -+ } -+ -+ if (ret >= 0) -+ *sym = s; -+ return ret; -+} -+ -+static int -+process_obj(const char *obj) -+{ -+ static const char dt_ppref[] = "__dtrace_probe_"; -+ static const char dt_spref[] = "__dta_"; -+ int fd, i, sidx, mod = 0; -+ Elf *elf = NULL; -+ GElf_Ehdr ehdr; -+ Elf_Scn *scn_rel, *scn_sym, *scn_str; -+ Elf_Data *data_rel, *data_sym, *data_str; -+ GElf_Shdr shdr_rel, shdr_sym, shdr_str; -+ GElf_Sym rsym, fsym, dsym; -+ GElf_Rela rela; -+ char *p, *r, *f, *a; -+ uint32_t eclass, emachine1, emachine2; -+ size_t symsize, nsym, nstr, isym, istr, osym, len; -+ symtbl_t *pair, *bufs = NULL; -+ char **alttab; -+ const char *elferrstr = "no error"; -+ -+ fd = open(obj, O_RDWR); -+ if (fd == -1) { -+ fprintf(stderr, "failed to open %s: %s\n", obj, -+ strerror(errno)); -+ return 1; -+ } -+ -+ if (elf_version(EV_CURRENT) == EV_NONE) { -+ fprintf(stderr, "ELF library version too old\n"); -+ return 1; -+ } -+ -+ elf = elf_begin(fd, ELF_C_RDWR, NULL); -+ if (elf == NULL) { -+ fprintf(stderr, "failed to process %s: %s\n", obj, -+ elf_errmsg(elf_errno())); -+ return 
1; -+ } -+ -+ switch (elf_kind(elf)) { -+ case ELF_K_ELF: -+ break; -+ case ELF_K_AR: -+ fprintf(stderr, "archives are not permitted; %s\n", obj); -+ return 1; -+ default: -+ fprintf(stderr, "invalid file type: %s\n", obj); -+ return 1; -+ } -+ -+ if (gelf_getehdr(elf, &ehdr) == NULL) { -+ fprintf(stderr, "corrupt file: %s\n", obj); -+ return 1; -+ } -+ -+#ifdef CONFIG_64BIT -+ eclass = ELFCLASS64; -+# if defined(__sparc) -+ emachine1 = emachine2 = EM_SPARCV9; -+# elif defined(__i386) || defined(__amd64) -+ emachine1 = emachine2 = EM_X86_64; -+# elif defined(__aarch64__) -+ emachine1 = emachine2 = EM_AARCH64; -+# endif -+ symsize = sizeof(Elf64_Sym); -+#else -+ eclass = ELFCLASS32; -+# if defined(__sparc) -+ emachine1 = EM_SPARC; -+ emachine2 = EM_SPARC32PLUS; -+# elif defined(__i386) || defined(__amd64) -+ emachine1 = emachine2 = EM_386; -+# elif defined(__arm__) -+ emachine1 = emachine2 = EM_ARM; -+# endif -+ symsize = sizeof(Elf32_Sym); -+#endif -+ -+ if (ehdr.e_ident[EI_CLASS] != eclass) { -+ fprintf(stderr, "incorrect ELF class for %s: %d " -+ "(expected %d)\n", obj, ehdr.e_ident[EI_CLASS], -+ eclass); -+ return 1; -+ } -+ if (ehdr.e_machine != emachine1 && ehdr.e_machine != emachine2) { -+ fprintf(stderr, "incorrect ELF machine type for %s: %d " -+ "(expected %d or %d)\n", -+ obj, ehdr.e_machine, emachine1, emachine2); -+ return 1; -+ } -+ -+ scn_rel = NULL; -+ while ((scn_rel = elf_nextscn(elf, scn_rel)) != NULL) { -+ if (gelf_getshdr(scn_rel, &shdr_rel) == NULL) { -+ elferrstr = "failed to get section header"; -+ goto elf_err; -+ } -+ -+ /* -+ * Skip any non-relocation sections. -+ */ -+ if (shdr_rel.sh_type != SHT_RELA && shdr_rel.sh_type != SHT_REL) -+ continue; -+ -+ data_rel = elf_getdata(scn_rel, NULL); -+ if (data_rel == NULL) { -+ elferrstr = "failed to get relocation data"; -+ goto elf_err; -+ } -+ -+ /* -+ * Grab the section, section header and section data for the -+ * symbol table that this relocation section references. 
-+ */ -+ scn_sym = elf_getscn(elf, shdr_rel.sh_link); -+ if (scn_sym == NULL || -+ gelf_getshdr(scn_sym, &shdr_sym) == NULL || -+ (data_sym = elf_getdata(scn_sym, NULL)) == NULL) { -+ elferrstr = "failed to get symbol table"; -+ goto elf_err; -+ } -+ -+ /* -+ * Ditto for that symbol table's string table. -+ */ -+ scn_str = elf_getscn(elf, shdr_sym.sh_link); -+ if (scn_str == NULL || -+ gelf_getshdr(scn_str, &shdr_str) == NULL || -+ (data_str = elf_getdata(scn_str, NULL)) == NULL) { -+ elferrstr = "failed to get string table"; -+ goto elf_err; -+ } -+ -+ /* -+ * We're looking for relocations to symbols matching this form: -+ * -+ * __dtrace_probe_<probe> -+ * -+ * If the function containing the probe is locally scoped -+ * (static), we create an alias. The alias, a new symbol, -+ * will be global (so that it can be referenced from sdtinfo -+ * entries) and hidden (so that it is converted to a local -+ * symbol at link time). Such aliases have this form: -+ * -+ * __dta_<function>_<symindex> -+ * -+ * The <symindex> is appended to ensure that aliases are unique -+ * because they are referenced in global scope. Two local -+ * functions with identical names need to be distrinct at the -+ * level of the aliases. -+ * -+ * We take a first pass through all the relocations to -+ * populate our string table and count the number of extra -+ * symbols we'll require. Note that the <function> is -+ * sanitized to ensure that it is a valid C identifier, i.e. -+ * any periods in the name are converted to underscores. -+ */ -+ isym = osym = data_sym->d_size / symsize; -+ istr = data_str->d_size; -+ -+ /* -+ * Allocate the alias table to be the exact same size as the -+ * symtab. If an alias is required for a specific symbol, its -+ * corresponding entry in this alias table will contain the -+ * alias name. Otherwise, the entry will be NULL. 
-+ */ -+ alttab = (char **)calloc(isym, sizeof(char *)); -+ -+ nsym = 0; -+ nstr = 0; -+ -+ for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) { -+ if (shdr_rel.sh_type == SHT_RELA) { -+ if (gelf_getrela(data_rel, i, &rela) == NULL) -+ continue; -+ } else { -+ GElf_Rel rel; -+ -+ if (gelf_getrel(data_rel, i, &rel) == NULL) -+ continue; -+ rela.r_offset = rel.r_offset; -+ rela.r_info = rel.r_info; -+ rela.r_addend = 0; -+ } -+ -+ if (gelf_getsym(data_sym, GELF_R_SYM(rela.r_info), -+ &rsym) == NULL) { -+ elferrstr = "relocation symbol not found"; -+ goto elf_err; -+ } -+ -+ assert(rsym.st_name < data_str->d_size); -+ -+ r = (char *)data_str->d_buf + rsym.st_name; -+ if (strncmp(r, dt_ppref, sizeof(dt_ppref) - 1) != 0) -+ continue; -+ -+ sidx = dt_elf_symtab_lookup(data_sym, isym, -+ rela.r_offset, -+ shdr_rel.sh_info, &fsym); -+ if (sidx < 0) { -+ fprintf(stderr, "relocation %x not in " -+ "function\n", i); -+ goto err; -+ } -+ -+ assert(fsym.st_name < data_str->d_size); -+ assert(GELF_ST_TYPE(fsym.st_info) == STT_FUNC); -+ -+ if (GELF_ST_BIND(fsym.st_info) != STB_LOCAL) -+ continue; -+ -+ f = (char *)data_str->d_buf + fsym.st_name; -+ -+ if (alttab[sidx] != NULL) -+ continue; -+ -+ len = snprintf(NULL, 0, "%s%s_%d", dt_spref, f, sidx) -+ + 1; -+ a = malloc(len); -+ assert(a != NULL); -+ nstr += snprintf(a, len, "%s%s_%d", dt_spref, f, sidx) -+ + 1; -+ for (p = a; *p != '\0'; p++) { -+ if (*p == '.') -+ *p = '_'; -+ } -+ alttab[sidx] = a; -+ nsym++; -+ } -+ -+ if (!nsym) { -+ free(alttab); -+ continue; -+ } -+ -+ pair = malloc(sizeof(symtbl_t)); -+ if (pair == NULL) { -+ fprintf(stderr, "failed to alloc new symtbl\n"); -+ goto err; -+ } -+ pair->strtab = malloc(data_str->d_size + nstr); -+ if (pair->strtab == NULL) { -+ fprintf(stderr, "failed to alloc new symtbl->strtab\n"); -+ free(pair); -+ goto err; -+ } -+ pair->symtab = malloc(data_sym->d_size + nsym * symsize); -+ if (pair->symtab == NULL) { -+ fprintf(stderr, "failed to alloc new symtbl->symtab\n"); 
-+ free(pair->strtab); -+ free(pair); -+ goto err; -+ } -+ -+ pair->next = bufs; -+ bufs = pair; -+ -+ memcpy(pair->strtab, data_str->d_buf, data_str->d_size); -+ data_str->d_buf = pair->strtab; -+ data_str->d_size += nstr; -+ elf_flagdata(data_str, ELF_C_SET, ELF_F_DIRTY); -+ shdr_str.sh_size += nstr; -+ gelf_update_shdr(scn_str, &shdr_str); -+ -+ memcpy(pair->symtab, data_sym->d_buf, data_sym->d_size); -+ data_sym->d_buf = pair->symtab; -+ data_sym->d_size += nsym * symsize; -+ elf_flagdata(data_sym, ELF_C_SET, ELF_F_DIRTY); -+ shdr_sym.sh_size += nsym * symsize; -+ gelf_update_shdr(scn_sym, &shdr_sym); -+ -+ nsym += isym; -+ -+ /* -+ * Now that the tables have been allocated, add the aliases as -+ * described above. Since we already know the symtab index of -+ * the symbol that the alias refers to, we can simply run down -+ * the alttab and add alias for any non-NULL entries. -+ */ -+ for (i = 1; i < osym; i++) { -+ if (alttab[i] == NULL) -+ continue; -+ -+ if (gelf_getsym(data_sym, i, &fsym) == NULL) { -+ fprintf(stderr, "failed to get symbol %d: %s\n", -+ i, elf_errmsg(elf_errno())); -+ goto err; -+ } -+ -+ assert(GELF_ST_TYPE(fsym.st_info) == STT_FUNC); -+ assert(GELF_ST_BIND(fsym.st_info) == STB_LOCAL); -+ /* -+ * Add the alias as a new symbol to the symtab. 
-+ */ -+ dsym = fsym; -+ dsym.st_name = istr; -+ dsym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_FUNC); -+ dsym.st_other = ELF64_ST_VISIBILITY(STV_HIDDEN); -+ -+ len = strlen(alttab[i]) + 1; -+ assert(istr + len <= data_str->d_size); -+ a = (char *)data_str->d_buf + istr; -+ memcpy(a, alttab[i], len); -+ -+ gelf_update_sym(data_sym, isym, &dsym); -+ istr += len; -+ isym++; -+ -+ assert(isym <= nsym); -+ -+ mod = 1; -+ -+ free(alttab[i]); -+ } -+ -+ free(alttab); -+ } -+ -+ if (mod && elf_update(elf, ELF_C_WRITE) == -1) { -+ elferrstr = "Failed to update ELF object"; -+ goto elf_err; -+ } -+ -+ elf_end(elf); -+ close(fd); -+ -+ while ((pair = bufs) != NULL) { -+ bufs = pair->next; -+ free(pair->strtab); -+ free(pair->symtab); -+ free(pair); -+ } -+ -+ return 0; -+ -+elf_err: -+ fprintf(stderr, "%s: %s\n", elferrstr, elf_errmsg(elf_errno())); -+err: -+ fprintf(stderr, "an error was encountered while processing %s\n", obj); -+ return 1; -+} -+ -+int -+main(int argc, char *argv[]) -+{ -+ int i; -+ -+ for (i = 1; i < argc; i++) { -+ if (process_obj(argv[i])) -+ exit(1); -+ } -+ -+ exit(0); -+} -diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh -index 67adb0959177..86540859ba94 100755 ---- a/scripts/link-vmlinux.sh -+++ b/scripts/link-vmlinux.sh -@@ -39,6 +39,34 @@ info() - fi - } - -+# Generate the SDT probe point stubs object file -+# ${1} output file -+sdtstub() -+{ -+ info SDTSTB ${1} -+ ${srctree}/scripts/dtrace_sdt.sh sdtstub .tmp_sdtstub.S \ -+ ${KBUILD_VMLINUX_OBJS} -+ -+ local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ -+ ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" -+ -+ ${CC} ${aflags} -c -o ${1} .tmp_sdtstub.S -+} -+ -+# Generate the SDT probe info for kernel image ${1} -+# ${2} output file -+sdtinfo() -+{ -+ info SDTINF ${2} -+ -+ ${srctree}/scripts/dtrace_sdt.sh sdtinfo .tmp_sdtinfo.S ${1} -+ -+ local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ -+ ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" -+ -+ ${CC} ${aflags} -c 
-o ${2} .tmp_sdtinfo.S -+} -+ - # Link of vmlinux.o used for section mismatch analysis - # ${1} output file - modpost_link() -@@ -57,16 +85,19 @@ modpost_link() - - # Link of vmlinux - # ${1} - output file --# ${2}, ${3}, ... - optional extra .o files -+# ${2} - optional extra ld flag(s) -+# ${3}, ${4}, ... - optional extra .o files - vmlinux_link() - { - local lds="${objtree}/${KBUILD_LDS}" - local output=${1} -+ local flags="${2}" - local objects - - info LD ${output} - -- # skip output file argument -+ # skip output file and flags arguments -+ shift - shift - - if [ "${SRCARCH}" != "um" ]; then -@@ -80,7 +111,7 @@ vmlinux_link() - ${@}" - - ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \ -- -o ${output} \ -+ ${flags} -o ${output} \ - -T ${lds} ${objects} - else - objects="-Wl,--whole-archive \ -@@ -93,7 +124,7 @@ vmlinux_link() - ${@}" - - ${CC} ${CFLAGS_vmlinux} \ -- -o ${output} \ -+ ${flags} -o ${output} \ - -Wl,-T,${lds} \ - ${objects} \ - -lutil -lrt -lpthread -@@ -190,7 +221,7 @@ kallsyms_step() - kallsymso=.tmp_kallsyms${1}.o - kallsyms_vmlinux=.tmp_vmlinux${1} - -- vmlinux_link ${kallsyms_vmlinux} "${kallsymso_prev}" ${btf_vmlinux_bin_o} -+ vmlinux_link ${kallsyms_vmlinux} "${2:-}" "${kallsymso_prev}" ${btf_vmlinux_bin_o} ${sdtstubo} ${sdtinfoo} - kallsyms ${kallsyms_vmlinux} ${kallsymso} - } - -@@ -212,6 +243,8 @@ cleanup() - rm -f .btf.* - rm -f .tmp_System.map - rm -f .tmp_kallsyms* -+ rm -f .tmp_sdtstub.* -+ rm -f .tmp_sdtinfo.* - rm -f .tmp_vmlinux* - rm -f System.map - rm -f vmlinux -@@ -262,6 +295,14 @@ fi; - # final build of init/ - ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1 - -+sdtstubo="" -+sdtinfoo="" -+if [ -n "${CONFIG_DTRACE}" ]; then -+ sdtstubo=.tmp_sdtstub.o -+ sdtinfoo=.tmp_sdtinfo.o -+ sdtstub ${sdtstubo} -+fi -+ - #link vmlinux.o - info LD vmlinux.o - modpost_link vmlinux.o -@@ -315,7 +356,23 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then - # a) Verify that the System.map from vmlinux matches the map from - # 
${kallsymso}. - -+ # step 1 -+ if [ -n "${CONFIG_DTRACE}" ]; then -+ sdtinfo vmlinux.o ${sdtinfoo} -+ fi -+ - kallsyms_step 1 -+ -+ if [ -n "${CONFIG_DTRACE}" ]; then -+ if [ -n "${CONFIG_ARM64}" ]; then -+ kallsyms_step 1 -+ else -+ kallsyms_step 1 -r -+ fi -+ sdtinfo ${kallsyms_vmlinux} ${sdtinfoo} vmlinux.o -+ fi -+ -+ # step 2 - kallsyms_step 2 - - # step 3 -@@ -327,7 +384,7 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then - fi - fi - --vmlinux_link vmlinux "${kallsymso}" ${btf_vmlinux_bin_o} -+vmlinux_link vmlinux "" "${kallsymso}" ${btf_vmlinux_bin_o} ${sdtstubo} ${sdtinfoo} - - if [ -n "${CONFIG_BUILDTIME_TABLE_SORT}" ]; then - info SORTTAB vmlinux -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index 55a0a2eccbd2..9de12ff930df 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -2197,7 +2197,9 @@ static int check_exports(struct module *mod) - const char *basename; - exp = find_symbol(s->name); - if (!exp || exp->module == mod) { -- if (have_vmlinux && !s->weak) { -+ if (have_vmlinux && !s->weak && -+ !strstarts(s->name, "__dtrace_probe_") && -+ !strstarts(s->name, "__dtrace_isenabled_")) { - if (warn_unresolved) { - warn("\"%s\" [%s.ko] undefined!\n", - s->name, mod->name); -@@ -2252,11 +2254,22 @@ static int check_modname_len(struct module *mod) - **/ - static void add_header(struct buffer *b, struct module *mod) - { -+ const char *modname; -+ -+ modname = strrchr(mod->name, '/'); -+ if (modname != NULL) -+ modname++; -+ else -+ modname = mod->name; - buf_printf(b, "#include <linux/build-salt.h>\n"); - buf_printf(b, "#include <linux/module.h>\n"); - buf_printf(b, "#include <linux/vermagic.h>\n"); - buf_printf(b, "#include <linux/compiler.h>\n"); - buf_printf(b, "\n"); -+ buf_printf(b, "#ifdef CONFIG_DTRACE\n"); -+ buf_printf(b, "# include \"%s.sdtinfo.c\"\n", modname); -+ buf_printf(b, "#endif\n"); -+ buf_printf(b, "\n"); - buf_printf(b, "BUILD_SALT;\n"); - buf_printf(b, "\n"); - buf_printf(b, "MODULE_INFO(vermagic, 
VERMAGIC_STRING);\n"); -@@ -2272,6 +2285,10 @@ static void add_header(struct buffer *b, struct module *mod) - "\t.exit = cleanup_module,\n" - "#endif\n"); - buf_printf(b, "\t.arch = MODULE_ARCH_INIT,\n"); -+ buf_printf(b, "#ifdef CONFIG_DTRACE\n"); -+ buf_printf(b, "\t.sdt_probes = _sdt_probes,\n"); -+ buf_printf(b, "\t.sdt_probec = _sdt_probec,\n"); -+ buf_printf(b, "#endif\n"); - buf_printf(b, "};\n"); - } - --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0009-dtrace-sdt-provider-for-x86.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0009-dtrace-sdt-provider-for-x86.patch deleted file mode 100644 index 94a3403fc2f4..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0009-dtrace-sdt-provider-for-x86.patch +++ /dev/null @@ -1,1076 +0,0 @@ -From 664e8e3b74cb329afc3472c0fe70d51bdb679e99 Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 18:00:18 +0000 -Subject: [PATCH 09/20] dtrace: sdt provider for x86 - -This implements the SDT provider itself. It is relatively -straightforward except for the code needed to parse the argument strings -ultimately derived from SDT DTRACE_PROBE invocations and perf-event -prototype definitions. 
- -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - arch/x86/dtrace/Makefile.arch | 2 + - arch/x86/dtrace/sdt_x86_64.c | 126 ++++++++ - dtrace/Makefile | 2 + - dtrace/dt_test_dev.c | 33 ++ - dtrace/dt_test_mod.c | 5 + - dtrace/sdt_dev.c | 562 ++++++++++++++++++++++++++++++++++ - dtrace/sdt_impl.h | 87 ++++++ - dtrace/sdt_mod.c | 154 ++++++++++ - 8 files changed, 971 insertions(+) - create mode 100644 arch/x86/dtrace/sdt_x86_64.c - create mode 100644 dtrace/sdt_dev.c - create mode 100644 dtrace/sdt_impl.h - create mode 100644 dtrace/sdt_mod.c - -diff --git a/arch/x86/dtrace/Makefile.arch b/arch/x86/dtrace/Makefile.arch -index ffb9ef4d1722..8492eaee426d 100644 ---- a/arch/x86/dtrace/Makefile.arch -+++ b/arch/x86/dtrace/Makefile.arch -@@ -7,5 +7,7 @@ DTARCHDIR = ../arch/x86/dtrace - ccflags-y += -I$(srctree)/arch/x86/dtrace/include -Idtrace - - dtrace-obj += dtrace_asm_x86_64.o dtrace_isa_x86_64.o -+sdt-obj += sdt_x86_64.o - - dtrace-y += $(addprefix $(DTARCHDIR)/, $(dtrace-obj)) -+sdt-y += $(addprefix $(DTARCHDIR)/, $(sdt-obj)) -diff --git a/arch/x86/dtrace/sdt_x86_64.c b/arch/x86/dtrace/sdt_x86_64.c -new file mode 100644 -index 000000000000..e686634ff410 ---- /dev/null -+++ b/arch/x86/dtrace/sdt_x86_64.c -@@ -0,0 +1,126 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: sdt_dev.c -+ * DESCRIPTION: DTrace - SDT provider implementation for x86 -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/sdt.h> -+#include <linux/slab.h> -+#include <linux/vmalloc.h> -+#include <asm/dtrace_util.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "sdt_impl.h" -+ -+#define SDT_PATCHVAL 0xf0 -+ -+static uint8_t sdt_invop(struct pt_regs *regs) -+{ -+ struct sdt_probe *sdt = sdt_probetab[SDT_ADDR2NDX(regs->ip)]; -+ -+ for (; sdt != NULL; sdt = sdt->sdp_hashnext) { -+ if ((uintptr_t)sdt->sdp_patchpoint == regs->ip) { -+ if (sdt->sdp_ptype == SDTPT_IS_ENABLED) -+ regs->ax = 1; -+ else { -+ struct pt_regs *old_regs = -+ this_cpu_core->cpu_dtrace_regs; -+ -+ this_cpu_core->cpu_dtrace_regs = regs; -+ -+ dtrace_probe(sdt->sdp_id, regs->di, regs->si, -+ regs->dx, regs->cx, regs->r8, -+ regs->r9, 0); -+ -+ this_cpu_core->cpu_dtrace_regs = old_regs; -+ } -+ -+ return DTRACE_INVOP_NOPS; -+ } -+ } -+ -+ return 0; -+} -+ -+void sdt_provide_probe_arch(struct sdt_probe *sdp, struct module *mp, int idx) -+{ -+ sdp->sdp_patchval = SDT_PATCHVAL; -+ sdp->sdp_savedval = *sdp->sdp_patchpoint; -+} -+ -+int sdt_provide_module_arch(void *arg, struct module *mp) -+{ -+ return 1; -+} -+ -+void sdt_destroy_module(void *arg, struct module *mp) -+{ -+} -+ -+void sdt_enable_arch(struct sdt_probe *sdp, dtrace_id_t id, void *arg) -+{ -+ dtrace_invop_enable(sdp->sdp_patchpoint, sdp->sdp_patchval); -+} -+ -+void sdt_disable_arch(struct sdt_probe *sdp, dtrace_id_t id, void *arg) -+{ -+ dtrace_invop_disable(sdp->sdp_patchpoint, sdp->sdp_savedval); -+} -+ 
-+uint64_t sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, -+ int aframes) -+{ -+ struct pt_regs *regs = this_cpu_core->cpu_dtrace_regs; -+ uint64_t *st; -+ uint64_t val; -+ -+ if (regs == NULL) -+ return 0; -+ -+ switch (argno) { -+ case 0: -+ return regs->di; -+ case 1: -+ return regs->si; -+ case 2: -+ return regs->dx; -+ case 3: -+ return regs->cx; -+ case 4: -+ return regs->r8; -+ case 5: -+ return regs->r9; -+ } -+ -+ ASSERT(argno > 5); -+ -+ st = (uint64_t *)regs->sp; -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ val = st[argno - 6]; -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ -+ return val; -+} -+ -+int sdt_dev_init_arch(void) -+{ -+ return dtrace_invop_add(sdt_invop); -+} -+ -+void sdt_dev_exit_arch(void) -+{ -+ dtrace_invop_remove(sdt_invop); -+} -diff --git a/dtrace/Makefile b/dtrace/Makefile -index b91bc69d3802..126d4985967a 100644 ---- a/dtrace/Makefile -+++ b/dtrace/Makefile -@@ -3,6 +3,7 @@ - # - - obj-$(CONFIG_DT_CORE) += dtrace.o -+obj-$(CONFIG_DT_SDT) += sdt.o - obj-$(CONFIG_DT_SYSTRACE) += systrace.o - obj-$(CONFIG_DT_DT_TEST) += dt_test.o - -@@ -15,6 +16,7 @@ dtrace-y := dtrace_mod.o dtrace_dev.o \ - dtrace_probe.o dtrace_probe_ctx.o \ - dtrace_ptofapi.o dtrace_predicate.o \ - dtrace_spec.o dtrace_state.o dtrace_util.o -+sdt-y := sdt_mod.o sdt_dev.o - systrace-y := systrace_mod.o systrace_dev.o - dt_test-y := dt_test_mod.o dt_test_dev.o - -diff --git a/dtrace/dt_test_dev.c b/dtrace/dt_test_dev.c -index 8e1f5bab8a12..b720d8091787 100644 ---- a/dtrace/dt_test_dev.c -+++ b/dtrace/dt_test_dev.c -@@ -131,6 +131,39 @@ static long dt_test_ioctl(struct file *file, - return 0; - } - -+ if (DTRACE_PROBE_ENABLED(sdt__test)) -+ DTRACE_PROBE(sdt__test__is__enabled); -+ -+ DTRACE_PROBE(sdt__test); -+ -+ /* -+ * Test translation-to-nothing. -+ */ -+ DTRACE_PROBE(sdt__test__ioctl__file, int, cmd, int :, 666, -+ char * : (), 0, struct file *, file, int, arg); -+ -+ /* -+ * Probes with every valid count of args. 
-+ */ -+ DTRACE_PROBE(sdt__test__arg1, int, 1); -+ DTRACE_PROBE(sdt__test__arg2, int, 1, int, 2); -+ DTRACE_PROBE(sdt__test__arg3, int, 1, int, 2, int, 3); -+ DTRACE_PROBE(sdt__test__arg4, int, 1, int, 2, int, 3, int, 4); -+ DTRACE_PROBE(sdt__test__arg5, int, 1, int, 2, int, 3, int, 4, int, 5); -+ DTRACE_PROBE(sdt__test__arg6, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6); -+ DTRACE_PROBE(sdt__test__arg7, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7); -+ DTRACE_PROBE(sdt__test__arg8, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7, int, 8); -+ DTRACE_PROBE(sdt__test__arg9, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7, int, 8, int, 9); -+ DTRACE_PROBE(sdt__test__arga, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7, int, 8, int, 9, int, 10); -+ DTRACE_PROBE(sdt__test__argb, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7, int, 8, int, 9, int, 10, int, 11); -+ DTRACE_PROBE(sdt__test__argc, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7, int, 8, int, 9, int, 10, int, 11, int, 12); -+ DTRACE_PROBE(sdt__test__argd, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7, int, 8, int, 9, int, 10, int, 11, int, 12, int, 13); -+ DTRACE_PROBE(sdt__test__arge, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7, int, 8, int, 9, int, 10, int, 11, int, 12, int, 13, int, 14); -+ DTRACE_PROBE(sdt__test__argf, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7, int, 8, int, 9, int, 10, int, 11, int, 12, int, 13, int, 14, int, 15); -+ DTRACE_PROBE(sdt__test__argg, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7, int, 8, int, 9, int, 10, int, 11, int, 12, int, 13, int, 14, int, 15, int, 16); -+ DTRACE_PROBE(sdt__test__argh, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7, int, 8, int, 9, int, 10, int, 11, int, 12, int, 13, int, 14, int, 15, int, 16, int, 17); -+ DTRACE_PROBE(sdt__test__argi, int, 1, int, 2, int, 3, int, 4, int, 5, int, 6, int, 7, int, 8, int, 9, int, 10, int, 11, int, 12, int, 13, int, 14, int, 
15, int, 16, int, 17, int, 18); -+ - return -EAGAIN; - } - -diff --git a/dtrace/dt_test_mod.c b/dtrace/dt_test_mod.c -index d8af71665a37..a86c8bc02ae9 100644 ---- a/dtrace/dt_test_mod.c -+++ b/dtrace/dt_test_mod.c -@@ -50,3 +50,8 @@ static struct dtrace_pops dt_test_pops = { - }; - - DT_PROVIDER_MODULE(dt_test, DTRACE_PRIV_USER) -+ -+void foo(void) -+{ -+ DTRACE_PROBE(sdt__test2); -+} -diff --git a/dtrace/sdt_dev.c b/dtrace/sdt_dev.c -new file mode 100644 -index 000000000000..78457dad8773 ---- /dev/null -+++ b/dtrace/sdt_dev.c -@@ -0,0 +1,562 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: sdt_dev.c -+ * DESCRIPTION: DTrace - SDT provider device driver -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/ctype.h> -+#include <linux/fs.h> -+#include <linux/miscdevice.h> -+#include <linux/sdt.h> -+#include <linux/slab.h> -+#include <linux/uaccess.h> -+#include <linux/vmalloc.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "sdt_impl.h" -+ -+#define SDT_PROBETAB_SIZE 0x1000 /* 4k entries -- 16K total */ -+ -+struct sdt_probe **sdt_probetab; -+int sdt_probetab_size; -+int sdt_probetab_mask; -+ -+/* -+ * Return, in newly-allocated space, a version of the passed-in type 'vartype' -+ * which has been cleaned up suitably for CTF: leading and trailing spaces (if -+ * any) removed, and optionally a trailing argument removed as well. 
-+ * -+ * Type strings look like either -+ * -+ * type (for SDT, as in function prototypes), or -+ * -+ * type argname (for perf: as in function declarations). -+ * -+ * Translator components ": (foo, foo)", if any, have been removed by this -+ * stage. -+ */ -+static char *cleanup_type(const char *vartype, int arg_strip) -+{ -+ const char *cleaned; -+ const char *p; -+ -+ cleaned = vartype + strspn(vartype, " \t"); -+ for (p = cleaned + strlen(cleaned) - 1; p > cleaned && isspace(*p); -+ p--); -+ if (arg_strip) { -+ for (; p > cleaned && (isalnum(*p) || *p == '_'); p--); -+ for (; p > cleaned && isspace(*p); p--); -+ } -+ p++; -+ -+ return kstrndup(cleaned, p - cleaned, GFP_KERNEL); -+} -+ -+/* -+ * Set up the args lists, extracting them from their sdpd entry and parsing them -+ * into an sdt_argdesc array for each probe. -+ */ -+static struct sdt_argdesc * -+sdt_setup_args(struct sdt_probedesc *sdpd, -+ size_t *sdp_nargdesc) -+{ -+ struct sdt_argdesc *args; -+ char *argstr; -+ char *p; -+ int arg_strip = 0; -+ char *next_arg = NULL; -+ size_t arg = 0, sarg = 0, i; -+ -+ *sdp_nargdesc = 0; -+ -+ if ((sdpd->sdpd_args == NULL) || (sdpd->sdpd_args[0] == '\0')) -+ return NULL; -+ -+ /* -+ * Take a copy of the string so we can mutate it without causing trouble -+ * on module reload. -+ */ -+ argstr = kstrdup(sdpd->sdpd_args, GFP_KERNEL); -+ if (argstr == NULL) -+ goto oom; -+ -+ /* -+ * Handle the common case of a trailing comma before we allocate space, -+ * and elide it. -+ */ -+ p = argstr + strlen(argstr) - 1; -+ if (p[0] == ',' && p[1] == '\0') -+ *p = '\0'; -+ -+ /* -+ * This works for counting the number of args even though translator -+ * strings can contain commas, because each comma denotes a new probe -+ * argument. It may overcount in the case of elided arguments -+ * ("foo : ,"): we compensate for that further down, and ignore the tiny -+ * memory leak that results. 
-+ */ -+ for (p = argstr; p != NULL; p = strchr(p + 1, ',')) -+ (*sdp_nargdesc)++; -+ -+ args = kzalloc(*sdp_nargdesc * sizeof(struct sdt_argdesc), -+ GFP_KERNEL); -+ if (args == NULL) -+ goto oom_argstr; -+ -+ /* -+ * We need to transform each arg (stripping off a terminal argument -+ * name) if this is a perf probe. -+ */ -+ if (strncmp(sdpd->sdpd_name, "__perf_", strlen("__perf_")) == 0) -+ arg_strip = 1; -+ -+ next_arg = argstr; -+ do { -+ char *tok; -+ char *xlator = NULL, *p; -+ char *native; -+ int parens = 0; -+ int empty_xlation; -+ -+ /* -+ * Find the end of this arg, and figure out if it has any -+ * translators. Clean up the type of the arg (or native type, -+ * if this is a translated type). -+ */ -+ tok = next_arg; -+ next_arg = NULL; -+ p = strpbrk(tok, "():,"); -+ while (p && !next_arg) { -+ switch (*p) { -+ case '(': -+ parens++; -+ break; -+ case ')': -+ if (parens > 0) -+ parens--; -+ break; -+ case ':': -+ *p = '\0'; -+ xlator = p + 1; -+ break; -+ case ',': -+ if (parens == 0) { -+ *p = '\0'; -+ next_arg = p + 1; -+ } -+ break; -+ } -+ p = strpbrk(p + 1, "():,"); -+ } -+ -+ native = cleanup_type(tok, arg_strip); -+ if (native == NULL) { -+ args[arg].sda_native = args[arg].sda_xlate = NULL; -+ goto full_oom; -+ } -+ -+ /* -+ * Special case: perf's DECLARE_TRACE_NOARGS passes a single arg -+ * 'void'. Spot and skip it. -+ */ -+ if (!xlator && arg_strip && strcmp(native, "void") == 0) { -+ kfree(native); -+ (*sdp_nargdesc)--; -+ sarg++; -+ continue; -+ } -+ -+ /* -+ * No translator: straight mapping. -+ */ -+ if (xlator == NULL) { -+ ASSERT(arg < *sdp_nargdesc); -+ args[arg].sda_mapping = sarg; -+ args[arg].sda_native = native; -+ args[arg].sda_xlate = NULL; -+ arg++; -+ sarg++; -+ continue; -+ } -+ -+ /* -+ * If this is a perf probe, warn: translations cannot exist for -+ * these, and have no defined format yet in any case. We can -+ * struggle on by assuming they look like SDT translations. 
-+ */ -+ if (arg_strip) -+ pr_warn("Perf probe %s has at least one SDT translation, " -+ "which should be impossible.", sdpd->sdpd_name); -+ -+ /* -+ * Zero or more translations. (If there are zero, i.e. a pair -+ * of empty parentheses or a colon with nothing after it, we -+ * have to decrement the nargdesc.) -+ */ -+ -+ empty_xlation = 1; -+ while ((p = strsep(&xlator, "(,)")) != NULL) { -+ /* -+ * Skip the empty space before the ( or after the ). -+ */ -+ if (strspn(p, " \t") == strlen(p)) -+ continue; -+ -+ ASSERT(arg < *sdp_nargdesc); -+ -+ empty_xlation = 0; -+ args[arg].sda_mapping = sarg; -+ args[arg].sda_native = kstrdup(native, GFP_KERNEL); -+ args[arg].sda_xlate = cleanup_type(p, 0); -+ if ((args[arg].sda_native == NULL) || -+ (args[arg].sda_xlate == NULL)) { -+ pr_warn("Unable to create argdesc list for " -+ "probe %s: out of memory\n", -+ sdpd->sdpd_name); -+ kfree(native); -+ goto full_oom; -+ } -+ arg++; -+ } -+ if (empty_xlation) -+ (*sdp_nargdesc)--; -+ -+ kfree(native); -+ sarg++; -+ } while (next_arg != NULL); -+ -+ kfree(argstr); -+ return args; -+ -+full_oom: -+ for (i = 0; i < arg; i++) { -+ kfree(args[i].sda_native); -+ kfree(args[i].sda_xlate); -+ } -+ kfree(args); -+oom_argstr: -+ kfree(argstr); -+oom: -+ *sdp_nargdesc = 0; -+ pr_warn("Unable to create argdesc list for probe %s: " -+ "out of memory\n", sdpd->sdpd_name); -+ return NULL; -+} -+ -+void sdt_provide_module(void *arg, struct module *mp) -+{ -+ char *modname = mp->name; -+ struct dtrace_mprovider *prov; -+ struct sdt_probedesc *sdpd; -+ struct sdt_probe *sdp, *prv; -+ int idx, len; -+ int probes_skipped = 0; -+ -+ /* If module setup has failed then do not provide anything. */ -+ if (PDATA(mp) == NULL) -+ return; -+ -+ /* -+ * Nothing to do if the module SDT probes were already created. -+ */ -+ if (PDATA(mp)->sdt_probe_cnt != 0) -+ return; -+ -+ /* -+ * Nothing to do if there are no SDT probes. 
-+ */ -+ if (mp->sdt_probec == 0) -+ return; -+ -+ /* -+ * Nothing if arch specific module setup fails. -+ */ -+ if (!sdt_provide_module_arch(NULL, mp)) -+ return; -+ -+ /* -+ * Do not provide any probes unless all SDT providers have been created -+ * for this meta-provider. -+ */ -+ for (prov = sdt_providers; prov->dtmp_name != NULL; prov++) { -+ if (prov->dtmp_id == DTRACE_PROVNONE) -+ return; -+ } -+ -+ for (idx = 0, sdpd = mp->sdt_probes; idx < mp->sdt_probec; -+ idx++, sdpd++) { -+ char *name = sdpd->sdpd_name, *nname; -+ int i, j; -+ struct dtrace_mprovider *prov; -+ dtrace_id_t id; -+ enum fasttrap_probe_type ptype; -+ -+ if (name[0] == '?') { -+ ptype = SDTPT_IS_ENABLED; -+ name++; -+ } else -+ ptype = SDTPT_OFFSETS; -+ -+ for (prov = sdt_providers; prov->dtmp_pref != NULL; prov++) { -+ char *prefix = prov->dtmp_pref; -+ int len = strlen(prefix); -+ -+ if (strncmp(name, prefix, len) == 0) { -+ name += len; -+ break; -+ } -+ } -+ -+ nname = kmalloc(len = strlen(name) + 1, GFP_KERNEL); -+ if (nname == NULL) { -+ probes_skipped++; -+ continue; -+ } -+ -+ for (i = j = 0; name[j] != '\0'; i++) { -+ if (name[j] == '_' && name[j + 1] == '_') { -+ nname[i] = '-'; -+ j += 2; -+ } else -+ nname[i] = name[j++]; -+ } -+ -+ nname[i] = '\0'; -+ -+ sdp = kzalloc(sizeof(struct sdt_probe), GFP_KERNEL); -+ if (sdp == NULL) { -+ probes_skipped++; -+ continue; -+ } -+ -+ sdp->sdp_loadcnt = 1; /* FIXME */ -+ sdp->sdp_module = mp; -+ sdp->sdp_name = nname; -+ sdp->sdp_namelen = len; -+ sdp->sdp_provider = prov; -+ sdp->sdp_ptype = ptype; -+ -+ sdp->sdp_argdesc = sdt_setup_args(sdpd, &sdp->sdp_nargdesc); -+ -+ id = dtrace_probe_lookup(prov->dtmp_id, modname, -+ sdpd->sdpd_func, nname); -+ if (id != DTRACE_IDNONE) { -+ prv = dtrace_probe_arg(prov->dtmp_id, id); -+ ASSERT(prv != NULL); -+ -+ sdp->sdp_next = prv->sdp_next; -+ sdp->sdp_id = id; -+ prv->sdp_next = sdp; -+ } else { -+ sdp->sdp_id = dtrace_probe_create(prov->dtmp_id, -+ modname, -+ sdpd->sdpd_func, -+ nname, 
SDT_AFRAMES, -+ sdp); -+ -+ /* -+ * If we failed to create the probe just skip it. -+ */ -+ if (sdp->sdp_id == DTRACE_IDNONE) { -+ kfree(sdp); -+ probes_skipped++; -+ continue; -+ } -+ -+ PDATA(mp)->sdt_probe_cnt++; -+ } -+ -+ sdp->sdp_patchpoint = (asm_instr_t *)sdpd->sdpd_offset; -+ -+ sdt_provide_probe_arch(sdp, mp, idx); -+ -+ sdp->sdp_hashnext = sdt_probetab[ -+ SDT_ADDR2NDX(sdp->sdp_patchpoint)]; -+ sdt_probetab[SDT_ADDR2NDX(sdp->sdp_patchpoint)] = sdp; -+ } -+ -+ if (probes_skipped != 0) -+ pr_warn("sdt: Failed to provide %d probes in %s (out of memory)\n", -+ probes_skipped, mp->name); -+} -+ -+int sdt_enable(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct sdt_probe *sdp = parg; -+ struct sdt_probe *curr; -+ -+ /* -+ * Ensure that we have a reference to the module. -+ */ -+ if (!try_module_get(sdp->sdp_module)) -+ return -EAGAIN; -+ -+ /* -+ * If at least one other enabled probe exists for this module, drop the -+ * reference we took above, because we only need one to prevent the -+ * module from being unloaded. -+ */ -+ PDATA(sdp->sdp_module)->enabled_cnt++; -+ if (PDATA(sdp->sdp_module)->enabled_cnt > 1) -+ module_put(sdp->sdp_module); -+ -+ for (curr = sdp; curr != NULL; curr = curr->sdp_next) -+ sdt_enable_arch(curr, id, arg); -+ -+ return 0; -+} -+ -+void sdt_disable(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct sdt_probe *sdp = parg; -+ struct sdt_probe *curr; -+ -+ for (curr = sdp; curr != NULL; curr = curr->sdp_next) -+ sdt_disable_arch(curr, id, arg); -+ -+ /* -+ * If we are disabling a probe, we know it was enabled, and therefore -+ * we know that we have a reference on the module to prevent it from -+ * being unloaded. If we disable the last probe on the module, we can -+ * drop the reference. 
-+ */ -+ PDATA(sdp->sdp_module)->enabled_cnt--; -+ if (PDATA(sdp->sdp_module)->enabled_cnt == 0) -+ module_put(sdp->sdp_module); -+} -+ -+void sdt_getargdesc(void *arg, dtrace_id_t id, void *parg, -+ struct dtrace_argdesc *desc) -+{ -+ struct sdt_probe *sdp = parg; -+ -+ desc->dtargd_native[0] = '\0'; -+ desc->dtargd_xlate[0] = '\0'; -+ -+ while ((sdp->sdp_ptype == SDTPT_IS_ENABLED) && -+ (sdp->sdp_next != NULL)) -+ sdp = sdp->sdp_next; -+ -+ if (sdp->sdp_nargdesc <= desc->dtargd_ndx) { -+ desc->dtargd_ndx = DTRACE_ARGNONE; -+ return; -+ } -+ -+ if (sdp->sdp_argdesc[desc->dtargd_ndx].sda_native != NULL) -+ strlcpy(desc->dtargd_native, -+ sdp->sdp_argdesc[desc->dtargd_ndx].sda_native, -+ sizeof(desc->dtargd_native)); -+ -+ if (sdp->sdp_argdesc[desc->dtargd_ndx].sda_xlate != NULL) -+ strlcpy(desc->dtargd_xlate, -+ sdp->sdp_argdesc[desc->dtargd_ndx].sda_xlate, -+ sizeof(desc->dtargd_xlate)); -+ -+ desc->dtargd_mapping = sdp->sdp_argdesc[desc->dtargd_ndx].sda_mapping; -+} -+ -+void sdt_destroy(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct sdt_probe *sdp = parg; -+ -+ PDATA(sdp->sdp_module)->sdt_probe_cnt--; -+ -+ while (sdp != NULL) { -+ struct sdt_probe *old = sdp, *last, *hash; -+ int ndx; -+ size_t i; -+ -+ ndx = SDT_ADDR2NDX(sdp->sdp_patchpoint); -+ last = NULL; -+ hash = sdt_probetab[ndx]; -+ -+ while (hash != sdp) { -+ ASSERT(hash != NULL); -+ last = hash; -+ hash = hash->sdp_hashnext; -+ } -+ -+ if (last != NULL) -+ last->sdp_hashnext = sdp->sdp_hashnext; -+ else -+ sdt_probetab[ndx] = sdp->sdp_hashnext; -+ -+ for (i = 0; i < sdp->sdp_nargdesc; i++) { -+ kfree(sdp->sdp_argdesc[i].sda_native); -+ kfree(sdp->sdp_argdesc[i].sda_xlate); -+ } -+ kfree(sdp->sdp_argdesc); -+ kfree(sdp->sdp_name); -+ sdp = sdp->sdp_next; -+ kfree(old); -+ } -+} -+ -+static int sdt_open(struct inode *inode, struct file *file) -+{ -+ return -EAGAIN; -+} -+ -+static int sdt_close(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static const struct file_operations 
sdt_fops = { -+ .owner = THIS_MODULE, -+ .open = sdt_open, -+ .release = sdt_close, -+}; -+ -+static struct miscdevice sdt_dev = { -+ .minor = DT_DEV_SDT_MINOR, -+ .name = "sdt", -+ .nodename = "dtrace/provider/sdt", -+ .fops = &sdt_fops, -+}; -+ -+int sdt_dev_init(void) -+{ -+ int ret = 0; -+ -+ ret = misc_register(&sdt_dev); -+ if (ret) { -+ pr_err("%s: Can't register misc device %d\n", -+ sdt_dev.name, sdt_dev.minor); -+ return ret; -+ } -+ -+ if (sdt_probetab_size == 0) -+ sdt_probetab_size = SDT_PROBETAB_SIZE; -+ -+ sdt_probetab_mask = sdt_probetab_size - 1; -+ sdt_probetab = vzalloc(sdt_probetab_size * sizeof(struct sdt_probe *)); -+ if (sdt_probetab == NULL) -+ return -ENOMEM; -+ -+ sdt_dev_init_arch(); -+ -+ return ret; -+} -+ -+void sdt_dev_exit(void) -+{ -+ sdt_dev_exit_arch(); -+ -+ vfree(sdt_probetab); -+ -+ misc_deregister(&sdt_dev); -+} -diff --git a/dtrace/sdt_impl.h b/dtrace/sdt_impl.h -new file mode 100644 -index 000000000000..e5b7f4f80270 ---- /dev/null -+++ b/dtrace/sdt_impl.h -@@ -0,0 +1,87 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - Statically Defined Tracing provider -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#ifndef _SDT_IMPL_H_ -+#define _SDT_IMPL_H_ -+ -+#include <linux/sdt.h> -+#include <asm/dtrace_arch.h> -+#include <dtrace/sdt_arch.h> -+ -+extern struct module *dtrace_kmod; -+ -+struct sdt_argdesc; -+ -+enum fasttrap_probe_type { -+ SDTPT_NONE = 0, -+ SDTPT_OFFSETS, -+ SDTPT_IS_ENABLED -+}; -+ -+struct sdt_probe { -+ struct dtrace_mprovider *sdp_provider; /* provider */ -+ char *sdp_name; /* name of probe */ -+ int sdp_namelen; /* length of allocated name */ -+ dtrace_id_t sdp_id; /* probe ID */ -+ struct module *sdp_module; /* modctl for module */ -+ int sdp_loadcnt; /* load count for module */ -+ int sdp_primary; /* non-zero if primary mod */ -+ enum fasttrap_probe_type sdp_ptype; /* probe type */ -+ asm_instr_t *sdp_patchpoint;/* patch point */ -+ asm_instr_t sdp_patchval; /* instruction to patch */ -+ asm_instr_t sdp_savedval; /* saved instruction value */ -+ struct sdt_argdesc *sdp_argdesc; /* arguments for this probe */ -+ size_t sdp_nargdesc; /* number of arguments */ -+ struct sdt_probe *sdp_next; /* next probe */ -+ struct sdt_probe *sdp_hashnext; /* next on hash */ -+}; -+ -+struct sdt_argdesc { -+ int sda_mapping; -+ char *sda_native; -+ char *sda_xlate; -+}; -+ -+extern struct dtrace_mprovider sdt_providers[]; -+extern struct sdt_probe **sdt_probetab; -+extern int sdt_probetab_size; -+extern int sdt_probetab_mask; -+ -+#define SDT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & \ -+ sdt_probetab_mask) -+ -+extern void sdt_provide_probe_arch(struct sdt_probe *, struct module *, int); -+extern int sdt_provide_module_arch(void *, struct module *); -+extern void sdt_enable_arch(struct sdt_probe *, dtrace_id_t, void *); -+extern void sdt_disable_arch(struct sdt_probe *, dtrace_id_t, void *); -+ -+extern void sdt_provide_module(void *, struct module *); -+extern void sdt_destroy_module(void *, struct module *); -+extern int sdt_enable(void *, dtrace_id_t, void *); -+extern void sdt_disable(void *, dtrace_id_t, void *); -+extern void 
sdt_getargdesc(void *, dtrace_id_t, void *, -+ struct dtrace_argdesc *); -+extern uint64_t sdt_getarg(void *, dtrace_id_t, void *, int, int); -+extern void sdt_destroy(void *, dtrace_id_t, void *); -+ -+extern int sdt_dev_init(void); -+extern void sdt_dev_exit(void); -+ -+extern int sdt_dev_init_arch(void); -+extern void sdt_dev_exit_arch(void); -+ -+#endif /* _SDT_IMPL_H_ */ -diff --git a/dtrace/sdt_mod.c b/dtrace/sdt_mod.c -new file mode 100644 -index 000000000000..1de9e72396aa ---- /dev/null -+++ b/dtrace/sdt_mod.c -@@ -0,0 +1,154 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: sdt_mod.c -+ * DESCRIPTION: DTrace - SDT provider kernel module -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/module.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "sdt_impl.h" -+ -+MODULE_AUTHOR("Kris Van Hees (kris.van.hees@oracle.com)"); -+MODULE_DESCRIPTION("Profile Interrupt Tracing"); -+MODULE_VERSION("v0.1"); -+MODULE_LICENSE("GPL"); -+ -+static struct dtrace_pattr vtrace_attr = { -+{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pattr info_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pattr fc_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pattr fpu_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_CPU }, -+{ 
DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pattr fsinfo_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pattr stab_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pattr sdt_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pattr xpv_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_PLATFORM }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM }, -+}; -+ -+static struct dtrace_pattr iscsi_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ 
DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pattr perf_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pops sdt_pops = { -+ .dtps_provide = NULL, -+ .dtps_provide_module = sdt_provide_module, -+ .dtps_destroy_module = sdt_destroy_module, -+ .dtps_enable = sdt_enable, -+ .dtps_disable = sdt_disable, -+ .dtps_suspend = NULL, -+ .dtps_resume = NULL, -+ .dtps_getargdesc = sdt_getargdesc, -+#ifdef CONFIG_SPARC64 -+ .dtps_getargval = NULL, -+#else -+ .dtps_getargval = sdt_getarg, -+#endif -+ .dtps_usermode = NULL, -+ .dtps_destroy = sdt_destroy, -+}; -+ -+struct dtrace_mprovider sdt_providers[] = { -+ { "vtrace", "__vtrace_", &vtrace_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "sysinfo", "__cpu_sysinfo_", &info_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "vminfo", "__cpu_vminfo_", &info_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "fpuinfo", "__fpuinfo_", &fpu_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "sched", "__sched_", &stab_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "proc", "__proc_", &stab_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "io", "__io_", &stab_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "ip", "__ip_", &stab_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "lockstat", "__lockstat_", &stab_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "tcp", "__tcp_", &stab_attr, 
DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "udp", "__udp_", &stab_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "mib", "__mib_", &stab_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "fsinfo", "__fsinfo_", &fsinfo_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "iscsi", "__iscsi_", &iscsi_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "nfsv3", "__nfsv3_", &stab_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "nfsv4", "__nfsv4_", &stab_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "xpv", "__xpv_", &xpv_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "fc", "__fc_", &fc_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "srp", "__srp_", &fc_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "sysevent", "__sysevent_", &stab_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "perf", "__perf_", &perf_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { "sdt", NULL, &sdt_attr, DTRACE_PRIV_KERNEL, &sdt_pops, 0 }, -+ { NULL } -+}; -+ -+DT_MULTI_PROVIDER_MODULE(sdt, sdt_providers) --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0010-dtrace-profile-provider-and-test-probe-core-componen.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0010-dtrace-profile-provider-and-test-probe-core-componen.patch deleted file mode 100644 index f587bc7d1662..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0010-dtrace-profile-provider-and-test-probe-core-componen.patch +++ /dev/null @@ -1,50 +0,0 @@ -From d0c52214b02345870408364f3eef0b97a837124c Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 17:18:23 +0000 -Subject: [PATCH 10/20] dtrace: profile provider and test probe core components - -Only Kconfig changes are needed here: everything else is purely modular. 
- -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - kernel/dtrace/Kconfig | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/kernel/dtrace/Kconfig b/kernel/dtrace/Kconfig -index c1ec55d8750e..7b88206fe835 100644 ---- a/kernel/dtrace/Kconfig -+++ b/kernel/dtrace/Kconfig -@@ -23,6 +23,12 @@ config DT_CORE - - if DT_CORE - -+config DT_PROFILE -+ tristate "Profile Interrupt Tracing" -+ default m -+ help -+ The profile and tick providers, firing probes at specific intervals. -+ - config DT_SDT - tristate "Statically Defined Tracing" - default m -@@ -54,6 +60,12 @@ config DT_DT_TEST - help - A test provider used by the testsuite. - -+config DT_DT_PERF -+ tristate "DTrace Performance Test Probe" -+ default m -+ help -+ A test provider used for performance testing. 
-+ - config DT_DEBUG - bool "DTrace debugging" - default m --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0011-dtrace-profile-and-tick-providers-built-on-cyclics.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0011-dtrace-profile-and-tick-providers-built-on-cyclics.patch deleted file mode 100644 index d79aa679f4a2..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0011-dtrace-profile-and-tick-providers-built-on-cyclics.patch +++ /dev/null @@ -1,641 +0,0 @@ -From 775c86a5adc60a7ca1771cedb966c1d726f309b9 Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 18:05:13 +0000 -Subject: [PATCH 11/20] dtrace: profile and tick providers built on cyclics - -Probes are constructed dynamically as called upon by the user: some -default commonly-used probes for common timing frequencies are provided -whether or not called upon. - -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - dtrace/Makefile | 2 + - dtrace/profile.h | 32 +++ - dtrace/profile_dev.c | 491 +++++++++++++++++++++++++++++++++++++++++++ - dtrace/profile_mod.c | 52 +++++ - 4 files changed, 577 insertions(+) - create mode 100644 dtrace/profile.h - create mode 100644 dtrace/profile_dev.c - create mode 100644 dtrace/profile_mod.c - -diff --git a/dtrace/Makefile b/dtrace/Makefile -index 126d4985967a..5e6fb362a4e9 100644 ---- a/dtrace/Makefile -+++ b/dtrace/Makefile -@@ -3,6 +3,7 @@ - # - - obj-$(CONFIG_DT_CORE) += dtrace.o -+obj-$(CONFIG_DT_PROFILE) += profile.o - obj-$(CONFIG_DT_SDT) += sdt.o - obj-$(CONFIG_DT_SYSTRACE) += systrace.o - obj-$(CONFIG_DT_DT_TEST) += dt_test.o -@@ -16,6 +17,7 @@ dtrace-y := dtrace_mod.o 
dtrace_dev.o \ - dtrace_probe.o dtrace_probe_ctx.o \ - dtrace_ptofapi.o dtrace_predicate.o \ - dtrace_spec.o dtrace_state.o dtrace_util.o -+profile-y := profile_mod.o profile_dev.o - sdt-y := sdt_mod.o sdt_dev.o - systrace-y := systrace_mod.o systrace_dev.o - dt_test-y := dt_test_mod.o dt_test_dev.o -diff --git a/dtrace/profile.h b/dtrace/profile.h -new file mode 100644 -index 000000000000..713886d1d8e8 ---- /dev/null -+++ b/dtrace/profile.h -@@ -0,0 +1,32 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - profile provider -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#ifndef _PROFILE_H_ -+#define _PROFILE_H_ -+ -+extern void profile_provide(void *, const struct dtrace_probedesc *); -+extern int profile_enable(void *, dtrace_id_t, void *); -+extern void profile_disable(void *, dtrace_id_t, void *); -+extern int profile_usermode(void *, dtrace_id_t, void *); -+extern void profile_destroy(void *, dtrace_id_t, void *); -+ -+extern dtrace_provider_id_t profile_id; -+ -+extern int profile_dev_init(void); -+extern void profile_dev_exit(void); -+ -+#endif /* _PROFILE_H_ */ -diff --git a/dtrace/profile_dev.c b/dtrace/profile_dev.c -new file mode 100644 -index 000000000000..ce7261fcc911 ---- /dev/null -+++ b/dtrace/profile_dev.c -@@ -0,0 +1,491 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: profile_dev.c -+ * DESCRIPTION: DTrace - profile provider device driver -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/fs.h> -+#include <linux/ktime.h> -+#include <linux/miscdevice.h> -+#include <linux/slab.h> -+#include <asm/irq_regs.h> -+#include <asm/ptrace.h> -+ -+#include <linux/hardirq.h> -+#include <linux/profile.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "profile.h" -+ -+#define PROF_NAMELEN 15 -+#define PROF_PROFILE 0 -+#define PROF_TICK 1 -+#define PROF_PREFIX_PROFILE "profile-" -+#define PROF_PREFIX_TICK "tick-" -+ -+struct profile_probe { -+ char prof_name[PROF_NAMELEN]; -+ dtrace_id_t prof_id; -+ int prof_kind; -+ ktime_t prof_interval; -+ cyclic_id_t prof_cyclic; -+}; -+ -+struct profile_probe_percpu { -+ ktime_t profc_expected; -+ ktime_t profc_interval; -+ struct profile_probe *profc_probe; -+}; -+ -+static ktime_t profile_interval_min = KTIME_INIT(0, NANOSEC / 5000); -+static int profile_aframes; -+ -+static int profile_rates[] = { -+ 97, 199, 499, 997, 1999, -+ 4001, 4999, 0, 0, 0, -+ 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0, -+ }; -+static int profile_ticks[] = { -+ 1, 10, 100, 500, 1000, -+ 5000, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0, -+ }; -+ -+/* -+ * profile_max defines the upper bound on the number of profile probes that -+ * can exist (this is to prevent malicious or clumsy users from exhausing -+ * system resources by creating a slew of profile probes). At mod load time, -+ * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's -+ * present as module parameter. -+ * FIXME: module parameter yet to be implemented. -+ */ -+#define PROFILE_MAX_DEFAULT 1000 /* default max. 
number of probes */ -+ -+static int profile_max; /* maximum number of profile probes */ -+static atomic_t profile_total; /* current number of profile probes */ -+ -+static void profile_tick_fn(uintptr_t arg) -+{ -+ struct profile_probe *prof = (struct profile_probe *)arg; -+ unsigned long pc = 0, upc = 0; -+ struct pt_regs *regs = get_irq_regs(); -+ -+ /* -+ * If regs == NULL, then we were called from from softirq context which -+ * also means that we didn't actually interrupt any processing (kernel -+ * or user space). -+ * If regs != NULL, then we did actually get called from hardirq -+ * because the timer interrupt did really interrupt something that was -+ * going on on the CPU (could be user mode or kernel mode). -+ */ -+ if (regs == NULL) { -+ uint64_t stack[8]; -+ -+ dtrace_getpcstack(stack, 8, 0, NULL); -+ pc = stack[7]; -+ } else if (user_mode(regs)) -+ upc = instruction_pointer(regs); -+ else -+ pc = instruction_pointer(regs); -+ -+ dtrace_probe(prof->prof_id, pc, upc, 0, 0, 0, 0, 0); -+} -+ -+static void profile_prof_fn(uintptr_t arg) -+{ -+ struct profile_probe_percpu *pcpu = (struct profile_probe_percpu *)arg; -+ struct profile_probe *prof = pcpu->profc_probe; -+ ktime_t late; -+ struct pt_regs *regs = get_irq_regs(); -+ unsigned long pc = 0, upc = 0; -+ -+ late = ktime_sub(dtrace_gethrtime(), pcpu->profc_expected); -+ pcpu->profc_expected = ktime_add(pcpu->profc_expected, -+ pcpu->profc_interval); -+ -+ /* -+ * If regs == NULL, then we were called from from softirq context which -+ * also means that we didn't actually interrupt any processing (kernel -+ * or user space). -+ * If regs != NULL, then we did actually get called from hardirq -+ * because the timer interrupt did really interrupt something that was -+ * going on on the CPU (could be user mode or kernel mode). 
-+ */ -+ if (regs == NULL) { -+ uint64_t stack[8]; -+ -+ dtrace_getpcstack(stack, 8, 0, NULL); -+ pc = stack[7]; -+ } else if (user_mode(regs)) -+ upc = instruction_pointer(regs); -+ else -+ pc = instruction_pointer(regs); -+ -+ dtrace_probe(prof->prof_id, pc, upc, ktime_to_ns(late), 0, 0, 0, 0); -+} -+ -+static void profile_online(void *arg, processorid_t cpu, -+ struct cyc_handler *hdlr, -+ struct cyc_time *when) -+{ -+ struct profile_probe *prof = arg; -+ struct profile_probe_percpu *pcpu; -+ -+ pcpu = kzalloc(sizeof(struct profile_probe_percpu), GFP_KERNEL); -+ pcpu->profc_probe = prof; -+ -+ hdlr->cyh_func = profile_prof_fn; -+ hdlr->cyh_arg = (uintptr_t)pcpu; -+ hdlr->cyh_level = CY_HIGH_LEVEL; -+ -+ when->cyt_interval = prof->prof_interval; -+ when->cyt_when = ktime_add(dtrace_gethrtime(), when->cyt_interval); -+ -+ pcpu->profc_expected = when->cyt_when; -+ pcpu->profc_interval = when->cyt_interval; -+} -+ -+static void profile_offline(void *arg, processorid_t cpu, void *oarg) -+{ -+ struct profile_probe_percpu *pcpu = oarg; -+ -+ if (pcpu->profc_probe == arg) { -+ kfree(pcpu); -+ return; -+ } -+ -+ WARN_ONCE(1, "%s: called with mismatched probe info (%p vs %p)" -+ " - leaking %lu bytes\n", __func__, pcpu->profc_probe, arg, -+ sizeof(struct profile_probe_percpu)); -+ -+} -+ -+static void profile_create(ktime_t interval, const char *name, int kind) -+{ -+ struct profile_probe *prof; -+ int nr_frames = 0; /* FIXME */ -+ -+ if (profile_aframes) -+ nr_frames = profile_aframes; -+ -+ if (ktime_lt(interval, profile_interval_min)) -+ return; -+ -+ if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0) -+ return; -+ -+ prof = kzalloc(sizeof(struct profile_probe), GFP_KERNEL); -+ if (prof == NULL) { -+ pr_warn("Unable to create probe %s: out of memory\n", name); -+ return; -+ } -+ -+ atomic_inc(&profile_total); -+ if (atomic_read(&profile_total) > profile_max) -+ goto errout; -+ -+ strcpy(prof->prof_name, name); -+ prof->prof_interval = interval; -+ 
prof->prof_cyclic = CYCLIC_NONE; -+ prof->prof_kind = kind; -+ prof->prof_id = dtrace_probe_create(profile_id, NULL, NULL, name, -+ nr_frames, prof); -+ -+ if (prof->prof_id == DTRACE_IDNONE) { -+ pr_warn("Unable to create probe %s: out of memory\n", name); -+ goto errout; -+ } -+ -+ return; -+ -+errout: -+ kfree(prof); -+ atomic_dec(&profile_total); -+ return; -+} -+ -+void profile_provide(void *arg, const struct dtrace_probedesc *desc) -+{ -+ int i, j, rate, kind; -+ long val = 0, mult = 1, mult_s = 0, mult_ns = 0, len; -+ ktime_t interval; -+ const char *name, *suffix = NULL; -+ const struct { -+ char *prefix; -+ int kind; -+ } types[] = { -+ { PROF_PREFIX_PROFILE, PROF_PROFILE }, -+ { PROF_PREFIX_TICK, PROF_TICK }, -+ { NULL, 0 }, -+ }; -+ -+ const struct { -+ char *name; -+ long mult_s; -+ long mult_ns; -+ } suffixes[] = { -+ { "ns", 0, 1 }, -+ { "nsec", 0, 1 }, -+ { "us", 0, NANOSEC / MICROSEC }, -+ { "usec", 0, NANOSEC / MICROSEC }, -+ { "ms", 0, NANOSEC / MILLISEC }, -+ { "msec", 0, NANOSEC / MILLISEC }, -+ { "s", 1, 0 }, -+ { "sec", 1, 0 }, -+ { "m", 60, 0 }, -+ { "min", 60, 0 }, -+ { "h", 60 * 60, 0 }, -+ { "hour", 60 * 60, 0 }, -+ { "d", 24 * 60 * 60, 0 }, -+ { "day", 24 * 60 * 60, 0 }, -+ { "hz", 0, 0 }, -+ { NULL, }, -+ }; -+ -+ if (desc == NULL) { -+ char n[PROF_NAMELEN]; -+ -+ /* -+ * If no description was provided, provide all of our probes. 
-+ */ -+ for (i = 0; i < sizeof(profile_rates) / sizeof(int); i++) { -+ rate = profile_rates[i]; -+ if (rate == 0) -+ continue; -+ -+ snprintf(n, PROF_NAMELEN, "%s%d", -+ PROF_PREFIX_PROFILE, rate); -+ profile_create(ktime_set(0, NANOSEC / rate), -+ n, PROF_PROFILE); -+ } -+ -+ for (i = 0; i < sizeof(profile_ticks) / sizeof(int); i++) { -+ rate = profile_ticks[i]; -+ if (rate == 0) -+ continue; -+ -+ snprintf(n, PROF_NAMELEN, "%s%d", -+ PROF_PREFIX_TICK, rate); -+ profile_create(ktime_set(0, NANOSEC / rate), -+ n, PROF_TICK); -+ } -+ -+ return; -+ } -+ -+ name = desc->dtpd_name; -+ -+ for (i = 0; types[i].prefix != NULL; i++) { -+ len = strlen(types[i].prefix); -+ -+ if (strncmp(name, types[i].prefix, len) != 0) -+ continue; -+ -+ break; -+ } -+ -+ if (types[i].prefix == NULL) -+ return; -+ -+ kind = types[i].kind; -+ -+ /* -+ * We need to start before any time suffix. -+ */ -+ for (j = strlen(name); j >= len; j--) { -+ if (name[j] >= '0' && name[j] <= '9') -+ break; -+ -+ suffix = &name[j]; -+ } -+ -+ if (suffix == NULL) { -+ WARN_ONCE(1, "%s: missing time suffix in %s\n", __func__, name); -+ return; -+ } -+ -+ /* -+ * Now determine the numerical value present in the probe name. -+ */ -+ for (; j >= len; j--) { -+ if (name[j] < '0' || name[j] > '9') -+ return; -+ -+ val += (name[j] - '0') * mult; -+ mult *= 10; -+ } -+ -+ if (val == 0) -+ return; -+ -+ /* -+ * Look up the suffix to determine the multiplier. -+ */ -+ for (i = 0; suffixes[i].name != NULL; i++) { -+ if (strcasecmp(suffixes[i].name, suffix) == 0) { -+ mult_s = suffixes[i].mult_s; -+ mult_ns = suffixes[i].mult_ns; -+ break; -+ } -+ } -+ -+ if (suffixes[i].name == NULL && *suffix != '\0') -+ return; -+ -+ if (mult_s == 0 && mult_ns == 0) { -+ /* -+ * The default is frequency (per-second). 
-+ */ -+ interval = ns_to_ktime((int64_t)NANOSEC / val); -+ } else { -+ long sec; -+ long nsec = val * mult_ns; -+ -+ sec = nsec / NANOSEC; -+ nsec %= NANOSEC; -+ -+ interval = ktime_set(val * mult_s + sec, nsec); -+ } -+ -+ -+ profile_create(interval, name, kind); -+} -+ -+int profile_enable(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct profile_probe *prof = parg; -+ struct cyc_time when; -+ -+ if (!ktime_nz(prof->prof_interval)) { -+ WARN_ONCE(1, "%s: trying to enable 0-interval probe %s\n", -+ __func__, prof->prof_name); -+ return 1; -+ } -+ if (!MUTEX_HELD(&cpu_lock)) { -+ WARN_ONCE(1, "%s: not holding cpu_lock\n", __func__); -+ return 1; -+ } -+ -+ if (prof->prof_kind == PROF_TICK) { -+ struct cyc_handler hdlr; -+ -+ hdlr.cyh_func = profile_tick_fn; -+ hdlr.cyh_arg = (uintptr_t)prof; -+ hdlr.cyh_level = CY_HIGH_LEVEL; -+ -+ when.cyt_interval = prof->prof_interval; -+ when.cyt_when = ktime_set(0, 0); -+ -+ prof->prof_cyclic = cyclic_add(&hdlr, &when); -+ } else if (prof->prof_kind == PROF_PROFILE) { -+ struct cyc_omni_handler omni; -+ -+ omni.cyo_online = profile_online; -+ omni.cyo_offline = profile_offline; -+ omni.cyo_arg = prof; -+ -+ prof->prof_cyclic = cyclic_add_omni(&omni); -+ } else -+ pr_warn_once("%s: Invalid profile type %d\n", -+ __func__, prof->prof_kind); -+ -+ return 0; -+} -+ -+void profile_disable(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct profile_probe *prof = parg; -+ -+ if (prof->prof_cyclic == CYCLIC_NONE) { -+ WARN_ONCE(1, "%s: trying to disable probe %s without cyclic\n", -+ __func__, prof->prof_name); -+ return; -+ } -+ if (!MUTEX_HELD(&cpu_lock)) { -+ WARN_ONCE(1, "%s: not holding cpu_lock\n", __func__); -+ return; -+ } -+ -+ cyclic_remove(prof->prof_cyclic); -+ prof->prof_cyclic = CYCLIC_NONE; -+} -+ -+int profile_usermode(void *arg, dtrace_id_t id, void *parg) -+{ -+ return 1; /* FIXME: awaiting unprivileged tracing */ -+} -+ -+void profile_destroy(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct profile_probe 
*prof = parg; -+ -+ if (prof->prof_cyclic == CYCLIC_NONE) { -+ kfree(prof); -+ -+ if (atomic_read(&profile_total) >= 1) { -+ atomic_dec(&profile_total); -+ return; -+ } -+ -+ WARN_ONCE(1, "%s: profile_total refcount is 0!\n", __func__); -+ } -+ -+ WARN_ONCE(1, "%s: %s still assigned to cyclic\n", -+ __func__, prof->prof_name); -+} -+ -+static int profile_open(struct inode *inode, struct file *file) -+{ -+ return -EAGAIN; -+} -+ -+static int profile_close(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static const struct file_operations profile_fops = { -+ .owner = THIS_MODULE, -+ .open = profile_open, -+ .release = profile_close, -+}; -+ -+static struct miscdevice profile_dev = { -+ .minor = DT_DEV_PROFILE_MINOR, -+ .name = "profile", -+ .nodename = "dtrace/provider/profile", -+ .fops = &profile_fops, -+}; -+ -+int profile_dev_init(void) -+{ -+ int ret = 0; -+ -+ ret = misc_register(&profile_dev); -+ if (ret) -+ pr_err("%s: Can't register misc device %d\n", -+ profile_dev.name, profile_dev.minor); -+ -+ profile_max = PROFILE_MAX_DEFAULT; -+ -+ return ret; -+} -+ -+void profile_dev_exit(void) -+{ -+ misc_deregister(&profile_dev); -+} -diff --git a/dtrace/profile_mod.c b/dtrace/profile_mod.c -new file mode 100644 -index 000000000000..1fb54a29e57e ---- /dev/null -+++ b/dtrace/profile_mod.c -@@ -0,0 +1,52 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: profile_mod.c -+ * DESCRIPTION: DTrace - Profile provider kernel module -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/module.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "profile.h" -+ -+MODULE_AUTHOR("Kris Van Hees (kris.van.hees@oracle.com)"); -+MODULE_DESCRIPTION("Profile Interrupt Tracing"); -+MODULE_VERSION("v0.1"); -+MODULE_LICENSE("GPL"); -+ -+static const struct dtrace_pattr profile_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, -+{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, -+}; -+ -+static struct dtrace_pops profile_pops = { -+ .dtps_provide = profile_provide, -+ .dtps_provide_module = NULL, -+ .dtps_destroy_module = NULL, -+ .dtps_enable = profile_enable, -+ .dtps_disable = profile_disable, -+ .dtps_suspend = NULL, -+ .dtps_resume = NULL, -+ .dtps_getargdesc = NULL, -+ .dtps_getargval = NULL, -+ .dtps_usermode = profile_usermode, -+ .dtps_destroy = profile_destroy, -+}; -+ -+DT_PROVIDER_MODULE(profile, DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER) --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0012-dtrace-USDT-and-pid-provider-core-and-x86-components.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0012-dtrace-USDT-and-pid-provider-core-and-x86-components.patch deleted file mode 100644 index acd7621276c8..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0012-dtrace-USDT-and-pid-provider-core-and-x86-components.patch +++ /dev/null @@ -1,412 +0,0 @@ -From 557622245d84d39d27c81cbbb7c67f510993a3af 
Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 18:58:27 +0000 -Subject: [PATCH 12/20] dtrace: USDT and pid provider core and x86 components - -This implements the core (linked-in) machinery needed for userspace -statically-defined tracepoints (for historical reasons, known as -'fasttrap' by DTrace) and for the pid provider, which allows USDT -probes to be dropped at addresses that do not correspond to symbols, -at locations named as a symbol plus an offset. Both are implemented in -terms of kprobes. - -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - include/linux/dtrace_os.h | 23 +++ - kernel/dtrace/Kconfig | 10 ++ - kernel/dtrace/dtrace_os.c | 272 ++++++++++++++++++++++++++++++++++++ - kernel/dtrace/dtrace_task.c | 15 ++ - 4 files changed, 320 insertions(+) - -diff --git a/include/linux/dtrace_os.h b/include/linux/dtrace_os.h -index f2921ce039a7..836825b1c7be 100644 ---- a/include/linux/dtrace_os.h -+++ b/include/linux/dtrace_os.h -@@ -14,6 +14,9 @@ - #include <linux/mm.h> - #include <linux/notifier.h> - #include <linux/timekeeper_internal.h> -+#if IS_ENABLED(CONFIG_DT_FASTTRAP) -+#include <linux/uprobes.h> -+#endif - #include <asm/unistd.h> - #include <linux/dtrace_cpu.h> - #include <linux/dtrace_task.h> -@@ -94,8 +97,28 @@ static inline int dtrace_no_pf(struct pt_regs *regs) - } - - extern void (*dtrace_helpers_cleanup)(struct task_struct *); -+extern void (*dtrace_fasttrap_probes_cleanup)(struct task_struct *); - extern void (*dtrace_helpers_fork)(struct task_struct *, struct task_struct *); - -+#if IS_ENABLED(CONFIG_DT_FASTTRAP) -+struct fasttrap_machtp { -+ struct inode *fmtp_ino; -+ loff_t fmtp_off; -+ struct uprobe_consumer 
fmtp_cns; -+}; -+ -+extern int (*dtrace_tracepoint_hit)(struct fasttrap_machtp *, -+ struct pt_regs *, int); -+ -+extern struct task_struct *register_pid_provider(pid_t); -+extern void unregister_pid_provider(pid_t); -+ -+extern int dtrace_copy_code(pid_t, uint8_t *, uintptr_t, size_t); -+extern int dtrace_tracepoint_enable(pid_t, uintptr_t, int, -+ struct fasttrap_machtp *); -+extern int dtrace_tracepoint_disable(pid_t, struct fasttrap_machtp *); -+#endif /* CONFIG_DT_FASTTRAP || CONFIG_DT_FASTTRAP_MODULE */ -+ - #else - - /* -diff --git a/kernel/dtrace/Kconfig b/kernel/dtrace/Kconfig -index 7b88206fe835..6bf6620981cd 100644 ---- a/kernel/dtrace/Kconfig -+++ b/kernel/dtrace/Kconfig -@@ -23,6 +23,16 @@ config DT_CORE - - if DT_CORE - -+config DT_FASTTRAP -+ tristate "Fasttrap Tracing" -+ default m -+ depends on ARCH_SUPPORTS_UPROBES -+ select UPROBE_EVENT -+ help -+ Userspace tracing, providing the kernel support needed for tracing -+ userspace programs. Currently, only statically defined probes -+ (USDT) are supported. 
-+ - config DT_PROFILE - tristate "Profile Interrupt Tracing" - default m -diff --git a/kernel/dtrace/dtrace_os.c b/kernel/dtrace/dtrace_os.c -index 1b13a92fb20e..7140fd64cb7a 100644 ---- a/kernel/dtrace/dtrace_os.c -+++ b/kernel/dtrace/dtrace_os.c -@@ -40,6 +40,10 @@ - #include <linux/shmem_fs.h> - #include <linux/dtrace_task_impl.h> - -+#if IS_ENABLED(CONFIG_DT_FASTTRAP) -+# include <linux/uprobes.h> -+#endif /* CONFIG_DT_FASTTRAP || CONFIG_DT_FASTTRAP_MODULE */ -+ - /* - * OS SPECIFIC DTRACE SETUP - */ -@@ -413,3 +417,271 @@ void dtrace_disable(void) - dtrace_enabled = 0; - } - EXPORT_SYMBOL(dtrace_disable); -+ -+/* -+ * USER SPACE TRACING (FASTTRAP) SUPPORT -+ */ -+ -+#if IS_ENABLED(CONFIG_DT_FASTTRAP) -+int (*dtrace_tracepoint_hit)(struct fasttrap_machtp *, struct pt_regs *, int); -+EXPORT_SYMBOL(dtrace_tracepoint_hit); -+ -+struct task_struct *register_pid_provider(pid_t pid) -+{ -+ struct task_struct *p; -+ -+ /* -+ * Make sure the process exists, (FIXME: isn't a child created as the -+ * result of a vfork(2)), and isn't a zombie (but may be in fork). -+ */ -+ rcu_read_lock(); -+ p = find_task_by_vpid(pid); -+ if (p == NULL) { -+ rcu_read_unlock(); -+ return NULL; -+ } -+ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->state & TASK_DEAD || p->dt_task == NULL || -+ p->exit_state & (EXIT_ZOMBIE | EXIT_DEAD)) { -+ put_task_struct(p); -+ return NULL; -+ } -+ -+ /* -+ * Increment dtrace_probes so that the process knows to inform us -+ * when it exits or execs. fasttrap_provider_free() decrements this -+ * when we're done with this provider. -+ */ -+ if (p->dt_task != NULL) -+ p->dt_task->dt_probes++; -+ put_task_struct(p); -+ -+ return p; -+} -+EXPORT_SYMBOL(register_pid_provider); -+ -+void unregister_pid_provider(pid_t pid) -+{ -+ struct task_struct *p; -+ -+ /* -+ * Decrement dtrace_probes on the process whose provider we're -+ * freeing. 
We don't have to worry about clobbering somone else's -+ * modifications to it because we have locked the bucket that -+ * corresponds to this process's hash chain in the provider hash -+ * table. Don't sweat it if we can't find the process. -+ */ -+ rcu_read_lock(); -+ read_lock(&tasklist_lock); -+ if ((p = find_task_by_vpid(pid)) == NULL) { -+ read_unlock(&tasklist_lock); -+ rcu_read_unlock(); -+ return; -+ } -+ -+ get_task_struct(p); -+ read_unlock(&tasklist_lock); -+ rcu_read_unlock(); -+ -+ if (p->dt_task != NULL) -+ p->dt_task->dt_probes--; -+ put_task_struct(p); -+} -+EXPORT_SYMBOL(unregister_pid_provider); -+ -+int dtrace_copy_code(pid_t pid, uint8_t *buf, uintptr_t addr, size_t size) -+{ -+ struct task_struct *p; -+ struct inode *ino; -+ struct vm_area_struct *vma; -+ struct address_space *map; -+ loff_t off; -+ int rc = 0; -+ -+ /* -+ * First we determine the inode and offset that 'addr' refers to in the -+ * task referenced by 'pid'. -+ */ -+ rcu_read_lock(); -+ p = find_task_by_vpid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ pr_warn("PID %d not found\n", pid); -+ return -ESRCH; -+ } -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ down_write(&p->mm->mmap_sem); -+ vma = find_vma(p->mm, addr); -+ if (vma == NULL || vma->vm_file == NULL) { -+ rc = -EFAULT; -+ goto out; -+ } -+ -+ ino = vma->vm_file->f_mapping->host; -+ map = ino->i_mapping; -+ off = ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (addr - vma->vm_start); -+ -+ if (map->a_ops->readpage == NULL && !shmem_mapping(ino->i_mapping)) { -+ rc = -EIO; -+ goto out; -+ } -+ -+ /* -+ * Armed with inode and offset, we can start reading pages... -+ */ -+ do { -+ int len; -+ struct page *page; -+ void *kaddr; -+ -+ /* -+ * We cannot read beyond the end of the inode content. -+ */ -+ if (off >= i_size_read(ino)) -+ break; -+ -+ len = min_t(int, size, PAGE_SIZE - (off & ~PAGE_MASK)); -+ -+ /* -+ * Make sure that the page we're tring to read is populated and -+ * in page cache. 
-+ */ -+ if (map->a_ops->readpage) -+ page = read_mapping_page(map, off >> PAGE_SHIFT, -+ vma->vm_file); -+ else -+ page = shmem_read_mapping_page(map, off >> PAGE_SHIFT); -+ -+ if (IS_ERR(page)) { -+ rc = PTR_ERR(page); -+ break; -+ } -+ -+ kaddr = kmap_atomic(page); -+ memcpy(buf, kaddr + (off & ~PAGE_MASK), len); -+ kunmap_atomic(kaddr); -+ put_page(page); -+ -+ buf += len; -+ off += len; -+ size -= len; -+ } while (size > 0); -+ -+out: -+ up_write(&p->mm->mmap_sem); -+ put_task_struct(p); -+ -+ return rc; -+} -+EXPORT_SYMBOL(dtrace_copy_code); -+ -+static int handler(struct uprobe_consumer *self, struct pt_regs *regs, -+ int is_ret) -+{ -+ struct fasttrap_machtp *mtp; -+ int rc = 0; -+ -+ mtp = container_of(self, struct fasttrap_machtp, fmtp_cns); -+ -+ read_lock(&this_cpu_core->cpu_ft_lock); -+ if (dtrace_tracepoint_hit == NULL) -+ pr_warn("Fasttrap probes, but no handler\n"); -+ else -+ rc = (*dtrace_tracepoint_hit)(mtp, regs, is_ret); -+ read_unlock(&this_cpu_core->cpu_ft_lock); -+ -+ return rc; -+} -+ -+static int prb_handler(struct uprobe_consumer *self, struct pt_regs *regs) -+{ -+ return handler(self, regs, 0); -+} -+ -+static int ret_handler(struct uprobe_consumer *self, unsigned long func, -+ struct pt_regs *regs) -+{ -+ return handler(self, regs, 1); -+} -+ -+int dtrace_tracepoint_enable(pid_t pid, uintptr_t addr, int is_ret, -+ struct fasttrap_machtp *mtp) -+{ -+ struct task_struct *p; -+ struct inode *ino; -+ struct vm_area_struct *vma; -+ loff_t off; -+ int rc = 0; -+ -+ mtp->fmtp_ino = NULL; -+ mtp->fmtp_off = 0; -+ -+ p = find_task_by_vpid(pid); -+ if (!p) { -+ pr_warn("PID %d not found\n", pid); -+ return -ESRCH; -+ } -+ -+ if (p->dt_task == NULL) { -+ pr_warn("PID %d no dtrace_task\n", pid); -+ return -EFAULT; -+ } -+ -+ vma = find_vma(p->mm, addr); -+ if (vma == NULL || vma->vm_file == NULL) -+ return -EFAULT; -+ -+ ino = vma->vm_file->f_mapping->host; -+ off = ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (addr - vma->vm_start); -+ -+ if (is_ret) 
-+ mtp->fmtp_cns.ret_handler = ret_handler; -+ else -+ mtp->fmtp_cns.handler = prb_handler; -+ -+ rc = uprobe_register(ino, off, &mtp->fmtp_cns); -+ -+ /* -+ * If successful, increment the count of the number of -+ * tracepoints active in the victim process. -+ */ -+ if (rc == 0) { -+ mtp->fmtp_ino = ino; -+ mtp->fmtp_off = off; -+ -+ p->dt_task->dt_tp_count++; -+ } -+ -+ return rc; -+} -+EXPORT_SYMBOL(dtrace_tracepoint_enable); -+ -+int dtrace_tracepoint_disable(pid_t pid, struct fasttrap_machtp *mtp) -+{ -+ struct task_struct *p; -+ -+ if (!mtp || !mtp->fmtp_ino) -+ return -ENOENT; -+ -+ uprobe_unregister(mtp->fmtp_ino, mtp->fmtp_off, &mtp->fmtp_cns); -+ -+ mtp->fmtp_ino = NULL; -+ mtp->fmtp_off = 0; -+ -+ /* -+ * Decrement the count of the number of tracepoints active in -+ * the victim process (if it still exists). -+ */ -+ p = find_task_by_vpid(pid); -+ if (p != NULL && p->dt_task != NULL) -+ p->dt_task->dt_tp_count--; -+ -+ return 0; -+} -+EXPORT_SYMBOL(dtrace_tracepoint_disable); -+#endif /* CONFIG_DT_FASTTRAP || CONFIG_DT_FASTTRAP_MODULE */ -diff --git a/kernel/dtrace/dtrace_task.c b/kernel/dtrace/dtrace_task.c -index 02bcc6b7e0a2..8bae6e79c7f1 100644 ---- a/kernel/dtrace/dtrace_task.c -+++ b/kernel/dtrace/dtrace_task.c -@@ -22,6 +22,14 @@ - - struct kmem_cache *dtrace_task_cachep; - -+/* -+ * Fasttrap hooks that need to be called when a fasttrap meta provider -+ * is loaded and registered with the framework. -+ */ -+void (*dtrace_helpers_cleanup)(struct task_struct *); -+EXPORT_SYMBOL(dtrace_helpers_cleanup); -+void (*dtrace_fasttrap_probes_cleanup)(struct task_struct *); -+EXPORT_SYMBOL(dtrace_fasttrap_probes_cleanup); - void (*dtrace_helpers_fork)(struct task_struct *, struct task_struct *); - EXPORT_SYMBOL(dtrace_helpers_fork); - -@@ -76,6 +84,13 @@ static void dtrace_task_cleanup(struct task_struct *tsk) - if (tsk->dt_task == NULL) - return; - -+ /* Handle fasttrap provider cleanups. 
*/ -+ if (tsk->dt_task->dt_helpers != NULL && dtrace_helpers_cleanup != NULL) -+ (*dtrace_helpers_cleanup)(tsk); -+ -+ if (tsk->dt_task->dt_probes && dtrace_fasttrap_probes_cleanup != NULL) -+ (*dtrace_fasttrap_probes_cleanup)(tsk); -+ - /* Release psinfo if any. */ - psinfo = tsk->dt_task->dt_psinfo; - if (psinfo != NULL) { --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0013-dtrace-USDT-and-pid-providers.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0013-dtrace-USDT-and-pid-providers.patch deleted file mode 100644 index fe5e07f8f0a7..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0013-dtrace-USDT-and-pid-providers.patch +++ /dev/null @@ -1,2691 +0,0 @@ -From d560282c7319f039b6bf1ce00d33725f962d6cc3 Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 18:08:41 +0000 -Subject: [PATCH 13/20] dtrace: USDT and pid providers - -For historical reasons, these are provided in a module named -fasttrap.ko. - -Much of this is arch-dependent code for jump-table detection and -implementation of globbed pid probes ("probe everything you can in this -function"), as well as arch-dependent code to look up arguments and code -to ensure that dropping a kprobe in a single process does not affect -other processes running from the same binary. 
- -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - arch/x86/dtrace/Makefile.arch | 2 + - arch/x86/dtrace/fasttrap_x86_64.c | 364 +++ - .../x86/dtrace/include/dtrace/fasttrap_arch.h | 29 + - dtrace/Makefile | 2 + - dtrace/fasttrap_dev.c | 1986 +++++++++++++++++ - dtrace/fasttrap_impl.h | 172 ++ - dtrace/fasttrap_mod.c | 38 + - 7 files changed, 2593 insertions(+) - create mode 100644 arch/x86/dtrace/fasttrap_x86_64.c - create mode 100644 arch/x86/dtrace/include/dtrace/fasttrap_arch.h - create mode 100644 dtrace/fasttrap_dev.c - create mode 100644 dtrace/fasttrap_impl.h - create mode 100644 dtrace/fasttrap_mod.c - -diff --git a/arch/x86/dtrace/Makefile.arch b/arch/x86/dtrace/Makefile.arch -index 8492eaee426d..e4655557e06a 100644 ---- a/arch/x86/dtrace/Makefile.arch -+++ b/arch/x86/dtrace/Makefile.arch -@@ -7,7 +7,9 @@ DTARCHDIR = ../arch/x86/dtrace - ccflags-y += -I$(srctree)/arch/x86/dtrace/include -Idtrace - - dtrace-obj += dtrace_asm_x86_64.o dtrace_isa_x86_64.o -+fasttrap-obj += fasttrap_x86_64.o - sdt-obj += sdt_x86_64.o - - dtrace-y += $(addprefix $(DTARCHDIR)/, $(dtrace-obj)) -+fasttrap-y += $(addprefix $(DTARCHDIR)/, $(fasttrap-obj)) - sdt-y += $(addprefix $(DTARCHDIR)/, $(sdt-obj)) -diff --git a/arch/x86/dtrace/fasttrap_x86_64.c b/arch/x86/dtrace/fasttrap_x86_64.c -new file mode 100644 -index 000000000000..012b2a50a46a ---- /dev/null -+++ b/arch/x86/dtrace/fasttrap_x86_64.c -@@ -0,0 +1,364 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: fasttrap_x86_64.c -+ * DESCRIPTION: DTrace - fasttrap provider implementation for x86 -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <asm/insn.h> -+#include <linux/kernel.h> -+#include <linux/slab.h> -+#include <linux/uaccess.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "fasttrap_impl.h" -+ -+#define DISASM_REX_PREFIX(pfx) (((pfx) & 0xf0) == 0x40) -+#define DISASM_MODRM_REG(modrm) (((modrm) >> 3) & 0x07) -+ -+static int has_jump_table(const asm_instr_t *addr, size_t size) -+{ -+ const asm_instr_t *end = addr + size; -+ -+ while (addr < end) { -+ int len; -+ -+ /* -+ * Register-dependent jump instructions start with a 0xff byte -+ * and have the modrm.reg field set to 4. Such instructions -+ * tend to be used for jump tables. -+ */ -+ if ((addr[0] == 0xff && DISASM_MODRM_REG(addr[1]) == 4) || -+ (DISASM_REX_PREFIX(addr[0]) && addr[1] == 0xff && -+ DISASM_MODRM_REG(addr[2]) == 4)) -+ return 1; -+ -+ len = dtrace_instr_size(addr); -+ -+ /* -+ * If we encounter a problem decoding an instruction, we will -+ * assume that there might be a jump table. Better safe than -+ * sorry... -+ */ -+ if (len < 0) -+ return 1; -+ -+ addr += len; -+ } -+ -+ return 0; -+} -+ -+static uint64_t *fasttrap_all_offsets(asm_instr_t *text, size_t size, -+ uint64_t *np) -+{ -+ uint64_t *offs = NULL; -+ uint64_t noffs; -+ asm_instr_t *instr; -+ asm_instr_t *end; -+ -+ /* -+ * Two passes are taken through this section of code. The first time -+ * around we merely count the number of probe points. The second time, -+ * we actually record their locations. 
-+ */ -+again: -+ noffs = 0; -+ instr = text; -+ end = text + size; -+ -+ while (instr < end) { -+ int len; -+ -+ /* -+ * If we fail to decode an instruction, it is time to give up. -+ */ -+ len = dtrace_instr_size(instr); -+ if (len < 0) -+ goto fail; -+ -+ if (offs) -+ offs[noffs] = (uint64_t)(instr - text); -+ noffs++; -+ -+ instr += len; -+ } -+ -+ if (offs == NULL) { -+ /* -+ * No matching offsets found - we are done. -+ */ -+ if (noffs == 0) -+ goto fail; -+ -+ /* -+ * We know how many tracepoint locations there are for this -+ * probe, so allocate member to record them, and kick off the -+ * second pass. -+ */ -+ offs = kmalloc(sizeof(uint64_t) * noffs, GFP_KERNEL); -+ if (!offs) -+ goto fail; -+ -+ goto again; -+ } -+ -+ *np = noffs; -+ -+ return offs; -+ -+fail: -+ *np = 0; -+ kfree(offs); -+ -+ return NULL; -+} -+ -+uint64_t *fasttrap_glob_offsets(struct fasttrap_probe_spec *probe, -+ uint64_t *np) -+{ -+ size_t size = probe->ftps_size; -+ asm_instr_t *text = NULL; -+ asm_instr_t *instr; -+ asm_instr_t *end; -+ uint64_t *offs = NULL; -+ uint64_t noffs; -+ int ret = 0; -+ char ostr[sizeof(instr) * 2 + 1]; -+ -+ text = kmalloc(size, GFP_KERNEL); -+ if (!text) -+ goto fail; -+ -+ ret = dtrace_copy_code(probe->ftps_pid, (uint8_t *)text, -+ probe->ftps_pc, size); -+ if (ret != 0) -+ goto fail; -+ -+ if (has_jump_table(text, size)) -+ goto fail; -+ -+ if (probe->ftps_glen == 1 && probe->ftps_gstr[0] == '*') { -+ offs = fasttrap_all_offsets(text, size, &noffs); -+ goto out; -+ } -+ -+ /* -+ * Two passes are taken through this section of code. The first time -+ * around we merely count the number of probe points. The second time, -+ * we actually record their locations. -+ */ -+again: -+ noffs = 0; -+ instr = text; -+ end = text + size; -+ -+ while (instr < end) { -+ int len; -+ uint64_t off = (uint64_t)(instr - text); -+ -+ /* -+ * If we fail to decode an instruction, it is time to give up. 
-+ */ -+ len = dtrace_instr_size(instr); -+ if (len < 0) -+ goto fail; -+ -+ snprintf(ostr, sizeof(ostr), "%llx", off); -+ if (dtrace_gmatch(ostr, probe->ftps_gstr)) { -+ if (offs) -+ offs[noffs] = off; -+ noffs++; -+ } -+ -+ instr += len; -+ } -+ -+ if (offs == NULL) { -+ /* -+ * No matching offsets found - we are done. -+ */ -+ if (noffs == 0) -+ goto fail; -+ -+ /* -+ * We know how many tracepoint locations there are for this -+ * probe, so allocate member to record them, and kick off the -+ * second pass. -+ */ -+ offs = kmalloc(sizeof(uint64_t) * noffs, GFP_KERNEL); -+ if (!offs) -+ goto fail; -+ -+ goto again; -+ } -+ -+out: -+ kfree(text); -+ -+ *np = noffs; -+ -+ return offs; -+ -+fail: -+ kfree(offs); -+ kfree(text); -+ -+ *np = 0; -+ return NULL; -+} -+ -+uint64_t fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, -+ int aframes) -+{ -+ struct pt_regs *regs = this_cpu_core->cpu_dtrace_regs; -+ uint64_t *st; -+ uint64_t val; -+ -+ if (regs == NULL) -+ return 0; -+ -+ switch (argno) { -+ case 0: -+ return regs->di; -+ case 1: -+ return regs->si; -+ case 2: -+ return regs->dx; -+ case 3: -+ return regs->cx; -+ case 4: -+ return regs->r8; -+ case 5: -+ return regs->r9; -+ } -+ -+ ASSERT(argno > 5); -+ -+ pagefault_disable(); -+ st = (uint64_t *)regs->sp; -+ __copy_from_user_inatomic_nocache(&val, (void *)&st[argno - 6 + 1], -+ sizeof(st[0])); -+ pagefault_enable(); -+ -+ return val; -+} -+ -+uint64_t fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, -+ int argno, int aframes) -+{ -+ struct pt_regs *regs = this_cpu_core->cpu_dtrace_regs; -+ uint64_t *st; -+ uint64_t val; -+ -+ if (regs == NULL) -+ return 0; -+ -+ switch (argno) { -+ case 0: -+ return regs->di; -+ case 1: -+ return regs->si; -+ case 2: -+ return regs->dx; -+ case 3: -+ return regs->cx; -+ case 4: -+ return regs->r8; -+ case 5: -+ return regs->r9; -+ } -+ -+ ASSERT(argno > 5); -+ -+ pagefault_disable(); -+ st = (uint64_t *)regs->sp; -+ 
__copy_from_user_inatomic_nocache(&val, (void *)&st[argno - 6], -+ sizeof(st[0])); -+ pagefault_enable(); -+ -+ return val; -+} -+ -+static void fasttrap_map_args(struct fasttrap_probe *probe, -+ struct pt_regs *regs, -+ int argc, uintptr_t *argv) -+{ -+ int i, x, cap = min(argc, (int)probe->ftp_nargs); -+ uintptr_t *st = (uintptr_t *)regs->sp; -+ -+ for (i = 0; i < cap; i++) { -+ switch (x = probe->ftp_argmap[i]) { -+ case 0: -+ argv[i] = regs->di; -+ break; -+ case 1: -+ argv[i] = regs->si; -+ break; -+ case 2: -+ argv[i] = regs->dx; -+ break; -+ case 3: -+ argv[i] = regs->cx; -+ break; -+ case 4: -+ argv[i] = regs->r8; -+ break; -+ case 5: -+ argv[i] = regs->r9; -+ break; -+ default: -+ ASSERT(x > 5); -+ -+ __copy_from_user_inatomic_nocache(&argv[i], -+ (void *)&st[x - 6], -+ sizeof(st[0])); -+ } -+ } -+ -+ while (i < argc) -+ argv[i++] = 0; -+} -+ -+void fasttrap_pid_probe_arch(struct fasttrap_probe *ftp, struct pt_regs *regs) -+{ -+ if (ftp->ftp_argmap == NULL) { -+ dtrace_probe(ftp->ftp_id, regs->di, regs->si, regs->dx, -+ regs->cx, regs->r8, regs->r9, 0); -+ } else { -+ uintptr_t t[6]; -+ -+ fasttrap_map_args(ftp, regs, sizeof(t) / sizeof(t[0]), t); -+ dtrace_probe(ftp->ftp_id, t[0], t[1], t[2], t[3], -+ t[4], t[5], 0); -+ } -+} -+ -+void fasttrap_pid_retprobe_arch(struct fasttrap_probe *ftp, -+ struct pt_regs *regs) -+{ -+ /* -+ * FIXME: The first argument to the probe should be the offset in the -+ * function that the return occured at, but uprobes doesn't give -+ * us that information (or so it seems). 
-+ */ -+ dtrace_probe(ftp->ftp_id, 0, regs->ax, regs->dx, 0, 0, 0, 0); -+} -+ -+void fasttrap_set_enabled(struct pt_regs *regs) -+{ -+ regs->ax = 1; -+} -+ -diff --git a/arch/x86/dtrace/include/dtrace/fasttrap_arch.h b/arch/x86/dtrace/include/dtrace/fasttrap_arch.h -new file mode 100644 -index 000000000000..abbe9cb2bf38 ---- /dev/null -+++ b/arch/x86/dtrace/include/dtrace/fasttrap_arch.h -@@ -0,0 +1,29 @@ -+/* -+ * Dynamic Tracing for Linux - Fasttrap provider implementation defines -+ * -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _X86_64_FASTTRAP_ARCH_H -+#define _X86_64_FASTTRAP_ARCH_H -+ -+#define FASTTRAP_ENTRY_AFRAMES 8 -+#define FASTTRAP_RETURN_AFRAMES 8 -+#define FASTTRAP_OFFSET_AFRAMES 8 -+ -+#endif /* _X86_64_FASTTRAP_ARCH_H */ -diff --git a/dtrace/Makefile b/dtrace/Makefile -index 5e6fb362a4e9..c7e3fc512a6c 100644 ---- a/dtrace/Makefile -+++ b/dtrace/Makefile -@@ -3,6 +3,7 @@ - # - - obj-$(CONFIG_DT_CORE) += dtrace.o -+obj-$(CONFIG_DT_FASTTRAP) += fasttrap.o - obj-$(CONFIG_DT_PROFILE) += profile.o - obj-$(CONFIG_DT_SDT) += sdt.o - obj-$(CONFIG_DT_SYSTRACE) += systrace.o -@@ -17,6 +18,7 @@ dtrace-y := dtrace_mod.o dtrace_dev.o \ - dtrace_probe.o dtrace_probe_ctx.o \ - dtrace_ptofapi.o dtrace_predicate.o \ - dtrace_spec.o dtrace_state.o dtrace_util.o -+fasttrap-y := fasttrap_mod.o fasttrap_dev.o - profile-y := profile_mod.o profile_dev.o - sdt-y := sdt_mod.o sdt_dev.o - systrace-y := systrace_mod.o systrace_dev.o -diff --git a/dtrace/fasttrap_dev.c b/dtrace/fasttrap_dev.c -new file mode 100644 -index 000000000000..f48581e00f4b ---- /dev/null -+++ b/dtrace/fasttrap_dev.c -@@ -0,0 +1,1986 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: fasttrap_dev.c -+ * DESCRIPTION: DTrace - fasttrap provider device driver -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/atomic.h> -+#include <linux/fs.h> -+#include <linux/miscdevice.h> -+#include <linux/slab.h> -+#include <linux/sort.h> -+#include <linux/uaccess.h> -+#include <linux/vmalloc.h> -+#include <linux/workqueue.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "fasttrap_impl.h" -+ -+#define FASTTRAP_MAX_DEFAULT 250000 -+static uint32_t fasttrap_max; -+static uint64_t fasttrap_pid_count; -+static atomic_t fasttrap_total; -+ -+#define FASTTRAP_TPOINTS_DEFAULT_SIZE 0x4000 -+#define FASTTRAP_PROVIDERS_DEFAULT_SIZE 0x100 -+#define FASTTRAP_PROCS_DEFAULT_SIZE 0x100 -+ -+#define FASTTRAP_PID_NAME "pid" -+#define FASTTRAP_ENABLE_FAIL 1 -+#define FASTTRAP_ENABLE_PARTIAL 2 -+ -+struct fasttrap_hash fasttrap_tpoints; -+static struct fasttrap_hash fasttrap_provs; -+static struct fasttrap_hash fasttrap_procs; -+ -+#define FASTTRAP_PROVS_INDEX(pid, name) \ -+ ((fasttrap_hash_str(name) + (pid)) & fasttrap_provs.fth_mask) -+#define FASTTRAP_PROCS_INDEX(pid) ((pid) & fasttrap_procs.fth_mask) -+ -+#define FASTTRAP_TPOINTS_ELEM(pid, pc) \ -+ FASTTRAP_ELEM_BUCKET(&fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]) -+#define FASTTRAP_PROVS_ELEM(pid, name) \ -+ FASTTRAP_ELEM_BUCKET(&fasttrap_provs.fth_table[FASTTRAP_PROVS_INDEX(pid, name)]) -+#define FASTTRAP_PROCS_ELEM(pid) \ -+ FASTTRAP_ELEM_BUCKET(&fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]) -+ -+#define CLEANUP_NONE 0 -+#define CLEANUP_SCHEDULED 1 -+#define CLEANUP_DEFERRED 2 -+ -+DEFINE_MUTEX(fasttrap_cleanup_mtx); -+DEFINE_MUTEX(fasttrap_count_mtx); -+static uint_t fasttrap_cleanup_state; -+static uint_t fasttrap_cleanup_work; -+ -+static struct kmem_cache *tracepoint_cachep; -+ -+/* -+ * Generation count on modifications to the global tracepoint lookup table. 
-+ */ -+static volatile uint64_t fasttrap_mod_gen; -+ -+static void fasttrap_pid_cleanup(void); -+static void fasttrap_probes_cleanup(struct task_struct *); -+ -+static int fasttrap_pid_probe(struct fasttrap_machtp *mtp, -+ struct pt_regs *regs, int is_ret) -+{ -+ struct fasttrap_tracepoint *tp; -+ struct fasttrap_id *id; -+ int is_enabled = 0; -+ -+ tp = container_of(mtp, struct fasttrap_tracepoint, ftt_mtp); -+ -+ /* -+ * Verify that this probe event is actually related to the current -+ * process (task group). If not, ignore it. -+ * -+ * TODO: The underlying probe mechanism should register a single -+ * handler for the (inode, offset) combination. When the handler -+ * is called, it should run through a list of fasttrap -+ * tracepoints associated with the OS-level probe, looking for -+ * one that is related to the current task. -+ */ -+ if (tp->ftt_pid != current->tgid) -+ return 0; -+ -+ if (atomic64_read(&tp->ftt_proc->ftpc_acount) == 0) -+ return 0; -+ -+ this_cpu_core->cpu_dtrace_regs = regs; -+ -+ if (!is_ret) { -+ for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { -+ struct fasttrap_probe *ftp = id->fti_probe; -+ -+ if (id->fti_ptype == DTFTP_IS_ENABLED) -+ is_enabled = 1; -+ else -+ fasttrap_pid_probe_arch(ftp, regs); -+ } -+ } else { -+ for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { -+ struct fasttrap_probe *ftp = id->fti_probe; -+ -+ fasttrap_pid_retprobe_arch(ftp, regs); -+ } -+ } -+ -+ this_cpu_core->cpu_dtrace_regs = NULL; -+ -+ if (is_enabled) -+ fasttrap_set_enabled(regs); -+ -+ return 0; -+} -+ -+static void fasttrap_pid_provide(void *arg, -+ const struct dtrace_probedesc *desc) -+{ -+ /* -+ * There are no "default" pid probes. -+ */ -+} -+ -+static void fasttrap_enable_callbacks(void) -+{ -+ /* -+ * We don't have to play the RW lock game here because we're providing -+ * something rather than taking something away -- we can be sure that -+ * no threads have tried to follow these function pointers yet. 
-+ */ -+ mutex_lock(&fasttrap_count_mtx); -+ if (fasttrap_pid_count == 0) { -+ ASSERT(dtrace_tracepoint_hit == NULL); -+ -+ dtrace_fasttrap_probes_cleanup = &fasttrap_probes_cleanup; -+ dtrace_tracepoint_hit = &fasttrap_pid_probe; -+ } -+ -+ ASSERT(dtrace_fasttrap_probes_cleanup == &fasttrap_probes_cleanup); -+ ASSERT(dtrace_tracepoint_hit == &fasttrap_pid_probe); -+ -+ fasttrap_pid_count++; -+ mutex_unlock(&fasttrap_count_mtx); -+} -+ -+static void fasttrap_disable_callbacks(void) -+{ -+ ASSERT(MUTEX_HELD(&cpu_lock)); -+ -+ mutex_lock(&fasttrap_count_mtx); -+ ASSERT(fasttrap_pid_count > 0); -+ fasttrap_pid_count--; -+ -+ if (fasttrap_pid_count == 0) { -+ int cpu; -+ -+ for_each_present_cpu(cpu) { -+ struct cpu_core *cpuc = per_cpu_core(cpu); -+ -+ write_lock(&cpuc->cpu_ft_lock); -+ } -+ -+ dtrace_tracepoint_hit = NULL; -+ dtrace_fasttrap_probes_cleanup = NULL; -+ -+ for_each_present_cpu(cpu) { -+ struct cpu_core *cpuc = per_cpu_core(cpu); -+ -+ write_unlock(&cpuc->cpu_ft_lock); -+ } -+ } -+ -+ mutex_unlock(&fasttrap_count_mtx); -+} -+ -+/* -+ * This function ensures that no threads are actively using the memory -+ * associated with probes that were formerly live. 
-+ */ -+static void fasttrap_mod_barrier(uint64_t gen) -+{ -+ int cpu; -+ -+ if (gen < fasttrap_mod_gen) -+ return; -+ -+ fasttrap_mod_gen++; -+ -+ for_each_present_cpu(cpu) { -+ struct cpu_core *cpuc = per_cpu_core(cpu); -+ -+ mutex_lock(&cpuc->cpuc_pid_lock); -+ mutex_unlock(&cpuc->cpuc_pid_lock); -+ } -+} -+ -+static int fasttrap_tracepoint_enable(struct fasttrap_probe *probe, -+ uint_t index) -+{ -+ struct fasttrap_tracepoint *tp, *new_tp = NULL; -+ struct fasttrap_bucket *bucket; -+ struct fasttrap_id *id; -+ pid_t pid; -+ uintptr_t pc; -+ -+ ASSERT(index < probe->ftp_ntps); -+ -+ pid = probe->ftp_pid; -+ pc = probe->ftp_tps[index].fit_tp->ftt_pc; -+ id = &probe->ftp_tps[index].fit_id; -+ -+ ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid); -+ -+ /* -+ * Before we make any modifications, make sure we've imposed a barrier -+ * on the generation in which this probe was last modified. -+ */ -+ fasttrap_mod_barrier(probe->ftp_gen); -+ -+ bucket = FASTTRAP_TPOINTS_ELEM(pid, pc); -+ -+ /* -+ * If the tracepoint has already been enabled, just add our id to the -+ * list of interested probes. This may be our second time through -+ * this path in which case we'll have constructed the tracepoint we'd -+ * like to install. If we can't find a match, and have an allocated -+ * tracepoint ready to go, enable that one now. -+ * -+ * A tracepoint whose process is defunct is also considered defunct. -+ */ -+again: -+ mutex_lock(&bucket->ftb_mtx); -+ for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { -+ /* -+ * Note that it's safe to access the active count on the -+ * associated proc structure because we know that at least one -+ * provider (this one) will still be around throughout this -+ * operation. 
-+ */ -+ if (tp->ftt_pid != pid || tp->ftt_pc != pc || -+ atomic64_read(&tp->ftt_proc->ftpc_acount) == 0) -+ continue; -+ -+ /* -+ * Now that we've found a matching tracepoint, it would be -+ * a decent idea to confirm that the tracepoint is still -+ * enabled and the trap instruction hasn't been overwritten. -+ * Since this is a little hairy, we'll punt for now. -+ */ -+ -+ /* -+ * This can't be the first interested probe. We don't have -+ * to worry about another thread being in the midst of -+ * deleting this tracepoint (which would be the only valid -+ * reason for a tracepoint to have no interested probes) -+ * since we're holding P_PR_LOCK for this process. -+ */ -+ ASSERT(tp->ftt_ids != NULL || tp->ftt_retids != NULL); -+ -+ switch (id->fti_ptype) { -+ case DTFTP_ENTRY: -+ case DTFTP_OFFSETS: -+ case DTFTP_IS_ENABLED: -+ if (tp->ftt_ids == NULL) /* return tp */ -+ continue; -+ -+ ASSERT(tp->ftt_retids == NULL); -+ -+ id->fti_next = tp->ftt_ids; -+ dtrace_membar_producer(); -+ tp->ftt_ids = id; -+ dtrace_membar_producer(); -+ break; -+ -+ case DTFTP_RETURN: -+ case DTFTP_POST_OFFSETS: -+ if (tp->ftt_retids == NULL) /* non-return tp */ -+ continue; -+ -+ ASSERT(tp->ftt_ids == NULL); -+ -+ id->fti_next = tp->ftt_retids; -+ dtrace_membar_producer(); -+ tp->ftt_retids = id; -+ dtrace_membar_producer(); -+ break; -+ -+ default: -+ ASSERT(0); /* FIXME */ -+ } -+ -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ if (new_tp != NULL) { -+ new_tp->ftt_ids = NULL; -+ new_tp->ftt_retids = NULL; -+ } -+ -+ return 0; -+ } -+ -+ /* -+ * If we have a good tracepoint ready to go, install it now while -+ * we have the lock held and no one can screw with us. -+ */ -+ if (new_tp != NULL) { -+ int rc = 0; -+ -+ new_tp->ftt_next = bucket->ftb_data; -+ dtrace_membar_producer(); -+ bucket->ftb_data = new_tp; -+ dtrace_membar_producer(); -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ /* -+ * Activate the tracepoint in the ISA-specific manner. 
-+ * If this fails, we need to report the failure, but -+ * indicate that this tracepoint must still be disabled -+ * by calling fasttrap_tracepoint_disable(). -+ */ -+ rc = dtrace_tracepoint_enable(pid, pc, -+ id->fti_ptype == DTFTP_RETURN, -+ &new_tp->ftt_mtp); -+ -+ return rc ? FASTTRAP_ENABLE_PARTIAL : 0; -+ } -+ -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ /* -+ * Initialize the tracepoint that's been preallocated with the probe. -+ */ -+ new_tp = probe->ftp_tps[index].fit_tp; -+ -+ ASSERT(new_tp->ftt_pid == pid); -+ ASSERT(new_tp->ftt_pc == pc); -+ ASSERT(new_tp->ftt_proc == probe->ftp_prov->ftp_proc); -+ ASSERT(new_tp->ftt_ids == NULL); -+ ASSERT(new_tp->ftt_retids == NULL); -+ -+ switch (id->fti_ptype) { -+ case DTFTP_ENTRY: -+ case DTFTP_OFFSETS: -+ case DTFTP_IS_ENABLED: -+ id->fti_next = NULL; -+ new_tp->ftt_ids = id; -+ break; -+ -+ case DTFTP_RETURN: -+ case DTFTP_POST_OFFSETS: -+ id->fti_next = NULL; -+ new_tp->ftt_retids = id; -+ break; -+ -+ default: -+ ASSERT(0); -+ } -+ -+ goto again; -+} -+ -+static void fasttrap_tracepoint_disable(struct fasttrap_probe *probe, -+ uint_t index) -+{ -+ struct fasttrap_bucket *bucket; -+ struct fasttrap_provider *prov = probe->ftp_prov; -+ struct fasttrap_tracepoint **pp, *tp; -+ struct fasttrap_id *id, **idp = NULL; -+ pid_t pid; -+ uintptr_t pc; -+ -+ ASSERT(index < probe->ftp_ntps); -+ -+ pid = probe->ftp_pid; -+ pc = probe->ftp_tps[index].fit_tp->ftt_pc; -+ id = &probe->ftp_tps[index].fit_id; -+ -+ ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid); -+ -+ /* -+ * Find the tracepoint and make sure that our id is one of the -+ * ones registered with it. Return probes are linked in their -+ * own tracepoint, even though they share the (pi, pc) pair with -+ * entry probes. 
-+ */ -+ bucket = FASTTRAP_TPOINTS_ELEM(pid, pc); -+ mutex_lock(&bucket->ftb_mtx); -+ for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { -+ if (tp->ftt_pid != pid || tp->ftt_pc != pc || -+ tp->ftt_proc != prov->ftp_proc) -+ continue; -+ -+ if (id->fti_ptype == DTFTP_RETURN && tp->ftt_retids == NULL) -+ continue; -+ -+ if (id->fti_ptype != DTFTP_RETURN && tp->ftt_ids == NULL) -+ continue; -+ -+ break; -+ } -+ -+ /* -+ * If we somehow lost this tracepoint, we are in trouble. -+ */ -+ ASSERT(tp != NULL); -+ -+ switch (id->fti_ptype) { -+ case DTFTP_ENTRY: -+ case DTFTP_OFFSETS: -+ case DTFTP_IS_ENABLED: -+ ASSERT(tp->ftt_ids != NULL); -+ idp = &tp->ftt_ids; -+ break; -+ -+ case DTFTP_RETURN: -+ case DTFTP_POST_OFFSETS: -+ ASSERT(tp->ftt_retids != NULL); -+ idp = &tp->ftt_retids; -+ break; -+ -+ default: -+ ASSERT(0); -+ } -+ -+ while ((*idp)->fti_probe != probe) { -+ idp = &(*idp)->fti_next; -+ ASSERT(*idp != NULL); -+ } -+ -+ id = *idp; -+ *idp = id->fti_next; -+ dtrace_membar_producer(); -+ -+ ASSERT(id->fti_probe == probe); -+ -+ /* -+ * If there are other registered enablings of this tracepoint, we're -+ * all done, but if this was the last probe assocated with this -+ * this tracepoint, we need to remove and free it. -+ */ -+ if (tp->ftt_ids != NULL || tp->ftt_retids != NULL) { -+ /* -+ * If the current probe's tracepoint is in use, swap it -+ * for an unused tracepoint. 
-+ */ -+ if (tp == probe->ftp_tps[index].fit_tp) { -+ struct fasttrap_probe *tmp_probe; -+ struct fasttrap_tracepoint **tmp_tp; -+ uint_t tmp_index; -+ -+ if (tp->ftt_ids != NULL) { -+ tmp_probe = tp->ftt_ids->fti_probe; -+ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_ids); -+ tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; -+ } else { -+ tmp_probe = tp->ftt_retids->fti_probe; -+ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_retids); -+ tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; -+ } -+ -+ ASSERT(*tmp_tp != NULL); -+ ASSERT(*tmp_tp != probe->ftp_tps[index].fit_tp); -+ ASSERT((*tmp_tp)->ftt_ids == NULL); -+ ASSERT((*tmp_tp)->ftt_retids == NULL); -+ -+ probe->ftp_tps[index].fit_tp = *tmp_tp; -+ *tmp_tp = tp; -+ } -+ -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ /* -+ * Tag the modified probe with the generation in which it was -+ * changed. -+ */ -+ probe->ftp_gen = fasttrap_mod_gen; -+ return; -+ } -+ -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ dtrace_tracepoint_disable(pid, &tp->ftt_mtp); -+ -+ /* -+ * Remove the probe from the hash table of active tracepoints. -+ */ -+ mutex_lock(&bucket->ftb_mtx); -+ pp = (struct fasttrap_tracepoint **)&bucket->ftb_data; -+ ASSERT(*pp != NULL); -+ while (*pp != tp) { -+ pp = &(*pp)->ftt_next; -+ ASSERT(*pp != NULL); -+ } -+ -+ *pp = tp->ftt_next; -+ dtrace_membar_producer(); -+ -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ /* -+ * Tag the modified probe with the generation in which it was changed. -+ */ -+ probe->ftp_gen = fasttrap_mod_gen; -+} -+ -+static int fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct fasttrap_probe *probe = parg; -+ int i, rc; -+ -+ ASSERT(probe != NULL); -+ ASSERT(!probe->ftp_enabled); -+ ASSERT(id == probe->ftp_id); -+ ASSERT(MUTEX_HELD(&cpu_lock)); -+ -+ /* -+ * Increment the count of enabled probes on this probe's provider; -+ * the provider can't go away while the probe still exists. We -+ * must increment this even if we aren't able to properly enable -+ * this probe. 
-+ */ -+ mutex_lock(&probe->ftp_prov->ftp_mtx); -+ probe->ftp_prov->ftp_rcount++; -+ mutex_unlock(&probe->ftp_prov->ftp_mtx); -+ -+ /* -+ * If this probe's provider is retired (meaning it was valid in a -+ * previously exec'ed incarnation of this address space), bail out. The -+ * provider can't go away while we're in this code path. -+ */ -+ if (probe->ftp_prov->ftp_retired) -+ return 0; -+ -+#ifdef FIXME -+ /* -+ * If we can't find the process, it may be that we're in the context of -+ * a fork in which the traced process is being born and we're copying -+ * USDT probes. Otherwise, the process is gone so bail. -+ */ -+ p = sprlock(probe->ftp_pid); -+ if (p == NULL) { -+ if ((curproc->p_flag & SFORKING) == 0) -+ return 0; -+ -+ mutex_enter(&pidlock); -+ p = prfind(probe->ftp_pid); -+ -+ /* -+ * Confirm that curproc is indeed forking the process in which -+ * we're trying to enable probes. -+ */ -+ ASSERT(p != NULL); -+ ASSERT(p->p_parent == curproc); -+ ASSERT(p->p_stat == SIDL); -+ -+ mutex_enter(&p->p_lock); -+ mutex_exit(&pidlock); -+ -+ sprlock_proc(p); -+ } -+ -+ ASSERT(!(p->p_flag & SVFORK)); -+ mutex_exit(&p->p_lock); -+#endif -+ -+ /* -+ * We have to enable the trap entry point before any user threads have -+ * the chance to execute the trap instruction we're about to place -+ * in their process's text. -+ */ -+ fasttrap_enable_callbacks(); -+ -+ /* -+ * Enable all the tracepoints and add this probe's id to each -+ * tracepoint's list of active probes. -+ */ -+ for (i = 0; i < probe->ftp_ntps; i++) { -+ rc = fasttrap_tracepoint_enable(probe, i); -+ if (rc != 0) { -+ /* -+ * If enabling the tracepoint failed completely, -+ * we don't have to disable it; if the failure -+ * was only partial we must disable it. -+ */ -+ if (rc == FASTTRAP_ENABLE_FAIL) -+ i--; -+ else -+ ASSERT(rc == FASTTRAP_ENABLE_PARTIAL); -+ -+ /* -+ * Back up and pull out all the tracepoints we've -+ * created so far for this probe. 
-+ */ -+ while (i >= 0) { -+ fasttrap_tracepoint_disable(probe, i); -+ i--; -+ } -+ -+#ifdef FIXME -+ mutex_enter(&p->p_lock); -+ sprunlock(p); -+#endif -+ -+ /* -+ * Since we're not actually enabling this probe, -+ * drop our reference on the trap table entry. -+ */ -+ fasttrap_disable_callbacks(); -+ return 0; -+ } -+ } -+ -+#ifdef FIXME -+ mutex_enter(&p->p_lock); -+ sprunlock(p); -+#endif -+ -+ probe->ftp_enabled = 1; -+ return 0; -+} -+ -+static void fasttrap_pid_disable(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct fasttrap_probe *probe = parg; -+ struct fasttrap_provider *prov = probe->ftp_prov; -+ int i, whack = 0; -+ -+ ASSERT(id == probe->ftp_id); -+ -+ mutex_lock(&prov->ftp_mtx); -+ -+ /* -+ * Disable all the associated tracepoints (for fully enabled probes). -+ */ -+ if (probe->ftp_enabled) { -+ for (i = 0; i < probe->ftp_ntps; i++) -+ fasttrap_tracepoint_disable(probe, i); -+ } -+ -+ ASSERT(prov->ftp_rcount > 0); -+ prov->ftp_rcount--; -+ -+ if ((prov->ftp_retired || prov->ftp_rcount == 0) && !prov->ftp_marked) -+ whack = prov->ftp_marked = 1; -+ -+ mutex_unlock(&prov->ftp_mtx); -+ -+ if (whack) -+ fasttrap_pid_cleanup(); -+ -+ if (!probe->ftp_enabled) -+ return; -+ -+ probe->ftp_enabled = 0; -+ -+ ASSERT(MUTEX_HELD(&cpu_lock)); -+ fasttrap_disable_callbacks(); -+} -+ -+static void fasttrap_pid_getargdesc(void *arg, dtrace_id_t id, void *parg, -+ struct dtrace_argdesc *desc) -+{ -+ struct fasttrap_probe *probe = parg; -+ char *str; -+ int i, ndx; -+ -+ desc->dtargd_native[0] = '\0'; -+ desc->dtargd_xlate[0] = '\0'; -+ -+ if (probe->ftp_prov->ftp_retired != 0 || -+ desc->dtargd_ndx >= probe->ftp_nargs) { -+ desc->dtargd_ndx = DTRACE_ARGNONE; -+ return; -+ } -+ -+ ndx = (probe->ftp_argmap != NULL) ? 
probe->ftp_argmap[desc->dtargd_ndx] -+ : desc->dtargd_ndx; -+ -+ str = probe->ftp_ntypes; -+ for (i = 0; i < ndx; i++) -+ str += strlen(str) + 1; -+ -+ ASSERT(strlen(str + 1) < sizeof(desc->dtargd_native)); -+ strcpy(desc->dtargd_native, str); -+ -+ if (probe->ftp_xtypes == NULL) -+ return; -+ -+ str = probe->ftp_xtypes; -+ for (i = 0; i < desc->dtargd_ndx; i++) -+ str += strlen(str) + 1; -+ -+ ASSERT(strlen(str + 1) < sizeof(desc->dtargd_xlate)); -+ strcpy(desc->dtargd_xlate, str); -+} -+ -+static void fasttrap_pid_destroy(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct fasttrap_probe *probe = parg; -+ int i; -+ -+ ASSERT(probe != NULL); -+ ASSERT(!probe->ftp_enabled); -+ ASSERT(atomic_read(&fasttrap_total) >= probe->ftp_ntps); -+ -+ atomic_add(-probe->ftp_ntps, &fasttrap_total); -+ -+ if (probe->ftp_gen + 1 >= fasttrap_mod_gen) -+ fasttrap_mod_barrier(probe->ftp_gen); -+ -+ for (i = 0; i < probe->ftp_ntps; i++) -+ kmem_cache_free(tracepoint_cachep, probe->ftp_tps[i].fit_tp); -+ -+ kfree(probe); -+} -+ -+static const struct dtrace_pattr pid_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+}; -+ -+static struct dtrace_pops pid_pops = { -+ .dtps_provide = fasttrap_pid_provide, -+ .dtps_provide_module = NULL, -+ .dtps_destroy_module = NULL, -+ .dtps_enable = fasttrap_pid_enable, -+ .dtps_disable = fasttrap_pid_disable, -+ .dtps_suspend = NULL, -+ .dtps_resume = NULL, -+ .dtps_getargdesc = fasttrap_pid_getargdesc, -+ .dtps_getargval = fasttrap_pid_getarg, -+ .dtps_usermode = NULL, -+ .dtps_destroy = fasttrap_pid_destroy -+}; -+ -+static struct dtrace_pops usdt_pops = { -+ .dtps_provide = fasttrap_pid_provide, -+ 
.dtps_provide_module = NULL, -+ .dtps_destroy_module = NULL, -+ .dtps_enable = fasttrap_pid_enable, -+ .dtps_disable = fasttrap_pid_disable, -+ .dtps_suspend = NULL, -+ .dtps_resume = NULL, -+ .dtps_getargdesc = fasttrap_pid_getargdesc, -+ .dtps_getargval = fasttrap_usdt_getarg, -+ .dtps_usermode = NULL, -+ .dtps_destroy = fasttrap_pid_destroy -+}; -+ -+static uint_t fasttrap_hash_str(const char *p) -+{ -+ unsigned int g; -+ uint_t hval = 0; -+ -+ while (*p) { -+ hval = (hval << 4) + *p++; -+ g = hval & 0xf0000000; -+ if (g != 0) -+ hval ^= g >> 24; -+ hval &= ~g; -+ } -+ -+ return hval; -+} -+ -+static int fasttrap_uint32_cmp(const void *ap, const void *bp) -+{ -+ return (*(const uint32_t *)ap - *(const uint32_t *)bp); -+} -+ -+void fasttrap_meta_create_probe(void *arg, void *parg, -+ struct dtrace_helper_probedesc *dhpb) -+{ -+ struct fasttrap_provider *provider = parg; -+ struct fasttrap_probe *pp; -+ struct fasttrap_tracepoint *tp; -+ int i, j; -+ uint32_t ntps; -+ -+ /* -+ * Since the meta provider count is non-zero we don't have to worry -+ * about this provider disappearing. -+ */ -+ ASSERT(provider->ftp_mcount > 0); -+ -+ /* -+ * The offsets must be unique. -+ */ -+ sort(dhpb->dthpb_offs, dhpb->dthpb_noffs, sizeof(uint32_t), -+ fasttrap_uint32_cmp, NULL); -+ for (i = 1; i < dhpb->dthpb_noffs; i++) { -+ if (dhpb->dthpb_base + dhpb->dthpb_offs[i] <= -+ dhpb->dthpb_base + dhpb->dthpb_offs[i - 1]) -+ return; -+ } -+ -+ sort(dhpb->dthpb_enoffs, dhpb->dthpb_nenoffs, sizeof(uint32_t), -+ fasttrap_uint32_cmp, NULL); -+ for (i = 1; i < dhpb->dthpb_nenoffs; i++) { -+ if (dhpb->dthpb_base + dhpb->dthpb_enoffs[i] <= -+ dhpb->dthpb_base + dhpb->dthpb_enoffs[i - 1]) -+ return; -+ } -+ -+ /* -+ * Grab the creation lock to ensure consistency between calls to -+ * dtrace_probe_lookup() and dtrace_probe_create() in the face of -+ * other threads creating probes. 
-+ */ -+ mutex_lock(&provider->ftp_cmtx); -+ -+ if (dtrace_probe_lookup(provider->ftp_provid, dhpb->dthpb_mod, -+ dhpb->dthpb_func, dhpb->dthpb_name) != DTRACE_IDNONE) { -+ mutex_unlock(&provider->ftp_cmtx); -+ return; -+ } -+ -+ ntps = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs; -+ ASSERT(ntps > 0); -+ -+ pp = kzalloc(offsetof(struct fasttrap_probe, ftp_tps[ntps]), -+ GFP_KERNEL); -+ if (pp == NULL) { -+ pr_warn("Unable to create probe %s: out of memory\n", -+ dhpb->dthpb_name); -+ mutex_unlock(&provider->ftp_cmtx); -+ return; -+ } -+ -+ atomic_add(ntps, &fasttrap_total); -+ if (atomic_read(&fasttrap_total) > fasttrap_max) { -+ kfree(pp); -+ atomic_add(-ntps, &fasttrap_total); -+ mutex_unlock(&provider->ftp_cmtx); -+ return; -+ } -+ -+ pp->ftp_prov = provider; -+ pp->ftp_pid = provider->ftp_pid; -+ pp->ftp_ntps = ntps; -+ pp->ftp_nargs = dhpb->dthpb_xargc; -+ pp->ftp_xtypes = dhpb->dthpb_xtypes; -+ pp->ftp_ntypes = dhpb->dthpb_ntypes; -+ -+ /* -+ * First create a tracepoint for each actual point of interest. -+ */ -+ for (i = 0; i < dhpb->dthpb_noffs; i++) { -+ tp = kmem_cache_alloc(tracepoint_cachep, GFP_KERNEL); -+ if (tp == NULL) -+ goto fail; -+ -+ tp->ftt_proc = provider->ftp_proc; -+ tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_offs[i]; -+ tp->ftt_pid = provider->ftp_pid; -+ memset(&tp->ftt_mtp, 0, sizeof(struct fasttrap_machtp)); -+ tp->ftt_ids = NULL; -+ tp->ftt_retids = NULL; -+ tp->ftt_next = NULL; -+ -+ dt_dbg_dof(" Tracepoint at 0x%lx (0x%llx + 0x%x)\n", -+ tp->ftt_pc, dhpb->dthpb_base, dhpb->dthpb_offs[i]); -+ -+ pp->ftp_tps[i].fit_tp = tp; -+ pp->ftp_tps[i].fit_id.fti_probe = pp; -+ pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_OFFSETS; -+ } -+ -+ /* -+ * Then create a tracepoint for each is-enabled point. 
-+ */ -+ for (j = 0; i < ntps; i++, j++) { -+ tp = kmem_cache_alloc(tracepoint_cachep, GFP_KERNEL); -+ if (tp == NULL) -+ goto fail; -+ -+ tp->ftt_proc = provider->ftp_proc; -+ tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_enoffs[j]; -+ tp->ftt_pid = provider->ftp_pid; -+ memset(&tp->ftt_mtp, 0, sizeof(struct fasttrap_machtp)); -+ tp->ftt_ids = NULL; -+ tp->ftt_retids = NULL; -+ tp->ftt_next = NULL; -+ -+ pp->ftp_tps[i].fit_tp = tp; -+ pp->ftp_tps[i].fit_id.fti_probe = pp; -+ pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_IS_ENABLED; -+ } -+ -+ /* -+ * If the arguments are shuffled around we set the argument remapping -+ * table. Later, when the probe fires, we only remap the arguments -+ * if the table is non-NULL. -+ */ -+ for (i = 0; i < dhpb->dthpb_xargc; i++) { -+ if (dhpb->dthpb_args[i] != i) { -+ pp->ftp_argmap = dhpb->dthpb_args; -+ break; -+ } -+ } -+ -+ /* -+ * The probe is fully constructed -- register it with DTrace. -+ */ -+ pp->ftp_id = dtrace_probe_create(provider->ftp_provid, dhpb->dthpb_mod, -+ dhpb->dthpb_func, dhpb->dthpb_name, -+ FASTTRAP_OFFSET_AFRAMES, pp); -+ if (pp->ftp_id == DTRACE_IDNONE) -+ goto fail; -+ -+ mutex_unlock(&provider->ftp_cmtx); -+ return; -+ -+fail: -+ pr_warn("Unable to create probe %s: out of memory\n", -+ dhpb->dthpb_name); -+ -+ for (i = 0; i < ntps; i++) -+ kmem_cache_free(tracepoint_cachep, pp->ftp_tps[i].fit_tp); -+ -+ kfree(pp); -+ atomic_add(-ntps, &fasttrap_total); -+ mutex_unlock(&provider->ftp_cmtx); -+} -+ -+static void fasttrap_proc_release(struct fasttrap_proc *proc) -+{ -+ struct fasttrap_bucket *bucket; -+ struct fasttrap_proc *fprc, **fprcp; -+ pid_t pid = proc->ftpc_pid; -+ -+ mutex_lock(&proc->ftpc_mtx); -+ -+ ASSERT(proc->ftpc_rcount != 0); -+ ASSERT(atomic64_read(&proc->ftpc_acount) <= proc->ftpc_rcount); -+ -+ if (--proc->ftpc_rcount != 0) { -+ mutex_unlock(&proc->ftpc_mtx); -+ return; -+ } -+ -+ mutex_unlock(&proc->ftpc_mtx); -+ -+ /* -+ * There should definitely be no live providers associated with this -+ * 
process at this point. -+ */ -+ ASSERT(atomic64_read(&proc->ftpc_acount) == 0); -+ -+ bucket = FASTTRAP_PROCS_ELEM(pid); -+ mutex_lock(&bucket->ftb_mtx); -+ -+ fprcp = (struct fasttrap_proc **)&bucket->ftb_data; -+ while ((fprc = *fprcp) != NULL) { -+ if (fprc == proc) -+ break; -+ -+ fprcp = &fprc->ftpc_next; -+ } -+ -+ /* -+ * Something strange has happened if we can't find the proc. -+ */ -+ ASSERT(fprc != NULL); -+ -+ *fprcp = fprc->ftpc_next; -+ -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ kfree(fprc); -+} -+ -+static void fasttrap_provider_free(struct fasttrap_provider *provider) -+{ -+ pid_t pid = provider->ftp_pid; -+ -+ /* -+ * There need to be no associated enabled probes, no consumers -+ * creating probes, and no meta providers referencing this provider. -+ */ -+ ASSERT(provider->ftp_rcount == 0); -+ ASSERT(provider->ftp_ccount == 0); -+ ASSERT(provider->ftp_mcount == 0); -+ -+ /* -+ * If this provider hasn't been retired, we need to explicitly drop the -+ * count of active providers on the associated process structure. 
-+ */ -+ if (!provider->ftp_retired) { -+ atomic64_add(-1, &provider->ftp_proc->ftpc_acount); -+ ASSERT(atomic64_read(&provider->ftp_proc->ftpc_acount) < -+ provider->ftp_proc->ftpc_rcount); -+ } -+ -+ fasttrap_proc_release(provider->ftp_proc); -+ -+ kfree(provider); -+ -+ unregister_pid_provider(pid); -+} -+ -+static struct fasttrap_proc *fasttrap_proc_lookup(pid_t pid) -+{ -+ struct fasttrap_bucket *bucket; -+ struct fasttrap_proc *fprc, *new_fprc; -+ -+ bucket = FASTTRAP_PROCS_ELEM(pid); -+ mutex_lock(&bucket->ftb_mtx); -+ -+ for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { -+ if (fprc->ftpc_pid == pid && -+ atomic64_read(&fprc->ftpc_acount) != 0) { -+ mutex_lock(&fprc->ftpc_mtx); -+ mutex_unlock(&bucket->ftb_mtx); -+ fprc->ftpc_rcount++; -+ atomic64_inc(&fprc->ftpc_acount); -+ ASSERT(atomic64_read(&fprc->ftpc_acount) <= -+ fprc->ftpc_rcount); -+ mutex_unlock(&fprc->ftpc_mtx); -+ -+ return fprc; -+ } -+ } -+ -+ /* -+ * Drop the bucket lock so we don't try to perform a sleeping -+ * allocation under it. -+ */ -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ new_fprc = kzalloc(sizeof(struct fasttrap_proc), GFP_KERNEL); -+ if (new_fprc == NULL) -+ return NULL; -+ -+ new_fprc->ftpc_pid = pid; -+ new_fprc->ftpc_rcount = 1; -+ atomic64_set(&new_fprc->ftpc_acount, 1); -+ mutex_init(&new_fprc->ftpc_mtx); -+ -+ mutex_lock(&bucket->ftb_mtx); -+ -+ /* -+ * Take another lap through the list to make sure a proc hasn't -+ * been created for this pid while we weren't under the bucket lock. 
-+ */ -+ for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { -+ if (fprc->ftpc_pid == pid && -+ atomic64_read(&fprc->ftpc_acount) != 0) { -+ mutex_lock(&fprc->ftpc_mtx); -+ mutex_unlock(&bucket->ftb_mtx); -+ fprc->ftpc_rcount++; -+ atomic64_inc(&fprc->ftpc_acount); -+ ASSERT(atomic64_read(&fprc->ftpc_acount) <= -+ fprc->ftpc_rcount); -+ mutex_unlock(&fprc->ftpc_mtx); -+ -+ kfree(new_fprc); -+ -+ return fprc; -+ } -+ } -+ -+ new_fprc->ftpc_next = bucket->ftb_data; -+ bucket->ftb_data = new_fprc; -+ -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ return new_fprc; -+} -+ -+/* -+ * Lookup a fasttrap-managed provider based on its name and associated pid. -+ * If the pattr argument is non-NULL, this function instantiates the provider -+ * if it doesn't exist otherwise it returns NULL. The provider is returned -+ * with its lock held. -+ */ -+static struct fasttrap_provider * -+fasttrap_provider_lookup(pid_t pid, const char *name, -+ const struct dtrace_pattr *pa) -+{ -+ struct fasttrap_provider *fp, *new_fp = NULL; -+ struct fasttrap_proc *proc = NULL; -+ struct fasttrap_bucket *bucket; -+ char provname[DTRACE_PROVNAMELEN]; -+ struct task_struct *p; -+ const struct cred *cred = NULL; -+ -+ ASSERT(strlen(name) < sizeof(fp->ftp_name)); -+ ASSERT(pa != NULL); -+ -+ bucket = FASTTRAP_PROVS_ELEM(pid, name); -+ mutex_lock(&bucket->ftb_mtx); -+ -+ /* -+ * Take a lap through the list and return the match if we find it. -+ */ -+ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { -+ if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && -+ !fp->ftp_retired) { -+ mutex_lock(&fp->ftp_mtx); -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ return fp; -+ } -+ } -+ -+ /* -+ * Drop the bucket lock so we don't try to perform a sleeping -+ * allocation under it. 
-+ */ -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ p = register_pid_provider(pid); -+ if (p == NULL) -+ goto fail; -+ -+ /* -+ * Grab the credentials for this process so we have -+ * something to pass to dtrace_register(). -+ */ -+ cred = get_cred(p->cred); -+ -+ proc = fasttrap_proc_lookup(pid); -+ if (proc == NULL) -+ goto fail; -+ -+ new_fp = kzalloc(sizeof(struct fasttrap_provider), GFP_KERNEL); -+ if (new_fp == NULL) -+ goto fail; -+ -+ new_fp->ftp_pid = pid; -+ new_fp->ftp_proc = proc; -+ mutex_init(&new_fp->ftp_mtx); -+ mutex_init(&new_fp->ftp_cmtx); -+ -+ ASSERT(new_fp->ftp_proc != NULL); -+ -+ mutex_lock(&bucket->ftb_mtx); -+ -+ /* -+ * Take another lap through the list to make sure a provider hasn't -+ * been created for this pid while we weren't under the bucket lock. -+ */ -+ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { -+ if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && -+ !fp->ftp_retired) { -+ mutex_lock(&fp->ftp_mtx); -+ mutex_unlock(&bucket->ftb_mtx); -+ fasttrap_provider_free(new_fp); -+ put_cred(cred); -+ -+ return fp; -+ } -+ } -+ -+ strcpy(new_fp->ftp_name, name); -+ -+ /* -+ * Fail and return NULL if either the provider name is too long -+ * or we fail to register this new provider with the DTrace -+ * framework. Note that this is the only place we ever construct -+ * the full provider name -- we keep it in pieces in the provider -+ * structure. -+ */ -+ if (snprintf(provname, sizeof(provname), "%s%u", name, (uint_t)pid) >= -+ sizeof(provname) || -+ dtrace_register(provname, pa, -+ DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER, cred, -+ pa == &pid_attr ? 
&pid_pops : &usdt_pops, -+ new_fp, &new_fp->ftp_provid) != 0) { -+ mutex_unlock(&bucket->ftb_mtx); -+ fasttrap_provider_free(new_fp); -+ put_cred(cred); -+ return NULL; -+ } -+ -+ new_fp->ftp_next = bucket->ftb_data; -+ bucket->ftb_data = new_fp; -+ -+ mutex_lock(&new_fp->ftp_mtx); -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ put_cred(cred); -+ -+ return new_fp; -+ -+fail: -+ if (proc) -+ fasttrap_proc_release(proc); -+ if (cred) -+ put_cred(cred); -+ if (p) -+ unregister_pid_provider(pid); -+ -+ return NULL; -+} -+ -+void *fasttrap_meta_provide(void *arg, struct dtrace_helper_provdesc *dhpv, -+ pid_t pid) -+{ -+ struct fasttrap_provider *provider; -+ -+ if (strlen(dhpv->dthpv_provname) + 10 >= sizeof(provider->ftp_name)) { -+ pr_warn("Failed to instantiate provider %s: name too long " -+ "to accommodate pid\n", dhpv->dthpv_provname); -+ return NULL; -+ } -+ -+ /* -+ * Don't let folks spoof the true pid provider. -+ */ -+ if (strcmp(dhpv->dthpv_provname, FASTTRAP_PID_NAME) == 0) { -+ pr_warn("Failed to instantiate provider %s: %s is an invalid " -+ "name\n", dhpv->dthpv_provname, FASTTRAP_PID_NAME); -+ return NULL; -+ } -+ -+ /* -+ * The highest stability class that fasttrap supports is ISA; cap -+ * the stability of the new provider accordingly. 
-+ */ -+ if (dhpv->dthpv_pattr.dtpa_provider.dtat_class > DTRACE_CLASS_ISA) -+ dhpv->dthpv_pattr.dtpa_provider.dtat_class = DTRACE_CLASS_ISA; -+ if (dhpv->dthpv_pattr.dtpa_mod.dtat_class > DTRACE_CLASS_ISA) -+ dhpv->dthpv_pattr.dtpa_mod.dtat_class = DTRACE_CLASS_ISA; -+ if (dhpv->dthpv_pattr.dtpa_func.dtat_class > DTRACE_CLASS_ISA) -+ dhpv->dthpv_pattr.dtpa_func.dtat_class = DTRACE_CLASS_ISA; -+ if (dhpv->dthpv_pattr.dtpa_name.dtat_class > DTRACE_CLASS_ISA) -+ dhpv->dthpv_pattr.dtpa_name.dtat_class = DTRACE_CLASS_ISA; -+ if (dhpv->dthpv_pattr.dtpa_args.dtat_class > DTRACE_CLASS_ISA) -+ dhpv->dthpv_pattr.dtpa_args.dtat_class = DTRACE_CLASS_ISA; -+ -+ provider = fasttrap_provider_lookup(pid, dhpv->dthpv_provname, -+ &dhpv->dthpv_pattr); -+ if (provider == NULL) { -+ pr_warn("Failed to instantiate provider %s for process %u\n", -+ dhpv->dthpv_provname, (uint_t)pid); -+ return NULL; -+ } -+ -+ /* -+ * Up the meta provider count so this provider isn't removed until the -+ * meta provider has been told to remove it. -+ */ -+ provider->ftp_mcount++; -+ -+ mutex_unlock(&provider->ftp_mtx); -+ -+ return provider; -+} -+ -+static void fasttrap_pid_cleanup_cb(struct work_struct *work) -+{ -+ struct fasttrap_provider **fpp, *fp; -+ struct fasttrap_bucket *bucket; -+ dtrace_provider_id_t provid; -+ int i, later = 0; -+ -+ static volatile int in; -+ -+ ASSERT(in == 0); -+ in = 1; -+ -+ mutex_lock(&fasttrap_cleanup_mtx); -+ if (!fasttrap_cleanup_work && fasttrap_cleanup_state == CLEANUP_NONE) { -+ mutex_unlock(&fasttrap_cleanup_mtx); -+ in = 0; -+ return; -+ } -+ -+ dt_dbg_prov("Fasttrap provider cleanup callback processing...\n"); -+ while (fasttrap_cleanup_work) { -+ fasttrap_cleanup_work = 0; -+ mutex_unlock(&fasttrap_cleanup_mtx); -+ -+ later = 0; -+ -+ /* -+ * Iterate over all the providers trying to remove the marked -+ * ones. If a provider is marked but not retired, we just -+ * have to take a crack at removing it -- it's no big deal if -+ * we can't. 
-+ */ -+ for (i = 0; i < fasttrap_provs.fth_nent; i++) { -+ bucket = FASTTRAP_ELEM_BUCKET(&fasttrap_provs.fth_table[i]); -+ mutex_lock(&bucket->ftb_mtx); -+ fpp = (struct fasttrap_provider **)&bucket->ftb_data; -+ -+ while ((fp = *fpp) != NULL) { -+ dt_dbg_prov(" Trying to unregister %s%d " -+ "(%smarked)\n", -+ fp->ftp_name, fp->ftp_pid, -+ fp->ftp_marked ? "not " : ""); -+ if (!fp->ftp_marked) { -+ fpp = &fp->ftp_next; -+ continue; -+ } -+ -+ dt_dbg_prov(" ccount %llu, mcount %llu " -+ "rcount %llu, %sretired, " -+ "%smarked\n", -+ fp->ftp_ccount, fp->ftp_mcount, -+ fp->ftp_rcount, -+ fp->ftp_retired ? "" : "not ", -+ fp->ftp_marked ? "" : "not "); -+ -+ mutex_lock(&fp->ftp_mtx); -+ -+ /* -+ * If this provider has consumers actively -+ * creating probes (ftp_ccount) or is a USDT -+ * provider (ftp_mcount), we can't unregister -+ * or even condense. -+ */ -+ if (fp->ftp_ccount != 0 || -+ fp->ftp_mcount != 0) { -+ mutex_unlock(&fp->ftp_mtx); -+ fp->ftp_marked = 0; -+ continue; -+ } -+ -+ if (!fp->ftp_retired || fp->ftp_rcount != 0) -+ fp->ftp_marked = 0; -+ -+ mutex_unlock(&fp->ftp_mtx); -+ -+ /* -+ * If we successfully unregister this -+ * provider we can remove it from the hash -+ * chain and free the memory. If our attempt -+ * to unregister fails and this is a retired -+ * provider, increment our flag to try again -+ * pretty soon. If we've consumed more than -+ * half of our total permitted number of -+ * probes call dtrace_condense() to try to -+ * clean out the unenabled probes. 
-+ */ -+ provid = fp->ftp_provid; -+ mutex_lock(&module_mutex); -+ if (dtrace_unregister(provid) != 0) { -+ if (atomic_read(&fasttrap_total) > -+ fasttrap_max / 2) -+ dtrace_condense(provid); -+ -+ later += fp->ftp_marked; -+ fpp = &fp->ftp_next; -+ } else { -+ *fpp = fp->ftp_next; -+ fasttrap_provider_free(fp); -+ } -+ mutex_unlock(&module_mutex); -+ } -+ -+ mutex_unlock(&bucket->ftb_mtx); -+ } -+ -+ mutex_lock(&fasttrap_cleanup_mtx); -+ } -+ -+ ASSERT(fasttrap_cleanup_state != CLEANUP_NONE); -+ -+ /* -+ * If we were unable to remove a retired provider, try again after -+ * a second. This situation can occur in certain circumstances where -+ * providers cannot be unregistered even though they have no probes -+ * enabled because of an execution of dtrace -l or something similar. -+ * If the timeout has been disabled (set to 1 because we're trying -+ * to detach), we set fasttrap_cleanup_work to ensure that we'll -+ * get a chance to do that work if and when the timeout is reenabled -+ * (if detach fails). -+ */ -+ if (later > 0) { -+ dt_dbg_prov(" Some providers were not removed " -+ " (state %d, later = %d)\n", -+ fasttrap_cleanup_state, later); -+ if (fasttrap_cleanup_state == CLEANUP_DEFERRED) -+ fasttrap_cleanup_work = 1; -+ else { -+ struct delayed_work *dw = container_of( -+ work, -+ struct delayed_work, -+ work); -+ -+ fasttrap_cleanup_state = CLEANUP_SCHEDULED; -+ schedule_delayed_work(dw, HZ); -+ } -+ } else -+ fasttrap_cleanup_state = CLEANUP_NONE; -+ -+ mutex_unlock(&fasttrap_cleanup_mtx); -+ in = 0; -+ -+ dt_dbg_prov("Fasttrap provider cleanup callback done\n"); -+} -+ -+static DECLARE_DELAYED_WORK(fasttrap_cleanup, fasttrap_pid_cleanup_cb); -+ -+/* -+ * Activate the asynchronous cleanup mechanism. 
-+ */ -+static void fasttrap_pid_cleanup(void) -+{ -+ mutex_lock(&fasttrap_cleanup_mtx); -+ fasttrap_cleanup_work = 1; -+ fasttrap_cleanup_state = CLEANUP_SCHEDULED; -+ schedule_delayed_work(&fasttrap_cleanup, 3); -+ mutex_unlock(&fasttrap_cleanup_mtx); -+} -+ -+void fasttrap_provider_retire(pid_t pid, const char *name, int mprov) -+{ -+ struct fasttrap_provider *fp; -+ struct fasttrap_bucket *bucket; -+ dtrace_provider_id_t provid; -+ -+ ASSERT(strlen(name) < sizeof(fp->ftp_name)); -+ -+ dt_dbg_prov("Retiring %s %sprovider for PID %d\n", -+ name, mprov ? "meta-" : "", pid); -+ -+ bucket = FASTTRAP_PROVS_ELEM(pid, name); -+ mutex_lock(&bucket->ftb_mtx); -+ -+ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { -+ if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && -+ !fp->ftp_retired) -+ break; -+ } -+ -+ if (fp == NULL) { -+ mutex_unlock(&bucket->ftb_mtx); -+ return; -+ } -+ -+ mutex_lock(&fp->ftp_mtx); -+ ASSERT(!mprov || fp->ftp_mcount > 0); -+ if (mprov && --fp->ftp_mcount != 0) { -+ mutex_unlock(&fp->ftp_mtx); -+ mutex_unlock(&bucket->ftb_mtx); -+ return; -+ } -+ -+ /* -+ * Mark the provider to be removed in our post-processing step, mark it -+ * retired, and drop the active count on its proc. Marking it indicates -+ * that we should try to remove it; setting the retired flag indicates -+ * that we're done with this provider; dropping the active count on the -+ * proc releases our hold, and when this reaches zero (as it will -+ * during exit or exec) the proc and associated providers become -+ * defunct. -+ * -+ * We obviously need to take the bucket lock before the provider lock -+ * to perform the lookup, but we need to drop the provider lock -+ * before calling into the DTrace framework since we acquire the -+ * provider lock in callbacks invoked from the DTrace framework. The -+ * bucket lock therefore protects the integrity of the provider hash -+ * table. 
-+ */ -+ atomic64_dec(&fp->ftp_proc->ftpc_acount); -+ ASSERT(atomic64_read(&fp->ftp_proc->ftpc_acount) < -+ fp->ftp_proc->ftpc_rcount); -+ -+ fp->ftp_retired = 1; -+ fp->ftp_marked = 1; -+ provid = fp->ftp_provid; -+ mutex_unlock(&fp->ftp_mtx); -+ -+ /* -+ * We don't have to worry about invalidating the same provider twice -+ * since fasttrap_provider_lookup() will ignore provider that have -+ * been marked as retired. -+ */ -+ dtrace_invalidate(provid); -+ -+ mutex_unlock(&bucket->ftb_mtx); -+ -+ fasttrap_pid_cleanup(); -+} -+ -+static void fasttrap_probes_cleanup(struct task_struct *tsk) -+{ -+ fasttrap_provider_retire(tsk->pid, FASTTRAP_PID_NAME, 0); -+} -+ -+void fasttrap_meta_remove(void *arg, struct dtrace_helper_provdesc *dhpv, -+ pid_t pid) -+{ -+ /* -+ * Clean up the USDT provider. There may be active consumers of the -+ * provider busy adding probes, no damage will actually befall the -+ * provider until that count has dropped to zero. This just puts -+ * the provider on death row. 
-+ */ -+ fasttrap_provider_retire(pid, dhpv->dthpv_provname, 1); -+} -+ -+static int fasttrap_add_probe(struct fasttrap_probe_spec *probe) -+{ -+ struct fasttrap_provider *provider; -+ struct fasttrap_probe *pp; -+ struct fasttrap_tracepoint *tp; -+ uint64_t *offs = NULL; -+ uint64_t noffs; -+ char *name; -+ int aframes, retired; -+ -+ switch (probe->ftps_type) { -+ case DTFTP_ENTRY: -+ name = "entry"; -+ aframes = FASTTRAP_ENTRY_AFRAMES; -+ break; -+ case DTFTP_RETURN: -+ name = "return"; -+ aframes = FASTTRAP_RETURN_AFRAMES; -+ break; -+ case DTFTP_OFFSETS: -+ if (probe->ftps_glen <= 0) -+ return -EINVAL; -+ -+ name = "<offsets>"; -+ aframes = FASTTRAP_OFFSET_AFRAMES; -+ offs = fasttrap_glob_offsets(probe, &noffs); -+ break; -+ default: -+ return -EINVAL; -+ } -+ -+ provider = fasttrap_provider_lookup(probe->ftps_pid, FASTTRAP_PID_NAME, -+ &pid_attr); -+ if (provider == NULL) -+ return -ESRCH; -+ -+ /* -+ * Increment the consumer reference count on the provider to indicate -+ * that a new probe is being associated with the provider. This makes -+ * sure that the provider will not be removed while we are working with -+ * it. -+ * -+ * This also means that we can drop the provider lock. -+ */ -+ provider->ftp_ccount++; -+ mutex_unlock(&provider->ftp_mtx); -+ -+ /* -+ * Grab the probe creation lock for this provider to ensure consistency -+ * between dtrace_probe_lookup() and dtrace_probe_create() because -+ * other threads might be creating probes also. 
-+ */ -+ mutex_lock(&provider->ftp_cmtx); -+ -+ if (probe->ftps_type == DTFTP_OFFSETS) { -+ int i; -+ -+ for (i = 0; i < noffs; i++) { -+ char ostr[sizeof(*offs) + 1]; -+ -+ snprintf(ostr, sizeof(ostr), "%llx", offs[i]); -+ if (dtrace_probe_lookup(provider->ftp_provid, -+ probe->ftps_mod, -+ probe->ftps_func, ostr) != 0) -+ continue; -+ -+ atomic_add(1, &fasttrap_total); -+ if (atomic_read(&fasttrap_total) > fasttrap_max) -+ goto fail_reset; -+ -+ pp = kzalloc(sizeof(struct fasttrap_probe), GFP_KERNEL); -+ if (pp == NULL) -+ goto fail_reset; -+ -+ pp->ftp_prov = provider; -+ pp->ftp_pid = provider->ftp_pid; -+ pp->ftp_ntps = 1; -+ -+ tp = kmem_cache_alloc(tracepoint_cachep, GFP_KERNEL); -+ if (tp == NULL) -+ goto fail_reset; -+ -+ tp->ftt_proc = provider->ftp_proc; -+ tp->ftt_pc = probe->ftps_pc + offs[i]; -+ tp->ftt_pid = provider->ftp_pid; -+ memset(&tp->ftt_mtp, 0, -+ sizeof(struct fasttrap_machtp)); -+ tp->ftt_ids = NULL; -+ tp->ftt_retids = NULL; -+ tp->ftt_next = NULL; -+ -+ pp->ftp_tps[0].fit_tp = tp; -+ pp->ftp_tps[0].fit_id.fti_probe = pp; -+ pp->ftp_tps[0].fit_id.fti_ptype = probe->ftps_type; -+ -+ pp->ftp_id = dtrace_probe_create(provider->ftp_provid, -+ probe->ftps_mod, -+ probe->ftps_func, ostr, -+ aframes, pp); -+ if (pp->ftp_id == DTRACE_IDNONE) { -+ kmem_cache_free(tracepoint_cachep, tp); -+ kfree(pp); -+ -+ goto fail_reset; -+ } -+ } -+ } else if (dtrace_probe_lookup(provider->ftp_provid, probe->ftps_mod, -+ probe->ftps_func, name) == 0) { -+ atomic_add(1, &fasttrap_total); -+ if (atomic_read(&fasttrap_total) > fasttrap_max) -+ goto fail_reset; -+ -+ pp = kzalloc(sizeof(struct fasttrap_probe), GFP_KERNEL); -+ if (pp == NULL) -+ goto fail_reset; -+ -+ pp->ftp_prov = provider; -+ pp->ftp_pid = provider->ftp_pid; -+ pp->ftp_ntps = 1; -+ -+ tp = kmem_cache_alloc(tracepoint_cachep, GFP_KERNEL); -+ if (tp == NULL) -+ goto fail_reset; -+ -+ tp->ftt_proc = provider->ftp_proc; -+ tp->ftt_pc = probe->ftps_pc; -+ tp->ftt_pid = provider->ftp_pid; -+ 
memset(&tp->ftt_mtp, 0, sizeof(struct fasttrap_machtp)); -+ tp->ftt_ids = NULL; -+ tp->ftt_retids = NULL; -+ tp->ftt_next = NULL; -+ -+ pp->ftp_tps[0].fit_tp = tp; -+ pp->ftp_tps[0].fit_id.fti_probe = pp; -+ pp->ftp_tps[0].fit_id.fti_ptype = probe->ftps_type; -+ -+ pp->ftp_id = dtrace_probe_create(provider->ftp_provid, -+ probe->ftps_mod, -+ probe->ftps_func, name, -+ aframes, pp); -+ if (pp->ftp_id == DTRACE_IDNONE) { -+ kmem_cache_free(tracepoint_cachep, tp); -+ kfree(pp); -+ -+ goto fail_reset; -+ } -+ } -+ -+ mutex_unlock(&provider->ftp_cmtx); -+ -+ /* -+ * The provider is still around because of the consumer reference -+ * count that we incremented. If another thread tried to clean up the -+ * provider while we were using it (because the process called exec or -+ * exit), we'll trigger a cleanup. -+ */ -+ mutex_lock(&provider->ftp_mtx); -+ provider->ftp_ccount--; -+ retired = provider->ftp_retired; -+ mutex_unlock(&provider->ftp_mtx); -+ -+ if (retired) -+ fasttrap_pid_cleanup(); -+ -+ return 0; -+ -+fail_reset: -+ atomic_add(-1, &fasttrap_total); -+ -+ /* -+ * If we failed to create the probe, it usually means we ran out of -+ * memory. We'll try to remove this provider to free some. This -+ * usually happens when a user accidentally triggers the creation of -+ * a very large amount of probes (e.g. pid587:::). 
-+ */ -+ mutex_unlock(&provider->ftp_cmtx); -+ -+ kfree(offs); -+ -+ mutex_lock(&provider->ftp_mtx); -+ provider->ftp_ccount--; -+ provider->ftp_marked = 1; -+ mutex_unlock(&provider->ftp_mtx); -+ -+ fasttrap_pid_cleanup(); -+ -+ return -ENOMEM; -+} -+ -+static long fasttrap_ioctl(struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ void __user *argp = (void __user *)arg; -+ -+ if (cmd == FASTTRAPIOC_MAKEPROBE) { -+ struct fasttrap_probe_spec __user *uprobe = argp; -+ struct fasttrap_probe_spec *probe; -+ uint8_t glen; -+ size_t size; -+ int ret; -+ char *c; -+ -+ dt_dbg_ioctl("PID IOCTL MAKEPROBE (cmd %#x), argp %p\n", -+ cmd, argp); -+ -+ if (copy_from_user(&glen, &uprobe->ftps_glen, -+ sizeof(uprobe->ftps_glen))) -+ return -EFAULT; -+ -+ size = sizeof(struct fasttrap_probe_spec) + -+ sizeof(probe->ftps_gstr[0]) * (glen - 1); -+ -+ if (size > 1024 * 1024) -+ return -ENOMEM; -+ -+ probe = kmalloc(size, GFP_KERNEL); -+ if (!probe) -+ return -ENOMEM; -+ -+ if (copy_from_user(probe, uprobe, size) != 0) { -+ ret = -EFAULT; -+ goto err; -+ } -+ -+ for (c = &probe->ftps_func[0]; *c != '\0'; c++) { -+ if (*c < 0x20 || 0x7f <= *c) { -+ ret = -EINVAL; -+ goto err; -+ } -+ } -+ -+ for (c = &probe->ftps_mod[0]; *c != '\0'; c++) { -+ if (*c < 0x20 || 0x7f <= *c) { -+ ret = -EINVAL; -+ goto err; -+ } -+ } -+ -+ ret = fasttrap_add_probe(probe); -+err: -+ kfree(probe); -+ -+ return ret; -+ } -+ -+ return -EAGAIN; -+} -+ -+static int fasttrap_open(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static int fasttrap_close(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static const struct file_operations fasttrap_fops = { -+ .owner = THIS_MODULE, -+ .unlocked_ioctl = fasttrap_ioctl, -+ .open = fasttrap_open, -+ .release = fasttrap_close, -+}; -+ -+static struct miscdevice fasttrap_dev = { -+ .minor = DT_DEV_FASTTRAP_MINOR, -+ .name = "fasttrap", -+ .nodename = "dtrace/provider/fasttrap", -+ .fops = &fasttrap_fops, -+}; -+ -+static 
int fasttrap_init_htable(struct fasttrap_hash *fth, ulong_t nent) -+{ -+ ulong_t i; -+ -+ if ((nent & (nent - 1)) == 0) -+ fth->fth_nent = nent; -+ else -+ fth->fth_nent = 1 << fls(nent); -+ -+ ASSERT(fth->fth_nent > 0); -+ -+ fth->fth_mask = fth->fth_nent - 1; -+ fth->fth_table = vzalloc(fth->fth_nent * -+ sizeof(struct fasttrap_bucket_elem)); -+ -+ if (fth->fth_table == NULL) -+ return -ENOMEM; -+ -+ for (i = 0; i < fth->fth_nent; i++) -+ mutex_init(&fth->fth_table[i].bucket.ftb_mtx); -+ -+ return 0; -+} -+ -+int fasttrap_dev_init(void) -+{ -+ int ret = 0; -+ ulong_t nent; -+ -+ ret = misc_register(&fasttrap_dev); -+ if (ret) { -+ pr_err("%s: Can't register misc device %d\n", -+ fasttrap_dev.name, fasttrap_dev.minor); -+ goto fail; -+ } -+ -+#ifdef FIXME -+ dtrace_fasttrap_exit_ptr = &fasttrap_exec_exit; -+ dtrace_fasttrap_exec_ptr = &fasttrap_exec_exit; -+#endif -+ -+ tracepoint_cachep = KMEM_CACHE(fasttrap_tracepoint, 0); -+ -+ fasttrap_max = FASTTRAP_MAX_DEFAULT; -+ atomic_set(&fasttrap_total, 0); -+ -+ /* -+ * Conjure up the tracepoints hashtable... -+ */ -+ nent = FASTTRAP_TPOINTS_DEFAULT_SIZE; -+ -+ if (nent == 0 || nent > 0x1000000) -+ nent = FASTTRAP_TPOINTS_DEFAULT_SIZE; -+ -+ if (fasttrap_init_htable(&fasttrap_tpoints, nent) != 0) -+ return -ENOMEM; -+ -+ /* -+ * ... and the providers hash table... -+ */ -+ nent = FASTTRAP_PROVIDERS_DEFAULT_SIZE; -+ if (fasttrap_init_htable(&fasttrap_provs, nent) != 0) -+ return -ENOMEM; -+ -+ /* -+ * ... and the procs hash table. -+ */ -+ nent = FASTTRAP_PROCS_DEFAULT_SIZE; -+ if (fasttrap_init_htable(&fasttrap_procs, nent) != 0) -+ return -ENOMEM; -+ -+fail: -+ return ret; -+} -+ -+/* -+ * This function is called with module_mutex held. 
-+ */ -+int fasttrap_prov_exit(void) -+{ -+ int fail = 0; -+ ulong_t i; -+ -+ if (dtrace_meta_unregister(fasttrap_id) != 0) -+ return 0; -+ -+ /* -+ * Prevent any new timeouts from running by setting fasttrap_timeout -+ * to a non-zero value, and wait for the current timeout to complete. -+ */ -+ mutex_lock(&fasttrap_cleanup_mtx); -+ fasttrap_cleanup_work = 0; -+ -+ while (fasttrap_cleanup_state != CLEANUP_DEFERRED) { -+ uint_t tmp; -+ -+ tmp = fasttrap_cleanup_state; -+ fasttrap_cleanup_state = CLEANUP_DEFERRED; -+ -+ if (tmp != CLEANUP_NONE) { -+ mutex_unlock(&fasttrap_cleanup_mtx); -+ flush_delayed_work(&fasttrap_cleanup); -+ mutex_lock(&fasttrap_cleanup_mtx); -+ } -+ } -+ -+ fasttrap_cleanup_work = 0; -+ mutex_unlock(&fasttrap_cleanup_mtx); -+ -+ /* -+ * Iterate over all of our providers. If there's still a process -+ * that corresponds to that pid, fail to detach. -+ */ -+ for (i = 0; i < fasttrap_provs.fth_nent; i++) { -+ struct fasttrap_provider **fpp, *fp; -+ struct fasttrap_bucket *bucket; -+ -+ bucket = FASTTRAP_ELEM_BUCKET(&fasttrap_provs.fth_table[i]); -+ mutex_lock(&bucket->ftb_mtx); -+ fpp = (struct fasttrap_provider **)&bucket->ftb_data; -+ while ((fp = *fpp) != NULL) { -+ /* -+ * Acquire and release the lock as a simple way of -+ * waiting for any other consumer to finish with -+ * this provider. A thread must first acquire the -+ * bucket lock so there's no chance of another thread -+ * blocking on the provider's lock. -+ */ -+ mutex_lock(&fp->ftp_mtx); -+ mutex_unlock(&fp->ftp_mtx); -+ -+ if (dtrace_unregister(fp->ftp_provid) != 0) { -+ fail = 1; -+ fpp = &fp->ftp_next; -+ } else { -+ *fpp = fp->ftp_next; -+ fasttrap_provider_free(fp); -+ } -+ } -+ -+ mutex_unlock(&bucket->ftb_mtx); -+ } -+ -+ if (fail) { -+ uint_t work; -+ -+ /* -+ * If we're failing to detach, we need to unblock timeouts -+ * and start a new timeout if any work has accumulated while -+ * we've been unsuccessfully trying to detach. 
-+ */ -+ mutex_lock(&fasttrap_cleanup_mtx); -+ fasttrap_cleanup_state = CLEANUP_NONE; -+ work = fasttrap_cleanup_work; -+ mutex_unlock(&fasttrap_cleanup_mtx); -+ -+ if (work) -+ fasttrap_pid_cleanup(); -+ -+ dtrace_meta_register("fasttrap", &fasttrap_mops, NULL, -+ &fasttrap_id); -+ -+ return 0; -+ } -+ -+ return 1; -+} -+ -+void fasttrap_dev_exit(void) -+{ -+#ifdef DEBUG -+ mutex_lock(&fasttrap_count_mtx); -+ ASSERT(fasttrap_pid_count == 0); -+ mutex_unlock(&fasttrap_count_mtx); -+#endif -+ -+ if (fasttrap_tpoints.fth_table) -+ vfree(fasttrap_tpoints.fth_table); -+ fasttrap_tpoints.fth_nent = 0; -+ -+ if (fasttrap_provs.fth_table) -+ vfree(fasttrap_provs.fth_table); -+ fasttrap_provs.fth_nent = 0; -+ -+ if (fasttrap_procs.fth_table) -+ vfree(fasttrap_procs.fth_table); -+ fasttrap_procs.fth_nent = 0; -+ -+ kmem_cache_destroy(tracepoint_cachep); -+ -+#ifdef FIXME -+ ASSERT(dtrace_fasttrap_exec_ptr == &fasttrap_exec_exit); -+ dtrace_fasttrap_exec_ptr = NULL; -+ -+ ASSERT(dtrace_fasttrap_exit_ptr == &fasttrap_exec_exit); -+ dtrace_fasttrap_exit_ptr = NULL; -+#endif -+ -+ misc_deregister(&fasttrap_dev); -+} -diff --git a/dtrace/fasttrap_impl.h b/dtrace/fasttrap_impl.h -new file mode 100644 -index 000000000000..cd2c4a28871e ---- /dev/null -+++ b/dtrace/fasttrap_impl.h -@@ -0,0 +1,172 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - fasttrap provider -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. -+ */ -+ -+#ifndef _FASTTRAP_IMPL_H_ -+#define _FASTTRAP_IMPL_H_ -+ -+#include <linux/dtrace/fasttrap.h> -+#include <dtrace/fasttrap_arch.h> -+#include <linux/cache.h> -+ -+/* -+ * Fasttrap Providers, Probes and Tracepoints -+ * -+ * Each Solaris process can have multiple providers -- the pid provider as -+ * well as any number of user-level statically defined tracing (USDT) -+ * providers. Those providers are each represented by a fasttrap_provider. -+ * All providers for a given process have a pointer to a shared -+ * fasttrap_proc. The fasttrap_proc has two states: active or defunct. -+ * When the count of active providers goes to zero it becomes defunct; a -+ * provider drops its active count when it is removed individually or as part -+ * of a mass removal when a process exits or performs an exec. -+ * -+ * Each probe is represented by a fasttrap_probe which has a pointer to -+ * its associated provider as well as a list of fasttrap_id_tp structures -+ * which are tuples combining a fasttrap_id and a fasttrap_tracepoint. -+ * A fasttrap_tracepoint represents the actual point of instrumentation -+ * and it contains two lists of fasttrap_id structures (to be fired pre- -+ * and post-instruction emulation) that identify the probes attached to the -+ * tracepoint. Tracepoints also have a pointer to the fasttrap_proc for the -+ * process they trace which is used when looking up a tracepoint both when a -+ * probe fires and when enabling and disabling probes. -+ * -+ * It's important to note that probes are preallocated with the necessary -+ * number of tracepoints, but that tracepoints can be shared by probes and -+ * swapped between probes. 
If a probe's preallocated tracepoint is enabled -+ * (and, therefore, the associated probe is enabled), and that probe is -+ * then disabled, ownership of that tracepoint may be exchanged for an -+ * unused tracepoint belonging to another probe that was attached to the -+ * enabled tracepoint. -+ */ -+struct fasttrap_proc { -+ pid_t ftpc_pid; /* process ID for this proc */ -+ atomic64_t ftpc_acount; /* count of active providers */ -+ uint64_t ftpc_rcount; /* count of extant providers */ -+ struct mutex ftpc_mtx; /* lock on all but acount */ -+ struct fasttrap_proc *ftpc_next; /* next proc in hash chain */ -+}; -+ -+struct fasttrap_provider { -+ pid_t ftp_pid; /* process ID for this prov */ -+ char ftp_name[DTRACE_PROVNAMELEN]; /* prov name (w/o the pid) */ -+ dtrace_provider_id_t ftp_provid; /* DTrace provider handle */ -+ uint_t ftp_marked; /* mark for possible removal */ -+ uint_t ftp_retired; /* mark when retired */ -+ struct mutex ftp_mtx; /* provider lock */ -+ struct mutex ftp_cmtx; /* lock on creating probes */ -+ uint64_t ftp_rcount; /* enabled probes ref count */ -+ uint64_t ftp_ccount; /* consumers creating probes */ -+ uint64_t ftp_mcount; /* meta provider count */ -+ struct fasttrap_proc *ftp_proc; /* shared proc for all provs */ -+ struct fasttrap_provider *ftp_next; /* next prov in hash chain */ -+}; -+ -+struct fasttrap_id { -+ struct fasttrap_probe *fti_probe; /* referrring probe */ -+ struct fasttrap_id *fti_next; /* enabled probe list on tp */ -+ enum fasttrap_probe_type fti_ptype; /* probe type */ -+}; -+ -+struct fasttrap_tracepoint { -+ struct fasttrap_proc *ftt_proc; /* associated process struct */ -+ uintptr_t ftt_pc; /* address of tracepoint */ -+ pid_t ftt_pid; /* pid of tracepoint */ -+ struct fasttrap_machtp ftt_mtp; /* ISA-specific portion */ -+ struct fasttrap_id *ftt_ids; /* NULL-terminated list */ -+ struct fasttrap_id *ftt_retids; /* NULL-terminated list */ -+ struct fasttrap_tracepoint *ftt_next; /* link in global hash */ -+}; -+ 
-+struct fasttrap_id_tp { -+ struct fasttrap_id fit_id; -+ struct fasttrap_tracepoint *fit_tp; -+}; -+ -+struct fasttrap_probe { -+ dtrace_id_t ftp_id; /* DTrace probe identifier */ -+ pid_t ftp_pid; /* pid for this probe */ -+ struct fasttrap_provider *ftp_prov; /* this probe's provider */ -+ uint64_t ftp_gen; /* modification generation */ -+ uint64_t ftp_ntps; /* number of tracepoints */ -+ uint8_t *ftp_argmap; /* native to translated args */ -+ uint8_t ftp_nargs; /* translated argument count */ -+ uint8_t ftp_enabled; /* is this probe enabled */ -+ char *ftp_xtypes; /* translated types index */ -+ char *ftp_ntypes; /* native types index */ -+ struct fasttrap_id_tp ftp_tps[1]; /* flexible array */ -+}; -+ -+struct fasttrap_bucket_elem { -+ union { -+ struct fasttrap_bucket { -+ struct mutex ftb_mtx; /* bucket lock */ -+ void *ftb_data; /* data payload */ -+ } bucket; -+ -+ /* -+ * Fill a cacheline, no matter how large struct mutex is. -+ */ -+ uint8_t ftb_pad[(sizeof(struct fasttrap_bucket) + -+ L1_CACHE_BYTES - 1) & ~(L1_CACHE_BYTES - 1)]; -+ }; -+}; -+typedef struct fasttrap_bucket fasttrap_bucket_t; -+ -+#define FASTTRAP_ELEM_BUCKET(elem) ((fasttrap_bucket_t *) (elem)) -+ -+struct fasttrap_hash { -+ ulong_t fth_nent; /* power-of-2 num. 
of entries */ -+ ulong_t fth_mask; /* fth_nent - 1 */ -+ struct fasttrap_bucket_elem *fth_table; /* array of buckets */ -+}; -+ -+extern struct fasttrap_hash fasttrap_tpoints; -+ -+#define FASTTRAP_ID_INDEX(id) \ -+ ((struct fasttrap_id_tp *)(((char *)(id) - \ -+ offsetof(struct fasttrap_id_tp, fit_id))) - \ -+ &(id)->fti_probe->ftp_tps[0]) -+#define FASTTRAP_TPOINTS_INDEX(pid, pc) \ -+ (((pc) / sizeof(fasttrap_instr_t) + (pid)) & \ -+ fasttrap_tpoints.fth_mask) -+ -+extern uint64_t *fasttrap_glob_offsets(struct fasttrap_probe_spec *probe, -+ uint64_t *np); -+extern uint64_t fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, -+ int argno, int aframes); -+extern uint64_t fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, -+ int argno, int aframes); -+extern void fasttrap_pid_probe_arch(struct fasttrap_probe *ftp, -+ struct pt_regs *regs); -+extern void fasttrap_pid_retprobe_arch(struct fasttrap_probe *ftp, -+ struct pt_regs *regs); -+extern void fasttrap_set_enabled(struct pt_regs *regs); -+ -+extern void fasttrap_meta_create_probe(void *, void *, -+ struct dtrace_helper_probedesc *); -+extern void *fasttrap_meta_provide(void *, struct dtrace_helper_provdesc *, -+ pid_t); -+extern void fasttrap_meta_remove(void *, struct dtrace_helper_provdesc *, -+ pid_t); -+ -+extern dtrace_meta_provider_id_t fasttrap_id; -+extern struct dtrace_mops fasttrap_mops; -+ -+extern int fasttrap_dev_init(void); -+extern void fasttrap_dev_exit(void); -+ -+#endif /* _FASTTRAP_IMPL_H_ */ -diff --git a/dtrace/fasttrap_mod.c b/dtrace/fasttrap_mod.c -new file mode 100644 -index 000000000000..e9bd0eb065f1 ---- /dev/null -+++ b/dtrace/fasttrap_mod.c -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: fasttrap_mod.c -+ * DESCRIPTION: DTrace - fasttrap provider kernel module -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/module.h> -+ -+#define DTRACE_HAVE_PROV_EXIT -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "fasttrap_impl.h" -+ -+MODULE_AUTHOR("Kris Van Hees (kris.van.hees@oracle.com)"); -+MODULE_DESCRIPTION("Fasttrap Tracing"); -+MODULE_VERSION("v0.1"); -+MODULE_LICENSE("GPL"); -+ -+struct dtrace_mops fasttrap_mops = { -+ fasttrap_meta_create_probe, -+ fasttrap_meta_provide, -+ fasttrap_meta_remove -+}; -+ -+DT_META_PROVIDER_MODULE(fasttrap) --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0014-dtrace-function-boundary-tracing-FBT-core-and-x86-co.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0014-dtrace-function-boundary-tracing-FBT-core-and-x86-co.patch deleted file mode 100644 index 5862b109ee73..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0014-dtrace-function-boundary-tracing-FBT-core-and-x86-co.patch +++ /dev/null @@ -1,1304 +0,0 @@ -From 3ff947b8988900ed494e5e008393c4737093cee2 Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 19:02:39 +0000 -Subject: [PATCH 14/20] dtrace: function boundary tracing (FBT) core and x86 - components - -This commit implements the core components needed for FBT tracing. 
-Unlike ftrace we allow the tracing of very large numbers of functions at -once: the intent is that the system should still be stable when every -eligible function in the kernel is traced simultaneously. Functions -that are not safe for this (because e.g. they are used in trap handling, -or by functions called by the DTrace module itself during probe -processing) are (semi-manually) blacklisted from being probed. - -As part of this, a treewide change to the prototype of traps is started: -they all return 0 by default now, with a nonzero return value indicating -that the trap happened as a result of an FBT probe: the return value is -the opcode atop which the trap was originally placed for later emulation. - -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - arch/x86/entry/entry_64.S | 157 +++++++++++++++++++++++++++- - arch/x86/include/asm/kvm_para.h | 2 +- - arch/x86/include/asm/mce.h | 4 +- - arch/x86/include/asm/traps.h | 46 ++++----- - arch/x86/kernel/cpu/mce/core.c | 14 +-- - arch/x86/kernel/dtrace_fbt.c | 177 ++++++++++++++++++++++++++++++++ - arch/x86/kernel/fbt_blacklist.h | 92 +++++++++++++++++ - arch/x86/kernel/kvm.c | 4 +- - arch/x86/kernel/nmi.c | 7 +- - arch/x86/kernel/traps.c | 78 ++++++++------ - arch/x86/mm/fault.c | 12 ++- - include/linux/dtrace_fbt.h | 48 +++++++++ - kernel/dtrace/Kconfig | 7 ++ - kernel/dtrace/Makefile | 4 +- - kernel/dtrace/dtrace_fbt_core.c | 125 ++++++++++++++++++++++ - kernel/dtrace/dtrace_os.c | 2 + - kernel/kprobes.c | 8 ++ - 17 files changed, 715 insertions(+), 72 deletions(-) - create mode 100644 arch/x86/kernel/dtrace_fbt.c - create mode 100644 arch/x86/kernel/fbt_blacklist.h - create mode 100644 include/linux/dtrace_fbt.h - create mode 
100644 kernel/dtrace/dtrace_fbt_core.c - -diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index faa53fee0663..637650b53723 100644 ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -38,6 +38,7 @@ - #include <asm/export.h> - #include <asm/frame.h> - #include <asm/nospec-branch.h> -+#include <asm/dtrace_util.h> - #include <linux/err.h> - - #include "calling.h" -@@ -865,7 +866,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt - */ - #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) - --.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0 -+.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0, switch_stack=0 - - .if \paranoid - call paranoid_entry -@@ -920,6 +921,13 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt - addq $\ist_offset, CPU_TSS_IST(\shift_ist) - .endif - -+#ifdef CONFIG_DTRACE -+ .if \switch_stack == 0 -+ test %rax,%rax -+ jnz dtrace_error_exit -+ .endif -+#endif -+ - .if \paranoid - /* this procedure expect "no swapgs" flag in ebx */ - jmp paranoid_exit -@@ -1014,13 +1022,158 @@ SYM_CODE_START(\sym) - * run in real process context if user_mode(regs). 
- */ - .Lfrom_usermode_switch_stack_\@: -- idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0 -+ idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0, switch_stack=1 - .endif - - _ASM_NOKPROBE(\sym) - SYM_CODE_END(\sym) - .endm - -+#ifdef CONFIG_DTRACE -+SYM_CODE_START(dtrace_error_exit) -+ UNWIND_HINT_REGS -+ DISABLE_INTERRUPTS(CLBR_NONE) -+ TRACE_IRQS_OFF -+ -+ /* -+ * The iretq could re-enable interrupts: -+ */ -+ TRACE_IRQS_IRETQ -+ -+ negq %rax -+ -+ cmpl $DTRACE_INVOP_MOV_RSP_RBP,%eax -+ je dtrace_emu_mov -+ cmpl $DTRACE_INVOP_PUSH_BP,%eax -+ je dtrace_emu_push -+ cmpl $DTRACE_INVOP_LEAVE,%eax -+ je dtrace_emu_leave -+ cmpl $DTRACE_INVOP_NOP,%eax -+ je dtrace_emu_nop -+ cmpl $DTRACE_INVOP_RET,%eax -+ je dtrace_emu_ret -+ -+ leaq dtrace_error_msg(%rip),%rdi -+ movq %rax,%rsi -+ movq (%rsp),%rdx -+ call printk -+ -+ POP_REGS -+ addq $8, %rsp /* skip regs->orig_ax */ -+ jmp dtrace_retint_kernel -+ -+dtrace_emu_mov: -+ POP_REGS -+ addq $8, %rsp /* skip regs->orig_ax */ -+ -+ /* Emulate "mov %rsp, %rbp" instruction. */ -+ pushq %rax /* push temp */ -+ movq 8(%rsp),%rax /* load calling RIP */ -+ addq $3,%rax /* increment over trapping instr */ -+ movq %rax,8(%rsp) /* store calling RIP */ -+ movq 32(%rsp),%rbp /* load %rsp into %rbp */ -+ popq %rax /* pop off temp */ -+ -+ jmp dtrace_retint_kernel -+ -+dtrace_emu_push: -+ POP_REGS -+ addq $8, %rsp /* skip regs->orig_ax */ -+ -+ /* -+ * Emulate a "pushq %rbp" instruction. We need to move the stack down -+ * to make room for the extra address getting pushed. 
-+ */ -+ subq $16,%rsp /* make room for %rbp */ -+ pushq %rax /* push temp */ -+ movq 24(%rsp),%rax /* load calling RIP */ -+ addq $1,%rax /* increment over trapping instr */ -+ movq %rax,8(%rsp) /* store calling RIP */ -+ movq 32(%rsp),%rax /* load calling CS */ -+ movq %rax,16(%rsp) /* store calling CS */ -+ movq 40(%rsp),%rax /* load calling RFLAGS */ -+ movq %rax,24(%rsp) /* store calling RFLAGS */ -+ movq 48(%rsp),%rax /* load calling RSP */ -+ subq $8,%rax /* make room for %rbp */ -+ movq %rax,32(%rsp) /* store calling RSP */ -+ movq 56(%rsp),%rax /* load calling SS */ -+ movq %rax,40(%rsp) /* store calling SS */ -+ movq 32(%rsp),%rax /* reload calling RSP */ -+ movq %rbp,(%rax) /* store %rbp there */ -+ popq %rax /* pop off temp */ -+ -+ jmp dtrace_retint_kernel -+ -+dtrace_emu_nop: -+ POP_REGS -+ addq $8, %rsp /* skip regs->orig_ax */ -+ -+ /* Emulate a "nop" instruction. */ -+ incq (%rsp) -+ -+ jmp dtrace_retint_kernel -+ -+dtrace_emu_leave: -+ POP_REGS -+ addq $8, %rsp /* skip regs->orig_ax */ -+ -+ /* -+ * Emulate a "leave" instruction. This is equivalent to the sequence: -+ * movq %rbp,%rsp -+ * popq %rbp -+ * We can use the fact that on x86_64 %rsp is saved explicitly, so we -+ * do not need to move any data around. -+ */ -+ pushq %rax /* push temp */ -+ movq 8(%rsp),%rax /* load calling RIP */ -+ addq $1,%rax /* increment over trapping instr */ -+ movq %rax,8(%rsp) /* store calling RIP */ -+ movq (%rbp),%rax /* get new %rbp */ -+ addq $8,%rbp /* adjust new %rsp */ -+ movq %rbp,32(%rsp) /* store new %rsp */ -+ movq %rax,%rbp /* set new %rbp */ -+ popq %rax /* pop off temp */ -+ -+ jmp dtrace_retint_kernel -+ -+dtrace_emu_ret: -+ POP_REGS -+ addq $8, %rsp /* skip regs->orig_ax */ -+ -+ /* Emulate a "ret" instruction. 
*/ -+ pushq %rax /* push temp */ -+ movq 32(%rsp),%rax /* load %rsp */ -+ movq (%rax),%rax /* load calling RIP */ -+ movq %rax,8(%rsp) /* store calling RIP */ -+ addq $8,32(%rsp) /* adjust new %rsp */ -+ popq %rax /* pop off temp */ -+ -+ /* fallthrough */ -+ -+dtrace_retint_kernel: -+ -+#ifdef CONFIG_PREEMPT -+ /* Interrupts are off */ -+ /* Check if we need preemption */ -+ bt $9,2*8(%rsp) /* interrupts were off? (EFLAGS) */ -+ jnc 1f -+0: cmpl $0,PER_CPU_VAR(__preempt_count) -+ jnz 1f -+ call preempt_schedule_irq -+ jmp 0b -+1: -+#endif -+ -+ INTERRUPT_RETURN -+ -+SYM_CODE_END(dtrace_error_exit) -+ -+.pushsection .rodata, "a" -+dtrace_error_msg: -+ .asciz "DTRACE: non-zero (%x) return from trap at %x\n" -+.popsection -+#endif -+ - idtentry divide_error do_divide_error has_error_code=0 - idtentry overflow do_overflow has_error_code=0 - idtentry bounds do_bounds has_error_code=0 -diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h -index 9b4df6eaa11a..45ceffca6737 100644 ---- a/arch/x86/include/asm/kvm_para.h -+++ b/arch/x86/include/asm/kvm_para.h -@@ -92,7 +92,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); - void kvm_async_pf_task_wake(u32 token); - u32 kvm_read_and_reset_pf_reason(void); - extern void kvm_disable_steal_time(void); --void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); -+int do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); - - #ifdef CONFIG_PARAVIRT_SPINLOCKS - void __init kvm_spinlock_init(void); -diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h -index 4359b955e0b7..484d1db7e42e 100644 ---- a/arch/x86/include/asm/mce.h -+++ b/arch/x86/include/asm/mce.h -@@ -240,8 +240,8 @@ extern void mce_disable_bank(int bank); - */ - - /* Call the installed machine check handler for this CPU setup. 
*/ --extern void (*machine_check_vector)(struct pt_regs *, long error_code); --void do_machine_check(struct pt_regs *, long); -+extern int (*machine_check_vector)(struct pt_regs *, long error_code); -+int do_machine_check(struct pt_regs *, long); - - /* - * Threshold handler -diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h -index ffa0dc8a535e..04bddeb57528 100644 ---- a/arch/x86/include/asm/traps.h -+++ b/arch/x86/include/asm/traps.h -@@ -61,41 +61,41 @@ asmlinkage void xen_machine_check(void); - asmlinkage void xen_simd_coprocessor_error(void); - #endif - --dotraplinkage void do_divide_error(struct pt_regs *regs, long error_code); --dotraplinkage void do_debug(struct pt_regs *regs, long error_code); --dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); --dotraplinkage void do_int3(struct pt_regs *regs, long error_code); --dotraplinkage void do_overflow(struct pt_regs *regs, long error_code); --dotraplinkage void do_bounds(struct pt_regs *regs, long error_code); --dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code); --dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); -+dotraplinkage int do_divide_error(struct pt_regs *regs, long error_code); -+dotraplinkage int do_debug(struct pt_regs *regs, long error_code); -+dotraplinkage int do_nmi(struct pt_regs *regs, long error_code); -+dotraplinkage int do_int3(struct pt_regs *regs, long error_code); -+dotraplinkage int do_overflow(struct pt_regs *regs, long error_code); -+dotraplinkage int do_bounds(struct pt_regs *regs, long error_code); -+dotraplinkage int do_invalid_op(struct pt_regs *regs, long error_code); -+dotraplinkage int do_device_not_available(struct pt_regs *regs, long error_code); - #if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT) --dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); -+dotraplinkage int do_double_fault(struct pt_regs *regs, long error_code, unsigned 
long cr2); - #endif --dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); --dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); --dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); --dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); -+dotraplinkage int do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); -+dotraplinkage int do_invalid_TSS(struct pt_regs *regs, long error_code); -+dotraplinkage int do_segment_not_present(struct pt_regs *regs, long error_code); -+dotraplinkage int do_stack_segment(struct pt_regs *regs, long error_code); - #ifdef CONFIG_X86_64 --dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address); -+dotraplinkage int do_double_fault(struct pt_regs *regs, long error_code, unsigned long address); - asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); - asmlinkage __visible notrace - struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); - void __init trap_init(void); - #endif --dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); --dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); --dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); --dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code); --dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code); -+dotraplinkage int do_general_protection(struct pt_regs *regs, long error_code); -+dotraplinkage int do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); -+dotraplinkage int do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); -+dotraplinkage int do_coprocessor_error(struct pt_regs *regs, long error_code); -+dotraplinkage int do_alignment_check(struct pt_regs *regs, long error_code); - #ifdef 
CONFIG_X86_MCE --dotraplinkage void do_machine_check(struct pt_regs *regs, long error_code); -+dotraplinkage int do_machine_check(struct pt_regs *regs, long error_code); - #endif --dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code); -+dotraplinkage int do_simd_coprocessor_error(struct pt_regs *regs, long error_code); - #ifdef CONFIG_X86_32 --dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code); -+dotraplinkage int do_iret_error(struct pt_regs *regs, long error_code); - #endif --dotraplinkage void do_mce(struct pt_regs *regs, long error_code); -+dotraplinkage int do_mce(struct pt_regs *regs, long error_code); - - static inline int get_si_code(unsigned long condition) - { -diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c -index 2c4f949611e4..fcc4f9b41826 100644 ---- a/arch/x86/kernel/cpu/mce/core.c -+++ b/arch/x86/kernel/cpu/mce/core.c -@@ -1214,7 +1214,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final, - * MCE broadcast. However some CPUs might be broken beyond repair, - * so be always careful when synchronizing with others. 
- */ --void do_machine_check(struct pt_regs *regs, long error_code) -+int do_machine_check(struct pt_regs *regs, long error_code) - { - DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); - DECLARE_BITMAP(toclear, MAX_NR_BANKS); -@@ -1249,7 +1249,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) - int lmce = 1; - - if (__mc_check_crashing_cpu(cpu)) -- return; -+ return 0; - - ist_enter(regs); - -@@ -1358,6 +1358,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) - - out_ist: - ist_exit(regs); -+ return 0; - } - EXPORT_SYMBOL_GPL(do_machine_check); - -@@ -1882,19 +1883,20 @@ bool filter_mce(struct mce *m) - } - - /* Handle unconfigured int18 (should never happen) */ --static void unexpected_machine_check(struct pt_regs *regs, long error_code) -+static int unexpected_machine_check(struct pt_regs *regs, long error_code) - { - pr_err("CPU#%d: Unexpected int18 (Machine Check)\n", - smp_processor_id()); -+ return 0; - } - - /* Call the installed machine check handler for this CPU setup. */ --void (*machine_check_vector)(struct pt_regs *, long error_code) = -+int (*machine_check_vector)(struct pt_regs *, long error_code) = - unexpected_machine_check; - --dotraplinkage void do_mce(struct pt_regs *regs, long error_code) -+dotraplinkage int do_mce(struct pt_regs *regs, long error_code) - { -- machine_check_vector(regs, error_code); -+ return machine_check_vector(regs, error_code); - } - - /* -diff --git a/arch/x86/kernel/dtrace_fbt.c b/arch/x86/kernel/dtrace_fbt.c -new file mode 100644 -index 000000000000..52ff3f49d101 ---- /dev/null -+++ b/arch/x86/kernel/dtrace_fbt.c -@@ -0,0 +1,177 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_fbt.c -+ * DESCRIPTION: Dynamic Tracing: FBT registration code (arch-specific) -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. 
-+ */ -+ -+#include <linux/kernel.h> -+#include <linux/kallsyms.h> -+#include <linux/dtrace_os.h> -+#include <linux/dtrace_fbt.h> -+#include <linux/slab.h> -+#include <linux/sort.h> -+#include <asm/insn.h> -+#include <asm/sections.h> -+ -+#define FBT_MOV_RSP_RBP_1 0x48 -+#define FBT_MOV_RSP_RBP_2 0x89 -+#define FBT_MOV_RSP_RBP_3 0xe5 -+#define FBT_PUSHL_EBP 0x55 -+#define FBT_NOP 0x90 -+#define FBT_RET_IMM16 0xc2 -+#define FBT_RET 0xc3 -+#define FBT_LEAVE 0xc9 -+ -+#define BL_SENTRY(tp, nm) extern tp nm; -+#define BL_DENTRY(tp, nm) -+#include "fbt_blacklist.h" -+#undef BL_DENTRY -+#undef BL_SENTRY -+ -+static void -+dtrace_fbt_populate_bl(void) -+{ -+#define BL_SENTRY(tp, nm) dtrace_fbt_bl_add((unsigned long)&nm, \ -+ __stringify(nm)); -+#define BL_DENTRY(tp, nm) dtrace_fbt_bl_add(0, __stringify(nm)); -+#include "fbt_blacklist.h" -+#undef BL_SENTRY -+#undef BL_DENTRY -+} -+ -+void dtrace_fbt_init(fbt_add_probe_fn fbt_add_probe, struct module *mp, -+ void *arg) -+{ -+ loff_t pos; -+ struct kallsym_iter sym; -+ asm_instr_t *paddr = NULL; -+ struct dt_fbt_bl_entry *blent = NULL; -+ -+ /* -+ * Look up any unresolved symbols in the blacklist, and sort the list -+ * by ascending address. -+ */ -+ dtrace_fbt_populate_bl(); -+ blent = dtrace_fbt_bl_first(); -+ -+ pos = 0; -+ kallsyms_iter_reset(&sym, 0); -+ while (kallsyms_iter_update(&sym, pos++)) { -+ asm_instr_t *addr, *end; -+ int state = 0, insc = 0; -+ void *fbtp = NULL; -+ -+ /* -+ * There is no point considering non-function symbols for FBT, -+ * or symbols that have a zero size. We could consider weak -+ * symbols but that gets quite complicated and there is no -+ * demands for that (so far). -+ */ -+ if (sym.type != 'T' && sym.type != 't') -+ continue; -+ if (!sym.size) -+ continue; -+ -+ /* -+ * Handle only symbols that belong to the module we have been -+ * asked for. 
-+ */ -+ if (mp == dtrace_kmod && !core_kernel_text(sym.value)) -+ continue; -+ -+ /* -+ * Ensure we have not been given .init symbol from kallsyms -+ * interface. This could lead to memory corruption once DTrace -+ * tries to enable probe in already freed memory. -+ */ -+ if (mp != dtrace_kmod && !within_module_core(sym.value, mp)) -+ continue; -+ -+ /* -+ * See if the symbol is on the FBT's blacklist. Since both -+ * iterators are workng in sort order by ascending address we -+ * can use concurrent traversal. -+ */ -+ while (blent != NULL && -+ dtrace_fbt_bl_entry_addr(blent) < sym.value) { -+ blent = dtrace_fbt_bl_next(blent); -+ } -+ if (dtrace_fbt_bl_entry_addr(blent) == sym.value) -+ continue; -+ -+ /* -+ * No FBT tracing for DTrace functions, and functions that are -+ * crucial to probe processing. -+ * Also weed out symbols that are not relevant here. -+ */ -+ if (strncmp(sym.name, "dtrace_", 7) == 0) -+ continue; -+ if (strncmp(sym.name, "insn_", 5) == 0) -+ continue; -+ if (strncmp(sym.name, "inat_", 5) == 0) -+ continue; -+ if (strncmp(sym.name, "_GLOBAL_", 8) == 0) -+ continue; -+ if (strncmp(sym.name, "do_", 3) == 0) -+ continue; -+ if (strncmp(sym.name, "xen_", 4) == 0) -+ continue; -+ -+ addr = (asm_instr_t *)sym.value; -+ end = (asm_instr_t *)(sym.value + sym.size); -+ -+ /* -+ * FIXME: -+ * When there are multiple symbols for the same address, we -+ * should link them together as probes associated with the -+ * same function. When a probe for that function is triggered -+ * all associated probes should fire. -+ * -+ * For now, we ignore duplicates. 
-+ */ -+ if (addr == paddr) -+ continue; -+ paddr = addr; -+ -+ while (addr < end) { -+ struct insn insn; -+ -+ insc++; -+ -+ switch (state) { -+ case 0: /* start of function */ -+ if (*addr == FBT_PUSHL_EBP) { -+ fbt_add_probe( -+ mp, sym.name, -+ FBT_ENTRY, *addr, addr, 0, -+ NULL, arg); -+ state = 1; -+ } else if (insc > 10) -+ state = 2; -+ break; -+ case 1: /* look for ret */ -+ if (*addr == FBT_RET) { -+ uintptr_t off; -+ -+ off = addr - (asm_instr_t *)sym.value; -+ fbtp = fbt_add_probe( -+ mp, sym.name, -+ FBT_RETURN, *addr, addr, off, -+ fbtp, arg); -+ } -+ break; -+ } -+ -+ if (state == 2) -+ break; -+ -+ kernel_insn_init(&insn, addr, MAX_INSN_SIZE); -+ insn_get_length(&insn); -+ -+ addr += insn.length; -+ } -+ } -+} -+EXPORT_SYMBOL(dtrace_fbt_init); -diff --git a/arch/x86/kernel/fbt_blacklist.h b/arch/x86/kernel/fbt_blacklist.h -new file mode 100644 -index 000000000000..fd599859b47e ---- /dev/null -+++ b/arch/x86/kernel/fbt_blacklist.h -@@ -0,0 +1,92 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Functions used in die notifier chain calling. -+ */ -+BL_SENTRY(void *, notify_die) -+BL_DENTRY(void *, notifier_call_chain) -+BL_SENTRY(typeof(__atomic_notifier_call_chain), __atomic_notifier_call_chain) -+BL_SENTRY(typeof(atomic_notifier_call_chain), atomic_notifier_call_chain) -+BL_SENTRY(typeof(__raw_notifier_call_chain), __raw_notifier_call_chain) -+BL_SENTRY(typeof(raw_notifier_call_chain), raw_notifier_call_chain) -+BL_DENTRY(void *, hw_breakpoint_exceptions_notify) -+BL_DENTRY(void *, kprobe_exceptions_notify) -+ -+/* -+ * Functions used to update vtime in probe context. 
-+ */ -+BL_SENTRY(typeof(ktime_get_raw_fast_ns), ktime_get_raw_fast_ns) -+BL_DENTRY(void *, raw_read_seqcount) -+BL_DENTRY(void *, read_seqcount_retry) -+BL_DENTRY(void *, __read_seqcount_retry) -+ -+/* xen_clocksource */ -+BL_DENTRY(void *, xen_clocksource_get_cycles) -+BL_DENTRY(void *, xen_clocksource_read) -+BL_DENTRY(void *, pvclock_clocksource_read) -+BL_DENTRY(void *, pvclock_touch_watchdogs) -+BL_DENTRY(void *, touch_softlockup_watchdog_sync) -+BL_DENTRY(void *, clocksource_touch_watchdog) -+BL_DENTRY(void *, clocksource_resume_watchdog) -+BL_DENTRY(void *, reset_hung_task_detector) -+/* clocksource_tsc */ -+BL_DENTRY(void *, read_tsc) -+BL_DENTRY(void *, get_cycles) -+/* clocksource_hpet */ -+BL_DENTRY(void *, read_hpet) -+BL_DENTRY(void *, hpet_readl) -+/* kvm_clock */ -+BL_DENTRY(void *, kvm_clock_get_cycles) -+BL_DENTRY(void *, kvm_clock_read) -+ -+/* -+ * Functions used in trap handling. -+ */ -+BL_DENTRY(void *, fixup_exception) -+BL_DENTRY(void *, paranoid_entry) -+BL_DENTRY(void *, kgdb_ll_trap) -+BL_DENTRY(void *, error_entry) -+BL_DENTRY(void *, xen_int3) -+BL_DENTRY(void *, ftrace_int3_handler) -+BL_DENTRY(typeof(poke_int3_handler), poke_int3_handler) -+BL_DENTRY(void *, fixup_bad_iret) -+BL_DENTRY(void *, xen_adjust_exception_frame) -+BL_DENTRY(void *, paravirt_nop) -+BL_DENTRY(void *, ist_enter) -+BL_DENTRY(void *, rcu_nmi_enter) -+BL_DENTRY(void *, rcu_dynticks_curr_cpu_in_eqs) -+BL_DENTRY(void *, rcu_dynticks_eqs_exit) -+BL_DENTRY(void *, trace_rcu_dyntick) -+BL_DENTRY(void *, rcu_nmi_exit) -+BL_DENTRY(void *, rcu_dynticks_eqs_enter) -+BL_DENTRY(void *, ist_exit) -+ -+/* -+ * Functions used in page fault handling. 
-+ */ -+BL_SENTRY(void *, do_page_fault) -+BL_DENTRY(void *, __do_page_fault) -+BL_DENTRY(void *, huge_page_mask) -+BL_DENTRY(void *, mmap_address_hint_valid) -+BL_DENTRY(void *, vm_start_gap) -+BL_DENTRY(void *, hugetlb_get_unmapped_area_bottomup) -+BL_DENTRY(void *, hugetlb_get_unmapped_area_topdown) -+BL_DENTRY(void *, down_read_trylock) -+BL_DENTRY(void *, __get_user_pages_fast) -+BL_DENTRY(void *, gup_pud_range) -+BL_DENTRY(void *, gup_huge_pud) -+BL_DENTRY(void *, gup_pmd_range) -+BL_DENTRY(void *, gup_huge_pmd) -+BL_DENTRY(void *, gup_pte_range) -+BL_DENTRY(void *, pte_mfn_to_pfn) -+ -+/* -+ * Functions used under 4.12 idr_find -+ */ -+BL_DENTRY(void *, idr_find) -+BL_DENTRY(void *, find_next_bit) -+BL_DENTRY(void *, _find_next_bit) -+BL_DENTRY(void *, radix_tree_lookup) -+BL_DENTRY(void *, __radix_tree_lookup) -+BL_DENTRY(void *, radix_tree_load_root) -+BL_DENTRY(void *, radix_tree_descend) -+BL_DENTRY(void *, is_sibling_entry) -diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c -index 6efe0410fb72..4b81eb988a8c 100644 ---- a/arch/x86/kernel/kvm.c -+++ b/arch/x86/kernel/kvm.c -@@ -242,7 +242,7 @@ u32 kvm_read_and_reset_pf_reason(void) - EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); - NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason); - --dotraplinkage void -+dotraplinkage int - do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) - { - switch (kvm_read_and_reset_pf_reason()) { -@@ -259,6 +259,8 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned lon - rcu_irq_exit(); - break; - } -+ -+ return 0; - } - NOKPROBE_SYMBOL(do_async_page_fault); - -diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c -index 54c21d6abd5a..311c0d205773 100644 ---- a/arch/x86/kernel/nmi.c -+++ b/arch/x86/kernel/nmi.c -@@ -507,15 +507,15 @@ static bool notrace is_debug_stack(unsigned long addr) - NOKPROBE_SYMBOL(is_debug_stack); - #endif - --dotraplinkage notrace void -+dotraplinkage notrace int - 
do_nmi(struct pt_regs *regs, long error_code) - { - if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id())) -- return; -+ return 0; - - if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { - this_cpu_write(nmi_state, NMI_LATCHED); -- return; -+ return 0; - } - this_cpu_write(nmi_state, NMI_EXECUTING); - this_cpu_write(nmi_cr2, read_cr2()); -@@ -557,6 +557,7 @@ do_nmi(struct pt_regs *regs, long error_code) - - if (user_mode(regs)) - mds_user_clear_cpu_buffers(); -+ return 0; - } - NOKPROBE_SYMBOL(do_nmi); - -diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c -index 6ef00eb6fbb9..a8fa7281f274 100644 ---- a/arch/x86/kernel/traps.c -+++ b/arch/x86/kernel/traps.c -@@ -255,9 +255,11 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, - } - NOKPROBE_SYMBOL(do_trap); - --static void do_error_trap(struct pt_regs *regs, long error_code, char *str, -+static int do_error_trap(struct pt_regs *regs, long error_code, char *str, - unsigned long trapnr, int signr, int sicode, void __user *addr) - { -+ int ret; -+ - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - - /* -@@ -265,20 +267,21 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, - * notifier chain. 
- */ - if (!user_mode(regs) && fixup_bug(regs, trapnr)) -- return; -+ return 0; - -- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != -- NOTIFY_STOP) { -+ ret = notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr); -+ if ((ret & NOTIFY_STOP_MASK) != NOTIFY_STOP_MASK) { - cond_local_irq_enable(regs); - do_trap(trapnr, signr, str, regs, error_code, sicode, addr); - } -+ return notifier_to_errno(ret); - } - - #define IP ((void __user *)uprobe_get_trap_addr(regs)) - #define DO_ERROR(trapnr, signr, sicode, addr, str, name) \ --dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -+dotraplinkage int do_##name(struct pt_regs *regs, long error_code) \ - { \ -- do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \ -+ return do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \ - } - - DO_ERROR(X86_TRAP_DE, SIGFPE, FPE_INTDIV, IP, "divide error", divide_error) -@@ -323,7 +326,7 @@ __visible void __noreturn handle_stack_overflow(const char *message, - * be lost. If, for some reason, we need to return to a context with modified - * regs, the shim code could be adjusted to synchronize the registers. 
- */ --dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2) -+dotraplinkage int do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2) - { - static const char str[] = "double fault"; - struct task_struct *tsk = current; -@@ -374,7 +377,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign - regs->ip = (unsigned long)general_protection; - regs->sp = (unsigned long)&gpregs->orig_ax; - -- return; -+ return 0; - } - #endif - -@@ -432,18 +435,19 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign - } - #endif - --dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) -+dotraplinkage int do_bounds(struct pt_regs *regs, long error_code) - { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - if (notify_die(DIE_TRAP, "bounds", regs, error_code, - X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) -- return; -+ return 0; - cond_local_irq_enable(regs); - - if (!user_mode(regs)) - die("bounds", regs, error_code); - - do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL); -+ return 0; - } - - enum kernel_gp_hint { -@@ -490,26 +494,26 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, - - #define GPFSTR "general protection fault" - --dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) -+dotraplinkage int do_general_protection(struct pt_regs *regs, long error_code) - { - char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; - enum kernel_gp_hint hint = GP_NO_HINT; - struct task_struct *tsk; - unsigned long gp_addr; -- int ret; -+ int ret = 0; - - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - cond_local_irq_enable(regs); - - if (static_cpu_has(X86_FEATURE_UMIP)) { - if (user_mode(regs) && fixup_umip_exception(regs)) -- return; -+ return 0; - } - - if (v8086_mode(regs)) { - local_irq_enable(); - handle_vm86_fault((struct kernel_vm86_regs 
*) regs, error_code); -- return; -+ return 0; - } - - tsk = current; -@@ -521,11 +525,11 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) - show_signal(tsk, SIGSEGV, "", desc, regs, error_code); - force_sig(SIGSEGV); - -- return; -+ return 0; - } - - if (fixup_exception(regs, X86_TRAP_GP, error_code, 0)) -- return; -+ return 0; - - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = X86_TRAP_GP; -@@ -537,11 +541,11 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) - if (!preemptible() && - kprobe_running() && - kprobe_fault_handler(regs, X86_TRAP_GP)) -- return; -+ return 0; - - ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV); -- if (ret == NOTIFY_STOP) -- return; -+ if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) -+ return notifier_to_errno(ret); - - if (error_code) - snprintf(desc, sizeof(desc), "segment-related " GPFSTR); -@@ -563,13 +567,16 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) - - die_addr(desc, regs, error_code, gp_addr); - -+ return 0; - } - NOKPROBE_SYMBOL(do_general_protection); - --dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) -+dotraplinkage int notrace do_int3(struct pt_regs *regs, long error_code) - { -+ int ret; -+ - if (poke_int3_handler(regs)) -- return; -+ return 0; - - /* - * Use ist_enter despite the fact that we don't use an IST stack. 
-@@ -591,9 +598,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) - goto exit; - #endif - -- if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, -- SIGTRAP) == NOTIFY_STOP) -+ ret = notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, -+ SIGTRAP); -+ if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) { -+ ret = notifier_to_errno(ret); - goto exit; -+ } else -+ ret = 0; - - cond_local_irq_enable(regs); - do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, 0, NULL); -@@ -601,6 +612,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) - - exit: - ist_exit(regs); -+ return ret; - } - NOKPROBE_SYMBOL(do_int3); - -@@ -697,7 +709,7 @@ static bool is_sysenter_singlestep(struct pt_regs *regs) - * - * May run on IST stack. - */ --dotraplinkage void do_debug(struct pt_regs *regs, long error_code) -+dotraplinkage int do_debug(struct pt_regs *regs, long error_code) - { - struct task_struct *tsk = current; - int user_icebp = 0; -@@ -798,6 +810,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) - - exit: - ist_exit(regs); -+ return 0; - } - NOKPROBE_SYMBOL(do_debug); - -@@ -846,26 +859,29 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) - (void __user *)uprobe_get_trap_addr(regs)); - } - --dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) -+dotraplinkage int do_coprocessor_error(struct pt_regs *regs, long error_code) - { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - math_error(regs, error_code, X86_TRAP_MF); -+ return 0; - } - --dotraplinkage void -+dotraplinkage int - do_simd_coprocessor_error(struct pt_regs *regs, long error_code) - { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - math_error(regs, error_code, X86_TRAP_XF); -+ return 0; - } - --dotraplinkage void -+dotraplinkage int - do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) - { - 
cond_local_irq_enable(regs); -+ return 0; - } - --dotraplinkage void -+dotraplinkage int - do_device_not_available(struct pt_regs *regs, long error_code) - { - unsigned long cr0 = read_cr0(); -@@ -880,7 +896,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) - - info.regs = regs; - math_emulate(&info); -- return; -+ return 0; - } - #endif - -@@ -896,11 +912,12 @@ do_device_not_available(struct pt_regs *regs, long error_code) - */ - die("unexpected #NM exception", regs, error_code); - } -+ return 0; - } - NOKPROBE_SYMBOL(do_device_not_available); - - #ifdef CONFIG_X86_32 --dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) -+dotraplinkage int do_iret_error(struct pt_regs *regs, long error_code) - { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - local_irq_enable(); -@@ -910,6 +927,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) - do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, - ILL_BADSTK, (void __user *)NULL); - } -+ return 0; - } - #endif - -diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c -index b1cdcf3518f3..abb38c84c1e5 100644 ---- a/arch/x86/mm/fault.c -+++ b/arch/x86/mm/fault.c -@@ -1548,7 +1548,14 @@ trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, - trace_page_fault_kernel(address, regs, error_code); - } - --dotraplinkage void -+/* -+ * We must have this function blacklisted from kprobes, tagged with notrace -+ * and call read_cr2() before calling anything else. To avoid calling any -+ * kind of tracing machinery before we've observed the CR2 value. -+ * -+ * exception_{enter,exit}() contains all sorts of tracepoints. 
-+ */ -+dotraplinkage int notrace - do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, - unsigned long address) - { -@@ -1556,12 +1563,13 @@ do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, - trace_page_fault_entries(regs, hw_error_code, address); - - if (unlikely(kmmio_fault(regs, address))) -- return; -+ return 0; - - /* Was the fault on kernel-controlled part of the address space? */ - if (unlikely(fault_in_kernel_space(address))) - do_kern_addr_fault(regs, hw_error_code, address); - else - do_user_addr_fault(regs, hw_error_code, address); -+ return 0; - } - NOKPROBE_SYMBOL(do_page_fault); -diff --git a/include/linux/dtrace_fbt.h b/include/linux/dtrace_fbt.h -new file mode 100644 -index 000000000000..d11e273cee31 ---- /dev/null -+++ b/include/linux/dtrace_fbt.h -@@ -0,0 +1,48 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ *Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#ifndef _LINUX_DTRACE_FBT_H -+#define _LINUX_DTRACE_FBT_H -+ -+#include <linux/module.h> -+#include <asm/dtrace_arch.h> -+ -+extern unsigned long dtrace_fbt_nfuncs __attribute__((weak)); -+ -+/* -+ * Prototype for callback function that handles the actual creation of FBT -+ * probes. -+ * -+ * Arguments to pass: -+ * - Pointer to module the probe will belong to -+ * - function name -+ * - probe type (FBT_ENTRY or FBT_RETURN) -+ * - probe subtype (arch-specific) -+ * - address (location of the probe) -+ * - offset from the function start -+ * - return value from previous callback invocation -+ * - cookie passed to dtrace_fbt_init -+ * Returns: -+ * - generic pointer (only to be used to pass back in) -+ */ -+#define FBT_ENTRY 0 -+#define FBT_RETURN 1 -+ -+typedef void *(*fbt_add_probe_fn)(struct module *, char *, int, int, -+ asm_instr_t *, uintptr_t, void *, void *); -+extern void dtrace_fbt_init(fbt_add_probe_fn, struct module *, void *); -+ -+/* -+ * Dynamic blacklist routines. 
-+ */ -+struct dt_fbt_bl_entry; -+ -+extern struct dt_fbt_bl_entry *dtrace_fbt_bl_add(unsigned long, const char *); -+extern struct dt_fbt_bl_entry *dtrace_fbt_bl_first(void); -+extern struct dt_fbt_bl_entry *dtrace_fbt_bl_next(struct dt_fbt_bl_entry *); -+extern unsigned long dtrace_fbt_bl_entry_addr(struct dt_fbt_bl_entry *); -+extern const char *dtrace_fbt_bl_entry_name(struct dt_fbt_bl_entry *); -+ -+#endif /* _LINUX_DTRACE_FBT_H */ -diff --git a/kernel/dtrace/Kconfig b/kernel/dtrace/Kconfig -index 6bf6620981cd..1f070e49c69f 100644 ---- a/kernel/dtrace/Kconfig -+++ b/kernel/dtrace/Kconfig -@@ -55,6 +55,13 @@ config DT_SDT_PERF - Provides the perf provider, containing a DTrace probe for each - perf-events tracepoint in the system. - -+config DT_FBT -+ tristate "Function boundary tracing" -+ default m -+ select FTRACE -+ help -+ Provides function boundary tracing for functions in the kernel. -+ - config DT_SYSTRACE - tristate "System Call Tracing" - default m -diff --git a/kernel/dtrace/Makefile b/kernel/dtrace/Makefile -index 06329cbe52cb..0e5fb34b7b47 100644 ---- a/kernel/dtrace/Makefile -+++ b/kernel/dtrace/Makefile -@@ -4,11 +4,11 @@ - - DT_CORE_ARCH_OBJS = $(addprefix ../../arch/$(SRCARCH)/kernel/, \ - dtrace_syscall.o dtrace_syscall_stubs.o \ -- dtrace_sdt.o dtrace_util.o) -+ dtrace_fbt.o dtrace_sdt.o dtrace_util.o) - - ifdef CONFIG_DT_CORE - obj-y += cyclic.o dtrace_os.o dtrace_cpu.o \ -- dtrace_sdt_core.o \ -+ dtrace_sdt_core.o dtrace_fbt_core.o \ - dtrace_task.o dtrace_psinfo.o \ - $(DT_CORE_ARCH_OBJS) - endif -diff --git a/kernel/dtrace/dtrace_fbt_core.c b/kernel/dtrace/dtrace_fbt_core.c -new file mode 100644 -index 000000000000..67182a3b13fc ---- /dev/null -+++ b/kernel/dtrace/dtrace_fbt_core.c -@@ -0,0 +1,125 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_fbt_core.c -+ * DESCRIPTION: DTrace - FBT common code -+ * -+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/kallsyms.h> -+#include <linux/rbtree.h> -+#include <linux/slab.h> -+#include <linux/dtrace_fbt.h> -+ -+struct dt_fbt_bl_entry { -+ struct rb_node dfbe_node; -+ unsigned long dfbe_addr; -+ const char *dfbe_name; -+}; -+ -+static struct rb_root dt_fbt_root = RB_ROOT; -+ -+struct dt_fbt_bl_entry * -+dtrace_fbt_bl_add(unsigned long addr, const char *name) -+{ -+ struct rb_node **p = &dt_fbt_root.rb_node; -+ struct rb_node *parent = NULL; -+ struct dt_fbt_bl_entry *entry; -+ -+ /* -+ * If no address was given, we need to do a symbol name lookup: -+ * - If no symbol name was given, we cannot add anything. -+ * - If the lookup failed, we cannot add anything. -+ */ -+ if (addr == 0) { -+ if (name == NULL) -+ return NULL; -+ -+ addr = kallsyms_lookup_name(name); -+ -+ if (addr == 0) -+ return NULL; -+ } -+ -+ /* Find place in the tree. */ -+ while (*p) { -+ parent = *p; -+ entry = rb_entry(parent, struct dt_fbt_bl_entry, dfbe_node); -+ -+ if (addr > entry->dfbe_addr) -+ p = &parent->rb_right; -+ else if (addr < entry->dfbe_addr) -+ p = &parent->rb_left; -+ else -+ return NULL; /* no duplicates please */ -+ } -+ -+ /* Create a new blacklist entry. */ -+ entry = kmalloc(sizeof(*entry), GFP_KERNEL); -+ if (entry == NULL) -+ return NULL; -+ -+ entry->dfbe_name = name; -+ entry->dfbe_addr = addr; -+ -+ /* Update the tree. 
*/ -+ rb_link_node(&entry->dfbe_node, parent, p); -+ rb_insert_color(&entry->dfbe_node, &dt_fbt_root); -+ -+ return entry; -+} -+ -+/* -+ * Iterators for blacklisted symbols. The iteration happens in sort order by -+ * virtual memory address. Symbols with pending resolution are inored. -+ */ -+struct dt_fbt_bl_entry * -+dtrace_fbt_bl_first(void) -+{ -+ struct rb_node *node = rb_first(&dt_fbt_root); -+ -+ if (node == NULL) -+ return (NULL); -+ -+ return rb_entry(node, struct dt_fbt_bl_entry, dfbe_node); -+} -+ -+struct dt_fbt_bl_entry * -+dtrace_fbt_bl_next(struct dt_fbt_bl_entry *entry) -+{ -+ struct rb_node *node = rb_next(&entry->dfbe_node); -+ -+ if (node == NULL) -+ return (NULL); -+ -+ return rb_entry(node, struct dt_fbt_bl_entry, dfbe_node); -+} -+ -+unsigned long -+dtrace_fbt_bl_entry_addr(struct dt_fbt_bl_entry *entry) -+{ -+ if (entry == NULL) -+ return (0); -+ -+ return entry->dfbe_addr; -+} -+ -+const char * -+dtrace_fbt_bl_entry_name(struct dt_fbt_bl_entry *entry) -+{ -+ if (entry == NULL) -+ return (NULL); -+ -+ return entry->dfbe_name; -+} -diff --git a/kernel/dtrace/dtrace_os.c b/kernel/dtrace/dtrace_os.c -index 7140fd64cb7a..355bf1b6edc3 100644 ---- a/kernel/dtrace/dtrace_os.c -+++ b/kernel/dtrace/dtrace_os.c -@@ -18,6 +18,7 @@ - - #include <linux/binfmts.h> - #include <linux/dtrace_cpu.h> -+#include <linux/dtrace_fbt.h> - #include <linux/dtrace_os.h> - #include <linux/dtrace_sdt.h> - #include <linux/fs.h> -@@ -103,6 +104,7 @@ void __init dtrace_os_init(void) - dtrace_kmod->core_layout.size = 0x2000000; - #endif - -+ dtrace_kmod->num_ftrace_callsites = dtrace_fbt_nfuncs; - dtrace_kmod->state = MODULE_STATE_LIVE; - atomic_inc(&dtrace_kmod->refcnt); - -diff --git a/kernel/kprobes.c b/kernel/kprobes.c -index 2625c241ac00..bbae12ce6cf2 100644 ---- a/kernel/kprobes.c -+++ b/kernel/kprobes.c -@@ -36,6 +36,10 @@ - #include <linux/cpu.h> - #include <linux/jump_label.h> - -+#ifdef CONFIG_DTRACE -+#include <linux/dtrace_fbt.h> -+#endif -+ - #include 
<asm/sections.h> - #include <asm/cacheflush.h> - #include <asm/errno.h> -@@ -2152,6 +2156,10 @@ int kprobe_add_ksym_blacklist(unsigned long entry) - !kallsyms_lookup_size_offset(entry, &size, &offset)) - return -EINVAL; - -+#ifdef CONFIG_DTRACE -+ dtrace_fbt_bl_add(entry, NULL); -+#endif -+ - ent = kmalloc(sizeof(*ent), GFP_KERNEL); - if (!ent) - return -ENOMEM; --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0015-dtrace-fbt-provider-modular-components.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0015-dtrace-fbt-provider-modular-components.patch deleted file mode 100644 index b5b556cc4d98..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0015-dtrace-fbt-provider-modular-components.patch +++ /dev/null @@ -1,683 +0,0 @@ -From af8b0b492b62d442b7fe1d6845a25faf78bae717 Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Mon, 19 Nov 2018 18:12:44 +0000 -Subject: [PATCH 15/20] dtrace: fbt provider, modular components - -This uses the fbt machinery added in the previous commit. 
- -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - arch/x86/dtrace/Makefile.arch | 2 + - arch/x86/dtrace/fbt_x86_64.c | 156 ++++++++++++ - arch/x86/dtrace/include/dtrace/fbt_arch.h | 42 ++++ - dtrace/Makefile | 2 + - dtrace/fbt_dev.c | 281 ++++++++++++++++++++++ - dtrace/fbt_impl.h | 52 ++++ - dtrace/fbt_mod.c | 56 +++++ - 7 files changed, 591 insertions(+) - create mode 100644 arch/x86/dtrace/fbt_x86_64.c - create mode 100644 arch/x86/dtrace/include/dtrace/fbt_arch.h - create mode 100644 dtrace/fbt_dev.c - create mode 100644 dtrace/fbt_impl.h - create mode 100644 dtrace/fbt_mod.c - -diff --git a/arch/x86/dtrace/Makefile.arch b/arch/x86/dtrace/Makefile.arch -index e4655557e06a..906fa8c7e17c 100644 ---- a/arch/x86/dtrace/Makefile.arch -+++ b/arch/x86/dtrace/Makefile.arch -@@ -8,8 +8,10 @@ ccflags-y += -I$(srctree)/arch/x86/dtrace/include -Idtrace - - dtrace-obj += dtrace_asm_x86_64.o dtrace_isa_x86_64.o - fasttrap-obj += fasttrap_x86_64.o -+fbt-obj += fbt_x86_64.o - sdt-obj += sdt_x86_64.o - - dtrace-y += $(addprefix $(DTARCHDIR)/, $(dtrace-obj)) - fasttrap-y += $(addprefix $(DTARCHDIR)/, $(fasttrap-obj)) -+fbt-y += $(addprefix $(DTARCHDIR)/, $(fbt-obj)) - sdt-y += $(addprefix $(DTARCHDIR)/, $(sdt-obj)) -diff --git a/arch/x86/dtrace/fbt_x86_64.c b/arch/x86/dtrace/fbt_x86_64.c -new file mode 100644 -index 000000000000..a8a05292be7e ---- /dev/null -+++ b/arch/x86/dtrace/fbt_x86_64.c -@@ -0,0 +1,156 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: fbt_x86_64.c -+ * DESCRIPTION: DTrace - FBT provider implementation for x86 -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/dtrace_fbt.h> -+#include <linux/vmalloc.h> -+#include <asm/dtrace_util.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "fbt_impl.h" -+ -+/* -+ * Use 0xf0 (LOCK Prefix) and X86_TRAP_UD for Invalid Opcode traps to be used. -+ * Use 0xcc (INT 3) and X86_TRAP_BP for Breakpoint traps to be used. -+ */ -+#define FBT_ENTRY_PATCHVAL 0xcc -+#define FBT_ENTRY_TRAP X86_TRAP_BP -+#define FBT_RETURN_PATCHVAL 0xcc -+#define FBT_RETURN_TRAP X86_TRAP_BP -+ -+static uint8_t fbt_invop(struct pt_regs *regs) -+{ -+ struct fbt_probe *fbp = fbt_probetab[FBT_ADDR2NDX(regs->ip)]; -+ -+ for (; fbp != NULL; fbp = fbp->fbp_hashnext) { -+ if ((uintptr_t)fbp->fbp_patchpoint == regs->ip) { -+ struct pt_regs *old = this_cpu_core->cpu_dtrace_regs; -+ -+ this_cpu_core->cpu_dtrace_regs = regs; -+ if (fbp->fbp_roffset == 0) { -+ dtrace_probe(fbp->fbp_id, regs->di, regs->si, -+ regs->dx, regs->cx, regs->r8, -+ regs->r9, 0); -+ } else { -+ dtrace_probe(fbp->fbp_id, fbp->fbp_roffset, -+ regs->ax, 0, 0, 0, 0, 0); -+ } -+ -+ this_cpu_core->cpu_dtrace_regs = old; -+ -+ return fbp->fbp_rval; -+ } -+ } -+ -+ return 0; -+} -+ -+uint64_t fbt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, -+ int aframes) -+{ -+ struct pt_regs *regs = this_cpu_core->cpu_dtrace_regs; -+ uint64_t *st; -+ uint64_t val; -+ -+ if (regs == NULL) -+ return 0; -+ -+ switch (argno) { -+ case 0: -+ return regs->di; -+ case 1: -+ return regs->si; -+ case 2: -+ return 
regs->dx; -+ case 3: -+ return regs->cx; -+ case 4: -+ return regs->r8; -+ case 5: -+ return regs->r9; -+ } -+ -+ ASSERT(argno > 5); -+ -+ st = (uint64_t *)regs->sp; -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ /* -+ * Skip the topmost slot of the stack because that holds the return -+ * address for the call to the function we are entering. At this point -+ * the BP has not been pushed yet, so we are still working within the -+ * caller's stack frame. -+ */ -+ val = st[1 + argno - 6]; -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ -+ return val; -+} -+ -+void fbt_provide_probe_arch(struct fbt_probe *fbp, int probetype, int stype) -+{ -+ fbp->fbp_patchval = probetype == FBT_ENTRY ? FBT_ENTRY_PATCHVAL -+ : FBT_RETURN_PATCHVAL; -+ fbp->fbp_savedval = *fbp->fbp_patchpoint; -+ fbp->fbp_rval = probetype == FBT_ENTRY ? DTRACE_INVOP_PUSH_BP -+ : DTRACE_INVOP_RET; -+} -+ -+int fbt_can_patch_return_arch(asm_instr_t *addr) -+{ -+ return 1; -+} -+ -+int fbt_provide_module_arch(void *arg, struct module *mp) -+{ -+ return 1; -+} -+ -+void fbt_destroy_module(void *arg, struct module *mp) -+{ -+} -+ -+void fbt_enable_arch(struct fbt_probe *fbp, dtrace_id_t id, void *arg) -+{ -+ dtrace_invop_enable(fbp->fbp_patchpoint, fbp->fbp_patchval); -+} -+ -+void fbt_disable_arch(struct fbt_probe *fbp, dtrace_id_t id, void *arg) -+{ -+ dtrace_invop_disable(fbp->fbp_patchpoint, fbp->fbp_savedval); -+} -+ -+int fbt_dev_init_arch(void) -+{ -+ fbt_probetab_mask = fbt_probetab_size - 1; -+ fbt_probetab = dtrace_vzalloc_try(fbt_probetab_size * -+ sizeof(struct fbt_probe *)); -+ -+ if (fbt_probetab == NULL) -+ return -ENOMEM; -+ -+ return dtrace_invop_add(fbt_invop); -+} -+ -+void fbt_dev_exit_arch(void) -+{ -+ vfree(fbt_probetab); -+ fbt_probetab_mask = 0; -+ fbt_probetab_size = 0; -+ -+ dtrace_invop_remove(fbt_invop); -+} -diff --git a/arch/x86/dtrace/include/dtrace/fbt_arch.h b/arch/x86/dtrace/include/dtrace/fbt_arch.h -new file mode 100644 -index 000000000000..7e90b2b75bba ---- /dev/null -+++ 
b/arch/x86/dtrace/include/dtrace/fbt_arch.h -@@ -0,0 +1,42 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - FBT Implementation defines -+ * -+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _X86_64_FBT_ARCH_H -+#define _X86_64_FBT_ARCH_H -+ -+#define FBT_AFRAMES 7 -+ -+struct fbt_probe { -+ char *fbp_name; /* name of probe */ -+ dtrace_id_t fbp_id; /* probe ID */ -+ struct module *fbp_module; /* defining module */ -+ int fbp_primary; /* non-zero if primary mod */ -+ asm_instr_t *fbp_patchpoint;/* patch point */ -+ asm_instr_t fbp_patchval; /* instruction to patch */ -+ asm_instr_t fbp_savedval; /* saved instruction value */ -+ uintptr_t fbp_roffset; /* relative offset */ -+ int fbp_rval; -+ struct fbt_probe *fbp_next; /* next probe */ -+ struct fbt_probe *fbp_hashnext; /* next on hash */ -+}; -+ -+#endif /* _X86_64_FBT_ARCH_H */ -diff --git a/dtrace/Makefile b/dtrace/Makefile -index c7e3fc512a6c..35b8b098123f 100644 ---- a/dtrace/Makefile -+++ b/dtrace/Makefile -@@ -4,6 +4,7 @@ - - obj-$(CONFIG_DT_CORE) += dtrace.o - obj-$(CONFIG_DT_FASTTRAP) += fasttrap.o -+obj-$(CONFIG_DT_FBT) += fbt.o - obj-$(CONFIG_DT_PROFILE) += profile.o - obj-$(CONFIG_DT_SDT) += sdt.o - obj-$(CONFIG_DT_SYSTRACE) += systrace.o -@@ -19,6 
+20,7 @@ dtrace-y := dtrace_mod.o dtrace_dev.o \ - dtrace_ptofapi.o dtrace_predicate.o \ - dtrace_spec.o dtrace_state.o dtrace_util.o - fasttrap-y := fasttrap_mod.o fasttrap_dev.o -+fbt-y := fbt_mod.o fbt_dev.o - profile-y := profile_mod.o profile_dev.o - sdt-y := sdt_mod.o sdt_dev.o - systrace-y := systrace_mod.o systrace_dev.o -diff --git a/dtrace/fbt_dev.c b/dtrace/fbt_dev.c -new file mode 100644 -index 000000000000..281fd749aae7 ---- /dev/null -+++ b/dtrace/fbt_dev.c -@@ -0,0 +1,281 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: fbt_dev.c -+ * DESCRIPTION: DTrace - FBT provider device driver -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/fs.h> -+#include <linux/miscdevice.h> -+#include <linux/kallsyms.h> -+#include <linux/slab.h> -+#include <linux/vmalloc.h> -+#include <linux/dtrace_fbt.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "fbt_impl.h" -+ -+#define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total */ -+ -+struct fbt_probe **fbt_probetab; -+int fbt_probetab_size = FBT_PROBETAB_SIZE; -+int fbt_probetab_mask; -+ -+static void *fbt_provide_probe(struct module *mp, char *func, int probetype, -+ int stype, asm_instr_t *addr, uintptr_t off, -+ void *pfbt, void *arg) -+{ -+ struct fbt_probe *fbp; -+ struct fbt_probe *prev; -+ int *skipped = arg; -+ -+ switch (probetype) { -+ case FBT_ENTRY: -+ fbp = kzalloc(sizeof(struct fbt_probe), GFP_KERNEL); -+ fbp->fbp_name = kstrdup(func, GFP_KERNEL); -+ if (fbp->fbp_name == NULL) -+ goto err_probe; -+ -+ fbp->fbp_id = dtrace_probe_create(fbt_id, mp->name, func, -+ "entry", FBT_AFRAMES, fbp); -+ if (fbp->fbp_id == DTRACE_IDNONE) -+ goto err_name; -+ -+ fbp->fbp_module = mp; -+ fbp->fbp_primary = 1; /* FIXME */ -+ fbp->fbp_roffset = off; -+ fbp->fbp_patchpoint = addr; -+ fbt_provide_probe_arch(fbp, probetype, stype); -+ -+ fbp->fbp_hashnext = fbt_probetab[FBT_ADDR2NDX(fbp->fbp_patchpoint)]; -+ fbt_probetab[FBT_ADDR2NDX(fbp->fbp_patchpoint)] = fbp; -+ -+ PDATA(mp)->fbt_probe_cnt++; -+ -+ return fbp; -+ case FBT_RETURN: -+ -+ /* Check if we are able to patch this return probe. 
*/ -+ if (!fbt_can_patch_return_arch(addr)) -+ return pfbt; -+ -+ fbp = kzalloc(sizeof(struct fbt_probe), GFP_KERNEL); -+ fbp->fbp_name = kstrdup(func, GFP_KERNEL); -+ if (fbp->fbp_name == NULL) -+ goto err_probe; -+ -+ prev = (struct fbt_probe *)pfbt; -+ if (prev != NULL) { -+ prev->fbp_next = fbp; -+ fbp->fbp_id = prev->fbp_id; -+ } else { -+ fbp->fbp_id = dtrace_probe_create(fbt_id, mp->name, -+ func, "return", -+ FBT_AFRAMES, fbp); -+ if (fbp->fbp_id == DTRACE_IDNONE) -+ goto err_name; -+ } -+ -+ fbp->fbp_module = mp; -+ fbp->fbp_primary = 1; /* FIXME */ -+ fbp->fbp_roffset = off; -+ fbp->fbp_patchpoint = addr; -+ fbt_provide_probe_arch(fbp, probetype, stype); -+ -+ fbp->fbp_hashnext = fbt_probetab[FBT_ADDR2NDX(fbp->fbp_patchpoint)]; -+ fbt_probetab[FBT_ADDR2NDX(fbp->fbp_patchpoint)] = fbp; -+ -+ PDATA(mp)->fbt_probe_cnt++; -+ -+ return fbp; -+ default: -+ pr_info("FBT: Invalid probe type %d (%d) for %s\n", -+ probetype, stype, func); -+ -+ return NULL; -+ } -+ -+err_name: -+ kfree(fbp->fbp_name); -+err_probe: -+ kfree(fbp); -+ (*skipped)++; -+ -+ return NULL; -+} -+ -+void fbt_provide_module(void *arg, struct module *mp) -+{ -+ struct module_use *use; -+ int probes_skipped = 0; -+ -+ /* If module setup has failed then do not provide anything. */ -+ if (PDATA(mp) == NULL) -+ return; -+ -+ /* -+ * Nothing to do if the module FBT probes were already created. -+ */ -+ if (PDATA(mp)->fbt_probe_cnt != 0) -+ return; -+ -+ /* -+ * Do not try to instrument DTrace itself and its modules: -+ * - dtrace module -+ * - all modules depending on dtrace -+ */ -+ if (!strncmp(mp->name, "dtrace", 7)) -+ return; -+ -+ list_for_each_entry(use, &mp->target_list, target_list) { -+ if (!strncmp(use->target->name, "dtrace", 7)) -+ return; -+ } -+ -+ /* -+ * Provide probes. 
-+ */ -+ if (!fbt_provide_module_arch(arg, mp)) -+ return; -+ -+ dtrace_fbt_init((fbt_add_probe_fn)fbt_provide_probe, mp, -+ &probes_skipped); -+ -+ if (probes_skipped != 0) -+ pr_warn("fbt: Failed to provide %d probes in %s (out of memory)\n", -+ probes_skipped, mp->name); -+} -+ -+int fbt_enable(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct fbt_probe *fbp = parg; -+ struct fbt_probe *curr; -+ -+ /* -+ * Ensure that we have a reference to the module. -+ */ -+ if (!try_module_get(fbp->fbp_module)) -+ return -EAGAIN; -+ -+ /* -+ * If at least one other enabled probe exists for this module, drop the -+ * reference we took above, because we only need one to prevent the -+ * module from being unloaded. -+ */ -+ PDATA(fbp->fbp_module)->enabled_cnt++; -+ if (PDATA(fbp->fbp_module)->enabled_cnt > 1) -+ module_put(fbp->fbp_module); -+ -+ for (curr = fbp; curr != NULL; curr = curr->fbp_next) -+ fbt_enable_arch(curr, id, arg); -+ -+ return 0; -+} -+ -+void fbt_disable(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct fbt_probe *fbp = parg; -+ struct fbt_probe *curr; -+ -+ for (curr = fbp; curr != NULL; curr = curr->fbp_next) -+ fbt_disable_arch(curr, id, arg); -+ -+ /* -+ * If we are disabling a probe, we know it was enabled, and therefore -+ * we know that we have a reference on the module to prevent it from -+ * being unloaded. If we disable the last probe on the module, we can -+ * drop the reference. 
-+ */ -+ PDATA(fbp->fbp_module)->enabled_cnt--; -+ if (PDATA(fbp->fbp_module)->enabled_cnt == 0) -+ module_put(fbp->fbp_module); -+} -+ -+void fbt_destroy(void *arg, dtrace_id_t id, void *parg) -+{ -+ struct fbt_probe *fbp = parg; -+ struct fbt_probe *hbp, *lst, *nxt; -+ int ndx; -+ struct module *mp = fbp->fbp_module; -+ -+ do { -+ nxt = fbp->fbp_next; -+ -+ ndx = FBT_ADDR2NDX(fbp->fbp_patchpoint); -+ lst = NULL; -+ hbp = fbt_probetab[ndx]; -+ -+ while (hbp != fbp) { -+ ASSERT(hbp != NULL); -+ -+ lst = hbp; -+ hbp = hbp->fbp_hashnext; -+ } -+ -+ if (lst != NULL) -+ lst->fbp_hashnext = fbp->fbp_hashnext; -+ else -+ fbt_probetab[ndx] = fbp->fbp_hashnext; -+ -+ kfree(fbp->fbp_name); -+ kfree(fbp); -+ -+ PDATA(mp)->fbt_probe_cnt--; -+ -+ fbp = nxt; -+ } while (fbp != NULL); -+} -+ -+static int fbt_open(struct inode *inode, struct file *file) -+{ -+ return -EAGAIN; -+} -+ -+static int fbt_close(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static const struct file_operations fbt_fops = { -+ .owner = THIS_MODULE, -+ .open = fbt_open, -+ .release = fbt_close, -+}; -+ -+static struct miscdevice fbt_dev = { -+ .minor = DT_DEV_FBT_MINOR, -+ .name = "fbt", -+ .nodename = "dtrace/provider/fbt", -+ .fops = &fbt_fops, -+}; -+ -+int fbt_dev_init(void) -+{ -+ int ret = 0; -+ -+ ret = misc_register(&fbt_dev); -+ if (ret) -+ pr_err("%s: Can't register misc device %d\n", -+ fbt_dev.name, fbt_dev.minor); -+ -+ return fbt_dev_init_arch(); -+} -+ -+void fbt_dev_exit(void) -+{ -+ fbt_dev_exit_arch(); -+ -+ misc_deregister(&fbt_dev); -+} -diff --git a/dtrace/fbt_impl.h b/dtrace/fbt_impl.h -new file mode 100644 -index 000000000000..85f83e704988 ---- /dev/null -+++ b/dtrace/fbt_impl.h -@@ -0,0 +1,52 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - Function Boundary Tracing provider -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#ifndef _FBT_H_ -+#define _FBT_H_ -+ -+#include <asm/dtrace_arch.h> -+#include <dtrace/fbt_arch.h> -+ -+#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & \ -+ fbt_probetab_mask) -+ -+extern struct fbt_probe **fbt_probetab; -+extern int fbt_probetab_size; -+extern int fbt_probetab_mask; -+ -+extern void fbt_provide_probe_arch(struct fbt_probe *, int, int); -+extern void fbt_enable_arch(struct fbt_probe *, dtrace_id_t, void *); -+extern void fbt_disable_arch(struct fbt_probe *, dtrace_id_t, void *); -+extern int fbt_can_patch_return_arch(asm_instr_t *); -+ -+extern int fbt_provide_module_arch(void *, struct module *); -+extern void fbt_provide_module(void *, struct module *); -+extern void fbt_destroy_module(void *, struct module *); -+extern int fbt_enable(void *, dtrace_id_t, void *); -+extern void fbt_disable(void *, dtrace_id_t, void *); -+extern uint64_t fbt_getarg(void *, dtrace_id_t, void *, int, int); -+extern void fbt_destroy(void *, dtrace_id_t, void *); -+ -+extern dtrace_provider_id_t fbt_id; -+ -+extern int fbt_dev_init_arch(void); -+extern void fbt_dev_exit_arch(void); -+ -+extern int fbt_dev_init(void); -+extern void fbt_dev_exit(void); -+ -+#endif /* _FBT_H_ */ -diff --git a/dtrace/fbt_mod.c b/dtrace/fbt_mod.c -new file mode 100644 -index 000000000000..3da13d71809d ---- /dev/null -+++ b/dtrace/fbt_mod.c -@@ -0,0 +1,56 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: fbt_mod.c -+ * DESCRIPTION: DTrace - FBT 
provider kernel module -+ * -+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <linux/module.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "fbt_impl.h" -+ -+MODULE_AUTHOR("Kris Van Hees (kris.van.hees@oracle.com)"); -+MODULE_DESCRIPTION("Function Boundary Tracing"); -+MODULE_VERSION("v0.1"); -+MODULE_LICENSE("GPL"); -+ -+static const struct dtrace_pattr fbt_attr = { -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, -+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, -+}; -+ -+static struct dtrace_pops fbt_pops = { -+ .dtps_provide = NULL, -+ .dtps_provide_module = fbt_provide_module, -+ .dtps_destroy_module = fbt_destroy_module, -+ .dtps_enable = fbt_enable, -+ .dtps_disable = fbt_disable, -+ .dtps_suspend = NULL, -+ .dtps_resume = NULL, -+ .dtps_getargdesc = NULL, -+#ifdef CONFIG_X86_64 -+ .dtps_getargval = fbt_getarg, -+#else -+ .dtps_getargval = NULL, -+#endif -+ .dtps_usermode = NULL, -+ .dtps_destroy = fbt_destroy -+}; -+ -+DT_PROVIDER_MODULE(fbt, DTRACE_PRIV_KERNEL) --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0016-dtrace-arm-arm64-port.patch 
b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0016-dtrace-arm-arm64-port.patch deleted file mode 100644 index 16d3b40c9af9..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0016-dtrace-arm-arm64-port.patch +++ /dev/null @@ -1,2726 +0,0 @@ -From 138ff7e69a8dbffa44b34e5efb0a7cb1fc19ee8d Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Thu, 8 Nov 2018 18:57:33 +0000 -Subject: [PATCH 16/20] dtrace, arm: arm64 port - -This provides an arm64 implementation of DTrace. - -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - arch/arm64/Kconfig | 3 + - arch/arm64/dtrace/Makefile.arch | 17 + - arch/arm64/dtrace/dtrace_asm_arm64.S | 51 +++ - arch/arm64/dtrace/dtrace_isa_arm64.c | 164 +++++++ - arch/arm64/dtrace/fasttrap_arm64.c | 282 ++++++++++++ - arch/arm64/dtrace/fbt_arm64.c | 152 +++++++ - .../dtrace/include/dtrace/fasttrap_arch.h | 30 ++ - arch/arm64/dtrace/include/dtrace/fbt_arch.h | 53 +++ - arch/arm64/dtrace/include/dtrace/sdt_arch.h | 28 ++ - arch/arm64/dtrace/sdt_arm64.c | 122 +++++ - arch/arm64/include/asm/brk-imm.h | 6 + - arch/arm64/include/asm/cpu.h | 1 + - arch/arm64/include/asm/debug-monitors.h | 4 + - arch/arm64/include/asm/dtrace_arch.h | 31 ++ - arch/arm64/include/asm/dtrace_cpuinfo.h | 13 + - arch/arm64/include/asm/dtrace_sdt_arch.h | 15 + - arch/arm64/include/asm/dtrace_syscall.h | 3 + - arch/arm64/include/asm/dtrace_syscall_types.h | 11 + - arch/arm64/include/asm/dtrace_util.h | 14 + - arch/arm64/include/asm/kdebug.h | 11 + - arch/arm64/include/asm/syscall.h | 8 +- - arch/arm64/kernel/dtrace_fbt.c | 187 ++++++++ - arch/arm64/kernel/dtrace_sdt.c | 25 ++ - arch/arm64/kernel/dtrace_syscall.c | 89 ++++ - 
arch/arm64/kernel/dtrace_syscall_stubs.S | 0 - arch/arm64/kernel/dtrace_util.c | 292 ++++++++++++ - arch/arm64/kernel/entry-common.c | 5 +- - arch/arm64/kernel/entry.S | 55 ++- - arch/arm64/kernel/fbt_blacklist.h | 91 ++++ - arch/arm64/kernel/probes/uprobes.c | 3 +- - arch/arm64/kernel/sys.c | 2 +- - arch/arm64/kvm/hyp/hyp-entry.S | 6 + - arch/arm64/mm/fault.c | 18 + - include/linux/uprobes.h | 1 + - kernel/events/uprobes.c | 10 + - scripts/dtrace_sdt_arm64.sh | 425 ++++++++++++++++++ - scripts/link-vmlinux.sh | 15 +- - 37 files changed, 2229 insertions(+), 14 deletions(-) - create mode 100644 arch/arm64/dtrace/Makefile.arch - create mode 100644 arch/arm64/dtrace/dtrace_asm_arm64.S - create mode 100644 arch/arm64/dtrace/dtrace_isa_arm64.c - create mode 100644 arch/arm64/dtrace/fasttrap_arm64.c - create mode 100644 arch/arm64/dtrace/fbt_arm64.c - create mode 100644 arch/arm64/dtrace/include/dtrace/fasttrap_arch.h - create mode 100644 arch/arm64/dtrace/include/dtrace/fbt_arch.h - create mode 100644 arch/arm64/dtrace/include/dtrace/sdt_arch.h - create mode 100644 arch/arm64/dtrace/sdt_arm64.c - create mode 100644 arch/arm64/include/asm/dtrace_arch.h - create mode 100644 arch/arm64/include/asm/dtrace_cpuinfo.h - create mode 100644 arch/arm64/include/asm/dtrace_sdt_arch.h - create mode 100644 arch/arm64/include/asm/dtrace_syscall.h - create mode 100644 arch/arm64/include/asm/dtrace_syscall_types.h - create mode 100644 arch/arm64/include/asm/dtrace_util.h - create mode 100644 arch/arm64/include/asm/kdebug.h - create mode 100644 arch/arm64/kernel/dtrace_fbt.c - create mode 100644 arch/arm64/kernel/dtrace_sdt.c - create mode 100644 arch/arm64/kernel/dtrace_syscall.c - create mode 100644 arch/arm64/kernel/dtrace_syscall_stubs.S - create mode 100644 arch/arm64/kernel/dtrace_util.c - create mode 100644 arch/arm64/kernel/fbt_blacklist.h - create mode 100755 scripts/dtrace_sdt_arm64.sh - -diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 0b30e884e088..ade00e80c7d5 
100644 ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -302,6 +302,9 @@ config PGTABLE_LEVELS - config ARCH_SUPPORTS_UPROBES - def_bool y - -+config ARCH_SUPPORTS_DTRACE -+ def_bool y -+ - config ARCH_PROC_KCORE_TEXT - def_bool y - -diff --git a/arch/arm64/dtrace/Makefile.arch b/arch/arm64/dtrace/Makefile.arch -new file mode 100644 -index 000000000000..393b5cea3f7c ---- /dev/null -+++ b/arch/arm64/dtrace/Makefile.arch -@@ -0,0 +1,17 @@ -+# -+# Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+# -+ -+DTARCHDIR = ../arch/arm64/dtrace -+ -+ccflags-y += -I$(srctree)/arch/arm64/dtrace/include -Idtrace -+ -+dtrace-obj += dtrace_asm_arm64.o dtrace_isa_arm64.o -+fasttrap-obj += fasttrap_arm64.o -+fbt-obj += fbt_arm64.o -+sdt-obj += sdt_arm64.o -+ -+dtrace-y += $(addprefix $(DTARCHDIR)/, $(dtrace-obj)) -+fasttrap-y += $(addprefix $(DTARCHDIR)/, $(fasttrap-obj)) -+fbt-y += $(addprefix $(DTARCHDIR)/, $(fbt-obj)) -+sdt-y += $(addprefix $(DTARCHDIR)/, $(sdt-obj)) -diff --git a/arch/arm64/dtrace/dtrace_asm_arm64.S b/arch/arm64/dtrace/dtrace_asm_arm64.S -new file mode 100644 -index 000000000000..beee9d7149de ---- /dev/null -+++ b/arch/arm64/dtrace/dtrace_asm_arm64.S -@@ -0,0 +1,51 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - ARM64 specific assembly -+ * -+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/linkage.h> -+ -+ENTRY(dtrace_caller) -+ mov x0, #-1 -+ ret -+ENDPROC(dtrace_caller) -+ -+ENTRY(dtrace_copy) -+ ret -+ENDPROC(dtrace_copy) -+ -+ENTRY(dtrace_copystr) -+ ret -+ENDPROC(dtrace_copystr) -+ -+ENTRY(dtrace_fuword8_nocheck) -+ ldrb w0, [x0] -+ ret -+ENDPROC(dtrace_fuword8_nocheck) -+ -+ENTRY(dtrace_fuword16_nocheck) -+ ldrh w0, [x0] -+ ret -+ENDPROC(dtrace_fuword16_nocheck) -+ -+ENTRY(dtrace_fuword32_nocheck) -+ ldr w0, [x0] -+ ret -+ENDPROC(dtrace_fuword32_nocheck) -+ -+ENTRY(dtrace_fuword64_nocheck) -+ ldr x0, [x0] -+ ret -+ENDPROC(dtrace_fuword64_nocheck) -diff --git a/arch/arm64/dtrace/dtrace_isa_arm64.c b/arch/arm64/dtrace/dtrace_isa_arm64.c -new file mode 100644 -index 000000000000..2eb530149c5b ---- /dev/null -+++ b/arch/arm64/dtrace/dtrace_isa_arm64.c -@@ -0,0 +1,164 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_isa_arm64.c -+ * DESCRIPTION: DTrace - arm64 architecture specific support functions -+ * -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <asm/stacktrace.h> -+#include <linux/ptrace.h> -+ -+#include "dtrace.h" -+ -+uintptr_t _userlimit = 0x0000ffffffffffffLL; -+ -+void dtrace_copyin_arch(uintptr_t uaddr, uintptr_t kaddr, size_t size, -+ volatile uint16_t *flags) -+{ -+} -+ -+void dtrace_copyinstr_arch(uintptr_t uaddr, uintptr_t kaddr, size_t size, -+ volatile uint16_t *flags) -+{ -+} -+ -+void dtrace_copyout(uintptr_t uaddr, uintptr_t kaddr, size_t size, -+ volatile uint16_t *flags) -+{ -+} -+ -+void dtrace_copyoutstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, -+ volatile uint16_t *flags) -+{ -+} -+ -+#define DTRACE_FUWORD(bits) \ -+ uint##bits##_t dtrace_fuword##bits(void *uaddr) \ -+ { \ -+ extern uint##bits##_t dtrace_fuword##bits##_nocheck(void *);\ -+ \ -+ if ((uintptr_t)uaddr > _userlimit) { \ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); \ -+ this_cpu_core->cpuc_dtrace_illval = (uintptr_t)uaddr; \ -+ } \ -+ \ -+ return dtrace_fuword##bits##_nocheck(uaddr); \ -+ } -+ -+DTRACE_FUWORD(8) -+DTRACE_FUWORD(16) -+DTRACE_FUWORD(32) -+DTRACE_FUWORD(64) -+ -+static int dtrace_unwind_frame(struct task_struct *task, -+ struct stackframe *frame) -+{ -+ unsigned long fp = frame->fp; -+ -+ if (fp & 0xf) -+ return -EINVAL; -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); -+ frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ -+ if (!frame->fp && !frame->pc) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+uint64_t dtrace_getarg(int argno, int aframes) -+{ -+ uint64_t *st; -+ uint64_t val; -+ int i; -+ struct stackframe frame; -+ struct task_struct *task = current; -+ -+ if (argno < 7) -+ return 0; -+ -+ if (this_cpu_core->cpu_dtrace_regs) -+ st = (uint64_t *)this_cpu_core->cpu_dtrace_regs->regs[29]; -+ else { -+ frame.fp = (unsigned long)__builtin_frame_address(0); -+ frame.pc = (unsigned long)dtrace_getarg; -+ -+ aframes += 1; /* Count this function. 
*/ -+ for (i = 0; i < aframes; i++) { -+ if (dtrace_unwind_frame(task, &frame) < 0) -+ break; -+ } -+ -+ /* -+ * If we cannot traverse the expected number of stack frames, -+ * there is something wrong with the stack. -+ */ -+ if (i < aframes) { -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADSTACK); -+ -+ return 0; -+ } -+ -+ st = (uint64_t *)frame.fp; -+ } -+ -+ /* -+ * The first 7 arguments (arg0 through arg6) are passed in registers -+ * to dtrace_probe(). The remaining arguments (arg7 through arg9) are -+ * passed on the stack. -+ * -+ * Stack layout: -+ * bp[0] = pushed fp from caller -+ * bp[1] = return address -+ * bp[2] = 8th argument (arg7 -> argno = 7) -+ * bp[3] = 9th argument (arg8 -> argno = 8) -+ * ... -+ */ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ val = READ_ONCE_NOCHECK(st[2 + (argno - 7)]); -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ -+ return val; -+} -+ -+ulong_t dtrace_getreg(struct task_struct *task, uint_t reg) -+{ -+ struct pt_regs *rp = task_pt_regs(task); -+ -+ return regs_get_register(rp, reg * sizeof(uint64_t)); -+} -+ -+void pdata_init(struct dtrace_module *pdata, struct module *mp) -+{ -+ /* -+ * Throw away existing data as we don't support reusal at -+ * the moment. -+ */ -+ if (mp->pdata != NULL) -+ pdata_cleanup(pdata, mp); -+ -+ pdata->sdt_tab = NULL; -+ pdata->fbt_tab = NULL; -+} -+ -+void pdata_cleanup(struct dtrace_module *pdata, struct module *mp) -+{ -+ if (pdata->sdt_tab != NULL) -+ dtrace_free_text(pdata->sdt_tab); -+ if (pdata->fbt_tab != NULL) -+ dtrace_free_text(pdata->fbt_tab); -+} -diff --git a/arch/arm64/dtrace/fasttrap_arm64.c b/arch/arm64/dtrace/fasttrap_arm64.c -new file mode 100644 -index 000000000000..cc970a11412c ---- /dev/null -+++ b/arch/arm64/dtrace/fasttrap_arm64.c -@@ -0,0 +1,282 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: fasttrap_arm64.c -+ * DESCRIPTION: DTrace - fasttrap provider implementation for arm64 -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include <asm/insn.h> -+#include <linux/kernel.h> -+#include <linux/slab.h> -+#include <linux/uaccess.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "fasttrap_impl.h" -+ -+static int has_jump_table(const asm_instr_t *addr, size_t size) -+{ -+ const asm_instr_t *end = addr + size; -+ -+ while (addr < end) { -+ /* -+ * If we encounter a branch-to-register instruction, we assume -+ * it is part of a jump table implementation. -+ */ -+ if (aarch64_insn_is_br(addr[0])) -+ return 1; -+ -+ addr++; -+ } -+ -+ return 0; -+} -+ -+static uint64_t *fasttrap_all_offsets(asm_instr_t *text, size_t size, -+ uint64_t *np) -+{ -+ uint64_t *offs = NULL; -+ uint64_t noffs; -+ asm_instr_t *instr; -+ asm_instr_t *end; -+ -+ /* -+ * Two passes are taken through this section of code. The first time -+ * around we merely count the number of probe points. The second time, -+ * we actually record their locations. -+ */ -+again: -+ noffs = 0; -+ instr = text; -+ end = text + size; -+ -+ while (instr < end) { -+ if (offs) -+ offs[noffs] = (uint64_t) -+ ((uintptr_t)instr - (uintptr_t)text); -+ noffs++; -+ -+ instr++; -+ } -+ -+ if (offs == NULL) { -+ /* -+ * No matching offsets found - we are done. -+ */ -+ if (noffs == 0) -+ goto fail; -+ -+ /* -+ * We know how many tracepoint locations there are for this -+ * probe, so allocate a member to record them, and kick off the -+ * second pass. 
-+ */ -+ offs = kmalloc(sizeof(uint64_t) * noffs, GFP_KERNEL); -+ if (!offs) -+ goto fail; -+ -+ goto again; -+ } -+ -+ *np = noffs; -+ -+ return offs; -+ -+fail: -+ *np = 0; -+ kfree(offs); -+ -+ return NULL; -+} -+ -+uint64_t *fasttrap_glob_offsets(struct fasttrap_probe_spec *probe, -+ uint64_t *np) -+{ -+ size_t size = probe->ftps_size; -+ asm_instr_t *text = NULL; -+ asm_instr_t *instr; -+ asm_instr_t *end; -+ uint64_t *offs = NULL; -+ uint64_t noffs; -+ int ret = 0; -+ char ostr[sizeof(instr) * 2 + 1]; /* 2 chars / byte + 1 */ -+ -+ if (!IS_ALIGNED(size, sizeof(instr[0]))) -+ goto fail; -+ -+ text = kmalloc(size, GFP_KERNEL); -+ if (!text) -+ goto fail; -+ -+ ret = dtrace_copy_code(probe->ftps_pid, (uint8_t *)text, -+ probe->ftps_pc, size); -+ if (ret != 0) -+ goto fail; -+ -+ /* -+ * From this point on, size will be a count of instructions rather than -+ * a byte count. We already verified earlier on that it is a multiple -+ * of the instruction size. -+ */ -+ size /= sizeof(instr[0]); -+ -+ if (has_jump_table(text, size)) -+ goto fail; -+ -+ if (probe->ftps_glen == 1 && probe->ftps_gstr[0] == '*') { -+ offs = fasttrap_all_offsets(text, size, &noffs); -+ goto out; -+ } -+ -+ /* -+ * Two passes are taken through this section of code. The first time -+ * around we merely count the number of probe points. The second time, -+ * we actually record their locations. -+ */ -+again: -+ noffs = 0; -+ instr = text; -+ end = text + size; -+ -+ while (instr < end) { -+ uint64_t off = (uint64_t) -+ ((uintptr_t)instr - (uintptr_t)text); -+ -+ snprintf(ostr, sizeof(ostr), "%llx", off); -+ if (dtrace_gmatch(ostr, probe->ftps_gstr)) { -+ if (offs) -+ offs[noffs] = off; -+ noffs++; -+ } -+ -+ instr++; -+ } -+ -+ if (offs == NULL) { -+ /* -+ * No matching offsets found - we are done. -+ */ -+ if (noffs == 0) -+ goto fail; -+ -+ /* -+ * We know how many tracepoint locations there are for this -+ * probe, so allocate member to record them, and kick off the -+ * second pass. 
-+ */ -+ offs = kmalloc(sizeof(uint64_t) * noffs, GFP_KERNEL); -+ if (!offs) -+ goto fail; -+ -+ goto again; -+ } -+ -+out: -+ kfree(text); -+ -+ *np = noffs; -+ -+ return offs; -+ -+fail: -+ kfree(offs); -+ kfree(text); -+ -+ *np = 0; -+ return NULL; -+} -+ -+uint64_t fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, -+ int aframes) -+{ -+ struct pt_regs *regs = this_cpu_core->cpu_dtrace_regs; -+ uint64_t *st; -+ uint64_t val; -+ -+ if (regs == NULL) -+ return 0; -+ -+ if (argno < 8) -+ return regs->regs[argno]; -+ -+ pagefault_disable(); -+ st = (uint64_t *)regs->sp; -+ __copy_from_user_inatomic_nocache(&val, (void *)&st[argno - 8], -+ sizeof(st[0])); -+ pagefault_enable(); -+ -+ return val; -+} -+ -+uint64_t fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, -+ int argno, int aframes) -+{ -+ return fasttrap_pid_getarg(arg, id, parg, argno, aframes); -+} -+ -+static void fasttrap_map_args(struct fasttrap_probe *probe, -+ struct pt_regs *regs, int argc, uintptr_t *argv) -+{ -+ int i, x, cap = min(argc, (int)probe->ftp_nargs); -+ uintptr_t *st = (uintptr_t *)regs->sp; -+ -+ for (i = 0; i < cap; i++) { -+ x = probe->ftp_argmap[i]; -+ -+ if (x < 8) -+ argv[i] = regs->regs[x]; -+ else { -+ pagefault_disable(); -+ __copy_from_user_inatomic_nocache(&argv[i], -+ (void *)&st[x - 8], -+ sizeof(st[0])); -+ pagefault_enable(); -+ } -+ } -+ -+ while (i < argc) -+ argv[i++] = 0; -+} -+ -+void fasttrap_pid_probe_arch(struct fasttrap_probe *ftp, struct pt_regs *regs) -+{ -+ if (ftp->ftp_argmap == NULL) { -+ dtrace_probe(ftp->ftp_id, regs->regs[0], regs->regs[1], -+ regs->regs[2], regs->regs[3], -+ regs->regs[4], regs->regs[5], -+ regs->regs[6]); -+ } else { -+ uintptr_t t[7]; -+ -+ fasttrap_map_args(ftp, regs, sizeof(t) / sizeof(t[0]), t); -+ dtrace_probe(ftp->ftp_id, t[0], t[1], t[2], t[3], -+ t[4], t[5], t[6]); -+ } -+} -+ -+void fasttrap_pid_retprobe_arch(struct fasttrap_probe *ftp, -+ struct pt_regs *regs) -+{ -+ /* -+ * FIXME: The first argument 
to the probe should be the offset in the -+ * function that the return occurred at, but uprobes doesn't give -+ * us that information (or so it seems). -+ */ -+ dtrace_probe(ftp->ftp_id, 0, regs->regs[0], regs->regs[1], 0, 0, 0, 0); -+} -+ -+void fasttrap_set_enabled(struct pt_regs *regs) -+{ -+ regs->regs[0] = 1; -+} -diff --git a/arch/arm64/dtrace/fbt_arm64.c b/arch/arm64/dtrace/fbt_arm64.c -new file mode 100644 -index 000000000000..be9dcaf8db28 ---- /dev/null -+++ b/arch/arm64/dtrace/fbt_arm64.c -@@ -0,0 +1,152 @@ -+/* -+ * FILE: fbt_arm64.c -+ * DESCRIPTION: DTrace - FBT provider implementation for arm64 -+ * -+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/dtrace_fbt.h> -+#include <linux/ptrace.h> -+#include <linux/vmalloc.h> -+#include <asm/dtrace_util.h> -+#include <asm/debug-monitors.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "fbt_impl.h" -+ -+static int fbt_brk_hook(struct pt_regs *regs, unsigned int esr) -+{ -+ uintptr_t ip = instruction_pointer(regs); -+ struct fbt_probe *fbp = fbt_probetab[FBT_ADDR2NDX(ip)]; -+ -+ for (; fbp != NULL; fbp = fbp->fbp_hashnext) { -+ if ((uintptr_t)fbp->fbp_patchpoint == ip) { -+ struct pt_regs *oregs; -+ -+ oregs = this_cpu_core->cpu_dtrace_regs; -+ this_cpu_core->cpu_dtrace_regs = regs; -+ -+ if (fbp->fbp_roffset == 0) { -+ dtrace_probe(fbp->fbp_id, regs->regs[0], -+ regs->regs[1], regs->regs[2], -+ regs->regs[3], regs->regs[4], -+ regs->regs[5], regs->regs[6]); -+ } else { -+ dtrace_probe(fbp->fbp_id, fbp->fbp_roffset, -+ regs->regs[0], 0, 0, 0, 0, 0); -+ } -+ -+ this_cpu_core->cpu_dtrace_regs = oregs; -+ -+ return DBG_HOOK_HANDLED; -+ } -+ } -+ -+ return DBG_HOOK_ERROR; -+} -+ -+uint64_t fbt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, -+ int aframes) -+{ -+ struct pt_regs *regs = this_cpu_core->cpu_dtrace_regs; -+ uint64_t *st; -+ uint64_t val; -+ -+ if (regs == NULL) -+ regs = current_pt_regs(); -+ -+ if (argno < 8) -+ return regs->regs[argno]; -+ -+ /* -+ * Arguments are passed by register for the first 8 arguments, and the -+ * rest is placed on the stack. The frame pointer (fp) points at the -+ * beginning of the current frame, and the stack pointer (sp) will -+ * point to the end of the frame. Arguments passed by stack are placed -+ * in stack slots at the end of the frame, so at (sp), (sp + 1), etc... -+ */ -+ st = (uint64_t *)regs->sp; -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ val = READ_ONCE_NOCHECK(st[argno - 8]); -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ -+ return 0; -+} -+ -+void fbt_provide_probe_arch(struct fbt_probe *fbp, int type, int stype) -+{ -+ fbp->fbp_patchval = type == FBT_ENTRY ? 
BRK64_OPCODE_DPROBE_FBE -+ : BRK64_OPCODE_DPROBE_FBR; -+ fbp->fbp_savedval = dtrace_text_peek(fbp->fbp_patchpoint); -+} -+ -+int fbt_can_patch_return_arch(asm_instr_t *addr) -+{ -+ return 1; -+} -+ -+int fbt_provide_module_arch(void *arg, struct module *mp) -+{ -+ return 1; -+} -+ -+void fbt_destroy_module(void *arg, struct module *mp) -+{ -+} -+ -+void fbt_enable_arch(struct fbt_probe *fbp, dtrace_id_t id, void *arg) -+{ -+ dtrace_text_poke(fbp->fbp_patchpoint, fbp->fbp_patchval); -+} -+ -+void fbt_disable_arch(struct fbt_probe *fbp, dtrace_id_t id, void *arg) -+{ -+ dtrace_text_poke(fbp->fbp_patchpoint, fbp->fbp_savedval); -+} -+ -+static struct break_hook dtrace_fbe_break_hook = { -+ .imm = DPROBES_FBE_BRK_IMM, -+ .fn = fbt_brk_hook, -+}; -+ -+static struct break_hook dtrace_fbr_break_hook = { -+ .imm = DPROBES_FBR_BRK_IMM, -+ .fn = fbt_brk_hook, -+}; -+ -+int fbt_dev_init_arch(void) -+{ -+ fbt_probetab_mask = fbt_probetab_size - 1; -+ fbt_probetab = dtrace_vzalloc_try(fbt_probetab_size * -+ sizeof(struct fbt_probe *)); -+ -+ if (fbt_probetab == NULL) -+ return -ENOMEM; -+ -+ dtrace_kernel_brk_start(&dtrace_fbe_break_hook); -+ dtrace_kernel_brk_start(&dtrace_fbr_break_hook); -+ -+ return 0; -+} -+ -+void fbt_dev_exit_arch(void) -+{ -+ dtrace_kernel_brk_stop(&dtrace_fbr_break_hook); -+ dtrace_kernel_brk_stop(&dtrace_fbe_break_hook); -+ -+ vfree(fbt_probetab); -+ fbt_probetab_mask = 0; -+ fbt_probetab_size = 0; -+} -diff --git a/arch/arm64/dtrace/include/dtrace/fasttrap_arch.h b/arch/arm64/dtrace/include/dtrace/fasttrap_arch.h -new file mode 100644 -index 000000000000..d5ffa6e711db ---- /dev/null -+++ b/arch/arm64/dtrace/include/dtrace/fasttrap_arch.h -@@ -0,0 +1,30 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - Fasttrap provider implementation defines -+ * -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _ARM64_FASTTRAP_ARCH_H -+#define _ARM64_FASTTRAP_ARCH_H -+ -+#define FASTTRAP_ENTRY_AFRAMES 8 -+#define FASTTRAP_RETURN_AFRAMES 8 -+#define FASTTRAP_OFFSET_AFRAMES 8 -+ -+#endif /* _ARM64_FASTTRAP_ARCH_H */ -diff --git a/arch/arm64/dtrace/include/dtrace/fbt_arch.h b/arch/arm64/dtrace/include/dtrace/fbt_arch.h -new file mode 100644 -index 000000000000..ed1cd785b3ba ---- /dev/null -+++ b/arch/arm64/dtrace/include/dtrace/fbt_arch.h -@@ -0,0 +1,53 @@ -+/* -+ * Dynamic Tracing for Linux - FBT Implementation defines -+ * -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. 
-+ */ -+ -+#ifndef _ARM64_FBT_ARCH_H -+#define _ARM64_FBT_ARCH_H -+ -+/* -+ * FBT entry probes are triggered from a breakpoint. The following stack trace -+ * illustrates the frames that are involved in the call sequence prior to the -+ * actual FBT provider handler. -+ * -+ * vmlinux`brk_handler+0x70 <- to be skipped -+ * vmlinux`do_debug_exception+0x9c <- to be skipped -+ * vmlinux`el1_sync+0x1d8 <- to be skipped -+ * vmlinux`SyS_read+0x4 -+ * -+ * Therefore, 3 frames need to be skipped. -+ */ -+#define FBT_AFRAMES 3 -+ -+struct fbt_probe { -+ char *fbp_name; /* name of probe */ -+ dtrace_id_t fbp_id; /* probe ID */ -+ struct module *fbp_module; /* defining module */ -+ int fbp_primary; /* non-zero if primary mod */ -+ asm_instr_t *fbp_patchpoint;/* patch point */ -+ asm_instr_t fbp_patchval; /* instruction to patch */ -+ asm_instr_t fbp_savedval; /* saved instruction value */ -+ uint64_t fbp_roffset; /* relative offset */ -+ struct fbt_probe *fbp_next; /* next probe */ -+ struct fbt_probe *fbp_hashnext; /* next on hash */ -+ int fbp_isret; -+}; -+ -+#endif /* _ARM64_FBT_ARCH_H */ -diff --git a/arch/arm64/dtrace/include/dtrace/sdt_arch.h b/arch/arm64/dtrace/include/dtrace/sdt_arch.h -new file mode 100644 -index 000000000000..237922a77495 ---- /dev/null -+++ b/arch/arm64/dtrace/include/dtrace/sdt_arch.h -@@ -0,0 +1,28 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Dynamic Tracing for Linux - SDT Implementation defines -+ * -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. -+ */ -+ -+/* -+ * Note: The contents of this file are private to the implementation of the -+ * DTrace subsystem and are subject to change at any time without notice. -+ */ -+ -+#ifndef _ARM64_SDT_ARCH_H -+#define _ARM64_SDT_ARCH_H -+ -+#define SDT_AFRAMES 1 -+ -+#endif /* _ARM64_SDT_ARCH_H */ -diff --git a/arch/arm64/dtrace/sdt_arm64.c b/arch/arm64/dtrace/sdt_arm64.c -new file mode 100644 -index 000000000000..ba25824e413e ---- /dev/null -+++ b/arch/arm64/dtrace/sdt_arm64.c -@@ -0,0 +1,122 @@ -+/* -+ * FILE: sdt_arm64.c -+ * DESCRIPTION: DTrace - SDT provider implementation for arm64 -+ * -+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include <linux/ptrace.h> -+#include <linux/sdt.h> -+#include <asm/debug-monitors.h> -+ -+#include "dtrace.h" -+#include "dtrace_dev.h" -+#include "sdt_impl.h" -+ -+static int sdt_brk_hook(struct pt_regs *regs, unsigned int esr) -+{ -+ uintptr_t ip = instruction_pointer(regs); -+ struct sdt_probe *sdt = sdt_probetab[SDT_ADDR2NDX(ip)]; -+ -+ for (; sdt != NULL; sdt = sdt->sdp_hashnext) { -+ if ((uintptr_t)sdt->sdp_patchpoint == ip) { -+ if (sdt->sdp_ptype == SDTPT_IS_ENABLED) -+ regs->regs[0] = 1; -+ else { -+ this_cpu_core->cpu_dtrace_regs = regs; -+ dtrace_probe(sdt->sdp_id, regs->regs[0], -+ regs->regs[1], regs->regs[2], -+ regs->regs[3], regs->regs[4], -+ regs->regs[5], regs->regs[6]); -+ this_cpu_core->cpu_dtrace_regs = NULL; -+ } -+ -+ instruction_pointer_set(regs, -+ instruction_pointer(regs) + 4); -+ -+ return DBG_HOOK_HANDLED; -+ } -+ } -+ -+ return DBG_HOOK_ERROR; -+} -+ -+void sdt_provide_probe_arch(struct sdt_probe *sdp, struct module *mp, int idx) -+{ -+ sdp->sdp_patchval = BRK64_OPCODE_DPROBE_SDT; -+ sdp->sdp_savedval = dtrace_text_peek(sdp->sdp_patchpoint); -+} -+ -+int sdt_provide_module_arch(void *arg, struct module *mp) -+{ -+ return 1; -+} -+ -+void sdt_destroy_module(void *arg, struct module *mp) -+{ -+} -+ -+void sdt_enable_arch(struct sdt_probe *sdp, dtrace_id_t id, void *arg) -+{ -+ dtrace_text_poke(sdp->sdp_patchpoint, sdp->sdp_patchval); -+} -+ -+void sdt_disable_arch(struct sdt_probe *sdp, dtrace_id_t id, void *arg) -+{ -+ dtrace_text_poke(sdp->sdp_patchpoint, sdp->sdp_savedval); -+} -+ -+static struct break_hook dtrace_sdt_break_hook = { -+ .imm = DPROBES_SDT_BRK_IMM, -+ .fn = sdt_brk_hook, -+}; -+ -+int sdt_dev_init_arch(void) -+{ -+ dtrace_kernel_brk_start(&dtrace_sdt_break_hook); -+ return 0; -+} -+ -+void sdt_dev_exit_arch(void) -+{ -+ dtrace_kernel_brk_stop(&dtrace_sdt_break_hook); -+} -+ -+uint64_t sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, -+ int aframes) -+{ -+ struct pt_regs *regs = 
this_cpu_core->cpu_dtrace_regs; -+ uint64_t *st; -+ uint64_t val; -+ -+ if (regs == NULL) -+ regs = current_pt_regs(); -+ -+ if (argno < 8) -+ return regs->regs[argno]; -+ -+ /* -+ * Arguments are passed by register for the first 8 arguments, and the -+ * rest is placed on the stack. The frame pointer (fp) points at the -+ * beginning of the current frame, and the stack pointer (sp) will -+ * point to the end of the frame. Arguments passed by stack are placed -+ * in stack slots at the end of the frame, so at (sp), (sp + 1), etc... -+ */ -+ st = (uint64_t *)regs->sp; -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -+ val = READ_ONCE_NOCHECK(st[argno - 8]); -+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); -+ -+ return val; -+} -diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h -index e3d47b52161d..0bf18e702bbc 100644 ---- a/arch/arm64/include/asm/brk-imm.h -+++ b/arch/arm64/include/asm/brk-imm.h -@@ -10,6 +10,9 @@ - * #imm16 values used for BRK instruction generation - * 0x004: for installing kprobes - * 0x005: for installing uprobes -+ * 0x006: for installing DTrace SDT probes -+ * 0x007: for installing DTrace function-boundary tracing entry probes -+ * 0x008: for installing DTrace function-boundary tracing return probes - * Allowed values for kgdb are 0x400 - 0x7ff - * 0x100: for triggering a fault on purpose (reserved) - * 0x400: for dynamic BRK instruction -@@ -19,6 +22,9 @@ - */ - #define KPROBES_BRK_IMM 0x004 - #define UPROBES_BRK_IMM 0x005 -+#define DPROBES_SDT_BRK_IMM 0x006 -+#define DPROBES_FBE_BRK_IMM 0x007 -+#define DPROBES_FBR_BRK_IMM 0x008 - #define FAULT_BRK_IMM 0x100 - #define KGDB_DYN_DBG_BRK_IMM 0x400 - #define KGDB_COMPILED_DBG_BRK_IMM 0x401 -diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h -index b4a40535a3d8..b9bbfe1e4780 100644 ---- a/arch/arm64/include/asm/cpu.h -+++ b/arch/arm64/include/asm/cpu.h -@@ -56,6 +56,7 @@ struct cpuinfo_arm64 { - }; - - DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); 
-+#define cpu_data(cpu) per_cpu(cpu_data, (cpu)) - - void cpuinfo_store_cpu(void); - void __init cpuinfo_store_boot_cpu(void); -diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h -index 7619f473155f..d5f27c8c7bf2 100644 ---- a/arch/arm64/include/asm/debug-monitors.h -+++ b/arch/arm64/include/asm/debug-monitors.h -@@ -55,6 +55,10 @@ - #define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5)) - /* uprobes BRK opcodes with ESR encoding */ - #define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5)) -+/* DTrace probes BRK opcodes with ESR encoding */ -+#define BRK64_OPCODE_DPROBE_SDT (AARCH64_BREAK_MON | (DPROBES_SDT_BRK_IMM << 5)) -+#define BRK64_OPCODE_DPROBE_FBE (AARCH64_BREAK_MON | (DPROBES_FBE_BRK_IMM << 5)) -+#define BRK64_OPCODE_DPROBE_FBR (AARCH64_BREAK_MON | (DPROBES_FBR_BRK_IMM << 5)) - - /* AArch32 */ - #define DBG_ESR_EVT_BKPT 0x4 -diff --git a/arch/arm64/include/asm/dtrace_arch.h b/arch/arm64/include/asm/dtrace_arch.h -new file mode 100644 -index 000000000000..89f883e20aa7 ---- /dev/null -+++ b/arch/arm64/include/asm/dtrace_arch.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. */ -+ -+#ifndef _ASM_ARM64_DTRACE_ARCH_H -+#define _ASM_ARM64_DTRACE_ARCH_H -+ -+/* Number of argumens stored inside the mstate. */ -+#define DTRACE_MSTATE_ARGS_MAX 7 -+ -+typedef uint32_t asm_instr_t; -+ -+typedef int (*prov_exit_f)(void); -+ -+/* -+ * Structure to hold DTrace specific information about modules (including the -+ * core kernel module). 
Note that each module (and the main kernel) already -+ * has three fields that relate to probing: -+ * - sdt_probes: description of SDT probes in the module -+ * - sdt_probec: number of SDT probes in the module -+ * - pdata: pointer to a dtrace_module struct (for DTrace) -+ */ -+struct dtrace_module { -+ int enabled_cnt; -+ size_t sdt_probe_cnt; -+ asm_instr_t *sdt_tab; -+ size_t fbt_probe_cnt; -+ asm_instr_t *fbt_tab; -+ prov_exit_f prov_exit; /* Called with module_mutex held */ -+}; -+ -+#endif /* _ASM_ARM64_DTRACE_ARCH_H */ -diff --git a/arch/arm64/include/asm/dtrace_cpuinfo.h b/arch/arm64/include/asm/dtrace_cpuinfo.h -new file mode 100644 -index 000000000000..4e0ab793c92c ---- /dev/null -+++ b/arch/arm64/include/asm/dtrace_cpuinfo.h -@@ -0,0 +1,13 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* Copyright (C) 2018 Oracle, Inc. */ -+ -+#ifndef _ASM_ARM64_DTRACE_CPUINFO_H_ -+#define _ASM_ARM64_DTRACE_CPUINFO_H_ -+ -+#include <asm/cpu.h> -+ -+typedef struct cpuinfo_arm64 cpuinfo_arch_t; -+ -+#define dtrace_cpuinfo_chip(ci) ((ci)->cpu.node_id) -+ -+#endif /* _ASM_ARM64_DTRACE_CPUINFO_H_ */ -diff --git a/arch/arm64/include/asm/dtrace_sdt_arch.h b/arch/arm64/include/asm/dtrace_sdt_arch.h -new file mode 100644 -index 000000000000..b93a03c215b3 ---- /dev/null -+++ b/arch/arm64/include/asm/dtrace_sdt_arch.h -@@ -0,0 +1,15 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* Copyright (C) 2018 Oracle, Inc. 
*/ -+ -+#ifndef _ASM_ARM64_DTRACE_SDT_ARCH_H -+#define _ASM_ARM64_DTRACE_SDT_ARCH_H -+ -+#include <asm/dtrace_arch.h> -+ -+#define NOP_INSTR 0xd503201f -+#define MOV_INSTR 0xd2800000 /* mov x0, #0x0 - default = false */ -+ -+#define __DTRACE_SDT_ISENABLED_PROTO void -+#define __DTRACE_SDT_ISENABLED_ARGS -+ -+#endif /* _ASM_ARM64_DTRACE_SDT_ARCH_H */ -diff --git a/arch/arm64/include/asm/dtrace_syscall.h b/arch/arm64/include/asm/dtrace_syscall.h -new file mode 100644 -index 000000000000..402826562478 ---- /dev/null -+++ b/arch/arm64/include/asm/dtrace_syscall.h -@@ -0,0 +1,3 @@ -+/* -+ * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -diff --git a/arch/arm64/include/asm/dtrace_syscall_types.h b/arch/arm64/include/asm/dtrace_syscall_types.h -new file mode 100644 -index 000000000000..88e6eca6e169 ---- /dev/null -+++ b/arch/arm64/include/asm/dtrace_syscall_types.h -@@ -0,0 +1,11 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. -+ */ -+ -+#include <linux/types.h> -+#include <linux/dtrace_types.h> -+ -+typedef asmlinkage long (*dt_sys_call_t)(const struct pt_regs *regs); -+ -+#define DTRACE_SYSCALL_WRAP_PREFIX "__arm64_" -diff --git a/arch/arm64/include/asm/dtrace_util.h b/arch/arm64/include/asm/dtrace_util.h -new file mode 100644 -index 000000000000..003bd34524d6 ---- /dev/null -+++ b/arch/arm64/include/asm/dtrace_util.h -@@ -0,0 +1,14 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* Copyright (C) 2019, Oracle and/or its affiliates. All rights reserved. 
*/ -+ -+#ifndef _ASM_ARM64_DTRACE_UTIL_H -+#define _ASM_ARM64_DTRACE_UTIL_H -+ -+#include <asm/dtrace_arch.h> -+ -+extern asm_instr_t dtrace_text_peek(asm_instr_t *addr); -+extern void dtrace_text_poke(asm_instr_t *addr, asm_instr_t opcode); -+extern void dtrace_kernel_brk_start(void *arg); -+extern void dtrace_kernel_brk_stop(void *arg); -+ -+#endif /* _ASM_ARM64_DTRACE_UTIL_H */ -diff --git a/arch/arm64/include/asm/kdebug.h b/arch/arm64/include/asm/kdebug.h -new file mode 100644 -index 000000000000..66c884086d04 ---- /dev/null -+++ b/arch/arm64/include/asm/kdebug.h -@@ -0,0 +1,11 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _ARM64_KDEBUG_H -+#define _ARM64_KDEBUG_H -+ -+/* Grossly misnamed. */ -+enum die_val { -+ DIE_OOPS = 1, -+ DIE_PAGE_FAULT, -+}; -+ -+#endif /* _ARM64_KDEBUG_H */ -diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h -index 65299a2dcf9c..dfae078efcb9 100644 ---- a/arch/arm64/include/asm/syscall.h -+++ b/arch/arm64/include/asm/syscall.h -@@ -11,7 +11,13 @@ - - typedef long (*syscall_fn_t)(const struct pt_regs *regs); - --extern const syscall_fn_t sys_call_table[]; -+#if IS_ENABLED(CONFIG_DT_SYSTRACE) -+#define CONST_SYS_CALL_TABLE -+#else -+#define CONST_SYS_CALL_TABLE const -+#endif -+ -+extern CONST_SYS_CALL_TABLE syscall_fn_t sys_call_table[]; - - #ifdef CONFIG_COMPAT - extern const syscall_fn_t compat_sys_call_table[]; -diff --git a/arch/arm64/kernel/dtrace_fbt.c b/arch/arm64/kernel/dtrace_fbt.c -new file mode 100644 -index 000000000000..3761e8aa4550 ---- /dev/null -+++ b/arch/arm64/kernel/dtrace_fbt.c -@@ -0,0 +1,187 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_fbt.c -+ * DESCRIPTION: Dynamic Tracing: FBT registration code (arch-specific) -+ * -+ * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. 
-+ */ -+ -+#include <linux/kernel.h> -+#include <linux/kallsyms.h> -+#include <linux/dtrace_os.h> -+#include <linux/dtrace_fbt.h> -+#include <linux/dtrace_task_impl.h> -+#include <linux/slab.h> -+#include <linux/sort.h> -+#include <asm/insn.h> -+#include <asm/sections.h> -+ -+#define FBT_REG_FP 0x1d /* fp is regiater 29 */ -+#define FBT_REG_LR 0x1e /* lr is regiater 30 */ -+#define FBT_REG_SP 0x1f /* sp is register 31 */ -+#define FBT_MOV_FP_SP 0x910003fd /* "mov x29, sp" */ -+ -+#define BL_SENTRY(tp, nm) extern tp nm; -+#define BL_DENTRY(tp, nm) -+#include "fbt_blacklist.h" -+#undef BL_DENTRY -+#undef BL_SENTRY -+ -+static void -+dtrace_fbt_populate_bl(void) -+{ -+#define BL_SENTRY(tp, nm) dtrace_fbt_bl_add((unsigned long)&nm, \ -+ __stringify(nm)); -+#define BL_DENTRY(tp, nm) dtrace_fbt_bl_add(0, __stringify(nm)); -+#include "fbt_blacklist.h" -+#undef BL_SENTRY -+#undef BL_DENTRY -+} -+ -+void dtrace_fbt_init(fbt_add_probe_fn fbt_add_probe, struct module *mp, -+ void *arg) -+{ -+ loff_t pos; -+ struct kallsym_iter sym; -+ asm_instr_t *paddr = NULL; -+ struct dt_fbt_bl_entry *blent = NULL; -+ -+ /* -+ * Look up any unresolved symbols in the blacklist, and sort the list -+ * by ascending address. -+ */ -+ dtrace_fbt_populate_bl(); -+ -+ blent = dtrace_fbt_bl_first(); -+ -+ pos = 0; -+ kallsyms_iter_reset(&sym, 0); -+ while (kallsyms_iter_update(&sym, pos++)) { -+ asm_instr_t *addr, *end; -+ asm_instr_t instr; -+ void *fbtp = NULL; -+ -+ /* -+ * There is no point considering non-function symbols for FBT, -+ * or symbols that have a zero size. We could consider weak -+ * symbols but that gets quite complicated and there is no -+ * demands for that (so far). -+ */ -+ if (sym.type != 'T' && sym.type != 't') -+ continue; -+ if (!sym.size) -+ continue; -+ -+ /* -+ * The symbol must be at a properly aligned text address. 
-+ */ -+ if (!IS_ALIGNED(sym.value, sizeof(asm_instr_t))) -+ continue; -+ -+ /* -+ * Handle only symbols that belong to the module we have been -+ * asked for. -+ */ -+ if (mp == dtrace_kmod && !core_kernel_text(sym.value)) -+ continue; -+ -+ /* -+ * Ensure we have not been given .init symbol from kallsyms -+ * interface. This could lead to memory corruption once DTrace -+ * tries to enable probe in already freed memory. -+ */ -+ if (mp != dtrace_kmod && !within_module_core(sym.value, mp)) -+ continue; -+ -+ /* -+ * See if the symbol is on the FBT's blacklist. Since both -+ * iterators are workng in sort order by ascending address we -+ * can use concurrent traversal. -+ */ -+ while (blent != NULL && -+ dtrace_fbt_bl_entry_addr(blent) < sym.value) { -+ blent = dtrace_fbt_bl_next(blent); -+ } -+ if (dtrace_fbt_bl_entry_addr(blent) == sym.value) -+ continue; -+ -+ /* -+ * No FBT tracing for DTrace functions, and functions that are -+ * crucial to probe processing. -+ * Also weed out symbols that are not relevant here. -+ */ -+ if (strncmp(sym.name, "dtrace_", 7) == 0) -+ continue; -+ if (strncmp(sym.name, "insn_", 5) == 0) -+ continue; -+ if (strncmp(sym.name, "inat_", 5) == 0) -+ continue; -+ if (strncmp(sym.name, "_GLOBAL_", 8) == 0) -+ continue; -+ if (strncmp(sym.name, "do_", 3) == 0) -+ continue; -+ if (strncmp(sym.name, "xen_", 4) == 0) -+ continue; -+ -+ addr = (asm_instr_t *)sym.value; -+ end = (asm_instr_t *)(sym.value + sym.size); -+ -+ /* -+ * FIXME: -+ * When there are multiple symbols for the same address, we -+ * should link them together as probes associated with the -+ * same function. When a probe for that function is triggered -+ * all associated probes should fire. -+ * -+ * For now, we ignore duplicates. -+ */ -+ if (addr == paddr) -+ continue; -+ paddr = addr; -+ -+ instr = le32_to_cpu(*addr); -+ -+ /* -+ * We can only instrument functions that begin with a proper -+ * frame set-up sequence: -+ * stp x29, x30, [sp,#-80]! 
-+ * mov x29, sp -+ * So, a STP instruction storing the FP (x29) and LR (x30) -+ * registers as a pair in a location relative to the SP -+ * register value. And then a MOV instruction that sets the -+ * FP (x29) register to the current SP value (effectively -+ * establishing the new stack frame). -+ * -+ * We will place our breakpoint on the MOV instruction. -+ */ -+ if (!aarch64_insn_is_stp_pre(instr) || -+ aarch64_insn_decode_register( -+ AARCH64_INSN_REGTYPE_RN, instr) != FBT_REG_SP || -+ aarch64_insn_decode_register( -+ AARCH64_INSN_REGTYPE_RT, instr) != FBT_REG_FP || -+ aarch64_insn_decode_register( -+ AARCH64_INSN_REGTYPE_RT2, instr) != FBT_REG_LR) -+ continue; -+ -+ addr++; -+ instr = le32_to_cpu(*addr); -+ if (instr != FBT_MOV_FP_SP) -+ continue; -+ -+ fbt_add_probe(mp, sym.name, FBT_ENTRY, instr, addr, 0, NULL, -+ arg); -+ -+ while (++addr < end) { -+ uintptr_t off; -+ -+ instr = le32_to_cpu(*addr); -+ if (!aarch64_insn_is_ret(instr)) -+ continue; -+ -+ off = (uintptr_t)addr - sym.value; -+ fbtp = fbt_add_probe(mp, sym.name, FBT_RETURN, instr, -+ addr, off, fbtp, arg); -+ } -+ } -+} -+EXPORT_SYMBOL(dtrace_fbt_init); -diff --git a/arch/arm64/kernel/dtrace_sdt.c b/arch/arm64/kernel/dtrace_sdt.c -new file mode 100644 -index 000000000000..d5a6a9d398b3 ---- /dev/null -+++ b/arch/arm64/kernel/dtrace_sdt.c -@@ -0,0 +1,25 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_sdt.c -+ * DESCRIPTION: Dynamic Tracing: SDT registration code (arch-specific) -+ * -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 
-+ */ -+ -+#include <linux/module.h> -+#include <asm/insn.h> -+#include <asm/dtrace_arch.h> -+#include <asm/dtrace_sdt_arch.h> -+ -+void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs, -+ int *is_enabled, int cnt) -+{ -+ int i; -+ -+ for (i = 0; i < cnt; i++) { -+ if (likely(!is_enabled[i])) -+ aarch64_insn_patch_text_nosync(addrs[i], NOP_INSTR); -+ else -+ aarch64_insn_patch_text_nosync(addrs[i], MOV_INSTR); -+ } -+} -diff --git a/arch/arm64/kernel/dtrace_syscall.c b/arch/arm64/kernel/dtrace_syscall.c -new file mode 100644 -index 000000000000..73730e42f3b8 ---- /dev/null -+++ b/arch/arm64/kernel/dtrace_syscall.c -@@ -0,0 +1,89 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_syscall.c -+ * DESCRIPTION: Dynamic Tracing: system call tracing support (arch-specific) -+ * -+ * Copyright (C) 2010, 2018 Oracle Corporation -+ */ -+ -+#include <linux/dtrace_syscall.h> -+#include <linux/ptrace.h> -+#include <asm/syscall.h> -+ -+void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, -+ uintptr_t, uintptr_t, uintptr_t); -+ -+void systrace_stub(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, -+ uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, -+ uintptr_t arg5, uintptr_t arg6) -+{ -+} -+ -+asmlinkage long systrace_syscall(const struct pt_regs *regs); -+ -+static struct systrace_info systrace_info = { -+ &systrace_probe, -+ systrace_stub, -+ systrace_syscall, -+ {}, -+ { -+#undef __SYSCALL -+#define __SYSCALL(nr, sym) [nr] { .name = __stringify(sym), }, -+#include <asm/unistd.h> -+#undef __SYSCALL -+ } -+ }; -+ -+ -+asmlinkage long systrace_syscall(const struct pt_regs *regs) -+{ -+ long rc = 0; -+ unsigned long sysnum; -+ dtrace_id_t id; -+ struct dtrace_syscalls *sc; -+ -+ sysnum = syscall_get_nr(current, (struct pt_regs *) regs); -+ sc = &systrace_info.sysent[sysnum]; -+ -+ id = sc->stsy_entry; -+ /* TODO: arg 6. 
*/ -+ if (id != DTRACE_IDNONE) -+ (*systrace_probe)(id, regs->regs[0], regs->regs[1], -+ regs->regs[2], regs->regs[3], regs->regs[4], -+ regs->regs[5], 0); -+ -+ /* -+ * FIXME: Add stop functionality for DTrace. -+ */ -+ -+ if (sc->stsy_underlying != NULL) -+ rc = (*sc->stsy_underlying)(regs); -+ -+ id = sc->stsy_return; -+ if (id != DTRACE_IDNONE) -+ (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc, -+ (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0, 0); -+ -+ return rc; -+} -+ -+struct systrace_info *dtrace_syscalls_init() -+{ -+ int i; -+ -+ /* -+ * Only initialize this stuff once... -+ */ -+ if (systrace_info.sysent[0].stsy_tblent != NULL) -+ return &systrace_info; -+ -+ for (i = 0; i < NR_syscalls; i++) { -+ systrace_info.sysent[i].stsy_tblent = -+ (dt_sys_call_t *)&sys_call_table[i]; -+ systrace_info.sysent[i].stsy_underlying = -+ (dt_sys_call_t)sys_call_table[i]; -+ } -+ -+ return &systrace_info; -+} -+EXPORT_SYMBOL(dtrace_syscalls_init); -diff --git a/arch/arm64/kernel/dtrace_syscall_stubs.S b/arch/arm64/kernel/dtrace_syscall_stubs.S -new file mode 100644 -index 000000000000..e69de29bb2d1 -diff --git a/arch/arm64/kernel/dtrace_util.c b/arch/arm64/kernel/dtrace_util.c -new file mode 100644 -index 000000000000..8142cf0459c2 ---- /dev/null -+++ b/arch/arm64/kernel/dtrace_util.c -@@ -0,0 +1,292 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * FILE: dtrace_util.c -+ * DESCRIPTION: Dynamic Tracing: Architecture utility functions -+ * -+ * Copyright (c) 2010, 2019, Oracle and/or its affiliates. All rights reserved. 
-+ */ -+ -+#include <linux/dtrace_cpu.h> -+#include <linux/dtrace_os.h> -+#include <linux/dtrace_task_impl.h> -+#include <linux/kdebug.h> -+#include <linux/notifier.h> -+#include <linux/ptrace.h> -+#include <linux/uaccess.h> -+#include <linux/uprobes.h> -+#include <asm/debug-monitors.h> -+#include <asm/insn.h> -+ -+void dtrace_skip_instruction(struct pt_regs *regs) -+{ -+ instruction_pointer_set(regs, instruction_pointer(regs) + 4); -+} -+ -+void dtrace_handle_badaddr(struct pt_regs *regs) -+{ -+ unsigned long addr = current->thread.fault_address; -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); -+ this_cpu_core->cpuc_dtrace_illval = addr; -+ -+ dtrace_skip_instruction(regs); -+} -+ -+int dtrace_die_notifier(struct notifier_block *nb, unsigned long val, -+ void *args) -+{ -+ struct die_args *dargs = args; -+ -+ switch (val) { -+ case DIE_PAGE_FAULT: { -+ if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT)) -+ return NOTIFY_DONE; -+ -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); -+ this_cpu_core->cpuc_dtrace_illval = dargs->err; -+ -+ dtrace_skip_instruction(dargs->regs); -+ -+ return NOTIFY_OK | NOTIFY_STOP_MASK; -+ } -+ case DIE_OOPS: { -+ pr_info("DTrace: last probe %u\n", -+ this_cpu_core->cpuc_current_probe); -+ return NOTIFY_DONE; -+ } -+ default: -+ return NOTIFY_DONE; -+ } -+} -+ -+struct user_stackframe { -+ struct user_stackframe __user *fp; -+ unsigned long lr; -+} __packed; -+ -+static int dtrace_unwind_frame(struct user_stackframe *frame) -+{ -+ struct user_stackframe __user *ofp = frame->fp; -+ unsigned long ret; -+ -+ /* Verify alignment. */ -+ if ((unsigned long)ofp & 0xf) -+ return -EINVAL; -+ -+ /* Verify read access. */ -+ if (!access_ok(ofp, sizeof(struct user_stackframe))) -+ return -EINVAL; -+ -+ pagefault_disable(); -+ ret = __copy_from_user_inatomic(frame, ofp, -+ sizeof(struct user_stackframe)); -+ pagefault_enable(); -+ -+ /* Make sure the read worked. 
*/ -+ if (ret) { -+ frame->fp = ofp; -+ return -EINVAL; -+ } -+ -+ /* -+ * If the frame pointer in the current frame is NULL, we have reached -+ * the end of the call chain. -+ */ -+ if (frame->fp == NULL) -+ return 0; -+ -+ /* -+ * In older glibc versions, the call chain did not end with an initial -+ * frame with NULL frame pointer. Instead, the initial frame stored -+ * the beginning of the stack as frame pointer. We look for that here -+ * as a special case, and return a frame where the frame pointer is -+ * set to NULL (as it ought to be). -+ * -+ * If we do not know the beginning of the stack, we are out of luck. -+ */ -+ if (current->dt_task && current->dt_task->dt_ustack == frame->fp) { -+ frame->fp = NULL; -+ return 0; -+ } -+ -+ /* -+ * Verify strictly increasing consecutive values. Since the stack -+ * grows downward, walking the call chain in reverse must yield ever -+ * increasing frame pointers. -+ */ -+ if (ofp >= frame->fp) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+void dtrace_user_stacktrace(struct stacktrace_state *st) -+{ -+ struct pt_regs *regs = current_pt_regs(); -+ uint64_t *pcs = st->pcs; -+ int limit = st->limit; -+ int fixups, patches, skip; -+ struct user_stackframe frame0, frame; -+ struct user_stackframe *bos = current->dt_task -+ ? current->dt_task->dt_ustack -+ : NULL; -+ struct return_instance *rilist = current->utask -+ ? current->utask->return_instances -+ : NULL; -+ struct return_instance *ri; -+ -+ /* -+ * If we do not have user-mode registers, or if there is no known -+ * bottom of stack, we cannot collect a call chain. -+ */ -+ if (!user_mode(regs)) -+ goto out; -+ if (!bos) -+ goto out; -+ if (!limit) -+ goto out; -+ -+ frame0.fp = (struct user_stackframe __user *)regs->regs[29]; -+ frame0.lr = regs->regs[30]; -+ -+ /* -+ * The first special situation we need to deal with here is the rare -+ * case of tracing the instruction after a call, when the current -+ * program counter just got loaded from the link register, i.e. 
they -+ * will be the same. In that case, we don't want to record both pc -+ * and lr in the trace. -+ * -+ * Uretprobes are also tricky because if we are asked to provide a -+ * ustack() while processing a uretprobe firing, we are still in the -+ * middle of handling the probe. Things are not back to normal yet. -+ */ -+ if (regs->pc != frame0.lr) { -+ ri = rilist; -+ if (pcs) { -+ if (uprobe_return_addr_is_hijacked(frame0.lr) && -+ ri && ri->orig_ret_vaddr == regs->pc) -+ *pcs++ = ri->func; -+ else -+ *pcs++ = regs->pc; -+ } -+ -+ limit--; -+ st->depth++; -+ -+ if (!limit) -+ goto out; -+ } -+ -+ /* -+ * First pass: determine how many return addresses need to be fixed up, -+ * and how many return instances we have. -+ */ -+ frame = frame0; -+ fixups = 0; -+ do { -+ if (uprobe_return_addr_is_hijacked(frame.lr)) -+ fixups++; -+ -+ if (frame.fp == NULL) -+ break; -+ -+ if (dtrace_unwind_frame(&frame) < 0) { -+ this_cpu_core->cpuc_dtrace_illval = (uintptr_t)frame.fp; -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADSTACK); -+ break; -+ } -+ } while (frame.lr); -+ -+ patches = 0; -+ for (ri = rilist; ri != NULL; ri = ri->next) -+ patches++; -+ -+ /* -+ * It is possible that we think we need one more fixup than we can -+ * satisfy with the return instances. This is because we cannot quite -+ * determine whether the first one is actually needed or not (due to -+ * lack of proper state when the uretprobe implementation interferes -+ * with frame chain walking). -+ */ -+ skip = fixups - patches; -+ if (skip > 1) { -+ this_cpu_core->cpuc_dtrace_illval = 0; -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADSTACK); -+ goto out; -+ } -+ -+ /* -+ * Second pass: fill in the actual stack trace. 
-+ */ -+ frame = frame0; -+ ri = rilist; -+ do { -+ if (uprobe_return_addr_is_hijacked(frame.lr)) { -+ if (skip) { -+ skip = 0; -+ goto skip_frame; -+ } -+ -+ frame.lr = ri->orig_ret_vaddr; -+ ri = ri->next; -+ } -+ -+ if (pcs) -+ *pcs++ = frame.lr; -+ -+ limit--; -+ st->depth++; -+ -+skip_frame: -+ if (frame.fp == NULL) -+ break; -+ -+ if (dtrace_unwind_frame(&frame) < 0) { -+ this_cpu_core->cpuc_dtrace_illval = (uintptr_t)frame.fp; -+ DTRACE_CPUFLAG_SET(CPU_DTRACE_BADSTACK); -+ break; -+ } -+ } while (limit); -+ -+out: -+ if (pcs) { -+ while (limit--) -+ *pcs++ = 0; -+ } -+} -+ -+asm_instr_t dtrace_text_peek(asm_instr_t *addr) -+{ -+ asm_instr_t opcode; -+ -+ aarch64_insn_read(addr, &opcode); -+ -+ return opcode; -+} -+EXPORT_SYMBOL(dtrace_text_peek); -+ -+void dtrace_text_poke(asm_instr_t *addr, asm_instr_t opcode) -+{ -+ aarch64_insn_patch_text_nosync(addr, opcode); -+} -+EXPORT_SYMBOL(dtrace_text_poke); -+ -+void dtrace_kernel_brk_start(void *arg) -+{ -+ register_kernel_break_hook((struct break_hook *)arg); -+} -+EXPORT_SYMBOL(dtrace_kernel_brk_start); -+ -+void dtrace_kernel_brk_stop(void *arg) -+{ -+ unregister_kernel_break_hook((struct break_hook *)arg); -+} -+EXPORT_SYMBOL(dtrace_kernel_brk_stop); -+ -+void dtrace_mod_pdata_init(struct dtrace_module *pdata) -+{ -+} -+ -+void dtrace_mod_pdata_cleanup(struct dtrace_module *pdata) -+{ -+} -diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c -index fde59981445c..8ec90f382054 100644 ---- a/arch/arm64/kernel/entry-common.c -+++ b/arch/arm64/kernel/entry-common.c -@@ -66,7 +66,7 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) - } - NOKPROBE_SYMBOL(el1_dbg); - --asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) -+asmlinkage int notrace el1_sync_handler(struct pt_regs *regs) - { - unsigned long esr = read_sysreg(esr_el1); - -@@ -91,10 +91,11 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) - case ESR_ELx_EC_WATCHPT_CUR: - case 
ESR_ELx_EC_BRK64: - el1_dbg(regs, esr); -- break; -+ return 1; - default: - el1_inv(regs, esr); - }; -+ return 0; - } - NOKPROBE_SYMBOL(el1_sync_handler); - -diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S -index 9461d812ae27..0ff4377f18ef 100644 ---- a/arch/arm64/kernel/entry.S -+++ b/arch/arm64/kernel/entry.S -@@ -25,6 +25,7 @@ - #include <asm/thread_info.h> - #include <asm/asm-uaccess.h> - #include <asm/unistd.h> -+#include <asm/debug-monitors.h> - - /* - * Context tracking subsystem. Used to instrument transitions -@@ -252,7 +253,7 @@ alternative_else_nop_endif - */ - .endm - -- .macro kernel_exit, el -+ .macro kernel_exit, el, fbt_emu = 0 - .if \el != 0 - disable_daif - -@@ -346,7 +347,11 @@ alternative_else_nop_endif - - msr elr_el1, x21 // set up the return data - msr spsr_el1, x22 -- ldp x0, x1, [sp, #16 * 0] -+ -+ /* -+ * No need to restore x0 and x1 - we may still clobber them. We will -+ * restore them right before we return. -+ */ - ldp x2, x3, [sp, #16 * 1] - ldp x4, x5, [sp, #16 * 2] - ldp x6, x7, [sp, #16 * 3] -@@ -362,7 +367,44 @@ alternative_else_nop_endif - ldp x26, x27, [sp, #16 * 13] - ldp x28, x29, [sp, #16 * 14] - ldr lr, [sp, #S_LR] -+ -+ .if \fbt_emu != 0 // FBT emulation needed? -+ mrs x0, esr_el1 // check if ESR is FBT probe -+ and x0, x0, #0x1f // ... mask code -+ cmp x0, #DPROBES_FBE_BRK_IMM // ... compare with FBE code -+ beq 6f // FBT entry -> emulate instr. -+ cmp x0, #DPROBES_FBR_BRK_IMM // ... compare with FBR code -+ beq 7f // FBT return -> emulate instr. 
-+ b 8f // not FBT -> skip next section -+ -+6: -+ mrs x0, elr_el1 // retrieve xeceptionx link reg -+ add x0, x0, #0x4 // advance to next instr -+ msr elr_el1, x0 // set exception link reg -+ -+ ldp x0, x1, [sp, #16 * 0] // done with x0, restore orig - add sp, sp, #S_FRAME_SIZE // restore sp -+ mov x29, sp // instr we put probe on -+ b 9f // FBT done -> branch to eret -+ -+7: -+ msr elr_el1, lr // set exception link reg to -+ // link register value, to -+ // simulate the 'ret' instr. -+ -+ ldp x0, x1, [sp, #16 * 0] // done with x0, restore orig -+ add sp, sp, #S_FRAME_SIZE // restore sp -+ b 9f // FBT done -> branch to eret -+ -+8: -+ ldp x0, x1, [sp, #16 * 0] // done with x0, restore orig -+ add sp, sp, #S_FRAME_SIZE // restore sp -+9: -+ .else -+ ldp x0, x1, [sp, #16 * 0] // done with x0, restore orig -+ add sp, sp, #S_FRAME_SIZE // restore sp -+ -+ .endif - - .if \el == 0 - alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 -@@ -520,6 +562,7 @@ __bad_stack: - /* Time to die */ - bl handle_bad_stack - ASM_BUG() -+END(__bad_stack) - #endif /* CONFIG_VMAP_STACK */ - - /* -@@ -580,7 +623,12 @@ el1_sync: - kernel_entry 1 - mov x0, sp - bl el1_sync_handler -- kernel_exit 1 -+#if IS_ENABLED(CONFIG_DT_FBT) -+ cmp x0, 1 -+ b.ne 1020f -+ kernel_exit 1, 1 -+#endif -+1020: kernel_exit 1 - ENDPROC(el1_sync) - - .align 6 -@@ -733,6 +781,7 @@ work_pending: - #endif - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step - b finish_ret_to_user -+END(work_pending) - /* - * "slow" syscall return path. - */ -diff --git a/arch/arm64/kernel/fbt_blacklist.h b/arch/arm64/kernel/fbt_blacklist.h -new file mode 100644 -index 000000000000..399b946f919b ---- /dev/null -+++ b/arch/arm64/kernel/fbt_blacklist.h -@@ -0,0 +1,91 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Functions used in die notifier chain calling. 
-+ */ -+BL_SENTRY(void *, notify_die) -+BL_DENTRY(void *, notifier_call_chain) -+BL_SENTRY(typeof(__atomic_notifier_call_chain), __atomic_notifier_call_chain) -+BL_SENTRY(typeof(atomic_notifier_call_chain), atomic_notifier_call_chain) -+BL_SENTRY(typeof(__raw_notifier_call_chain), __raw_notifier_call_chain) -+BL_SENTRY(typeof(raw_notifier_call_chain), raw_notifier_call_chain) -+BL_DENTRY(void *, hw_breakpoint_exceptions_notify) -+BL_DENTRY(void *, kprobe_exceptions_notify) -+ -+/* -+ * Functions used to update vtime in probe context. -+ */ -+BL_SENTRY(typeof(ktime_get_raw_fast_ns), ktime_get_raw_fast_ns) -+BL_DENTRY(void *, raw_read_seqcount) -+BL_DENTRY(void *, read_seqcount_retry) -+BL_DENTRY(void *, __read_seqcount_retry) -+ -+/* xen_clocksource */ -+BL_DENTRY(void *, xen_clocksource_get_cycles) -+BL_DENTRY(void *, xen_clocksource_read) -+BL_DENTRY(void *, pvclock_clocksource_read) -+BL_DENTRY(void *, pvclock_touch_watchdogs) -+BL_DENTRY(void *, touch_softlockup_watchdog_sync) -+BL_DENTRY(void *, clocksource_touch_watchdog) -+BL_DENTRY(void *, clocksource_resume_watchdog) -+BL_DENTRY(void *, reset_hung_task_detector) -+/* clocksource_tsc */ -+BL_DENTRY(void *, read_tsc) -+BL_DENTRY(void *, get_cycles) -+/* clocksource_hpet */ -+BL_DENTRY(void *, read_hpet) -+BL_DENTRY(void *, hpet_readl) -+/* kvm_clock */ -+BL_DENTRY(void *, kvm_clock_get_cycles) -+BL_DENTRY(void *, kvm_clock_read) -+/* arm_arch */ -+BL_DENTRY(void *, arch_counter_get_cntvct); -+BL_DENTRY(void *, arch_counter_get_cntvct_mem); -+BL_DENTRY(void *, arch_counter_get_cntpct); -+BL_DENTRY(void *, arch_counter_read); -+ -+/* -+ * Functions used in trap handling. 
-+ */ -+BL_DENTRY(void *, fixup_exception) -+BL_DENTRY(void *, paranoid_entry) -+BL_DENTRY(void *, kgdb_ll_trap) -+BL_DENTRY(void *, error_entry) -+BL_DENTRY(void *, xen_int3) -+BL_DENTRY(void *, ftrace_int3_handler) -+BL_DENTRY(typeof(poke_int3_handler), poke_int3_handler) -+BL_DENTRY(void *, fixup_bad_iret) -+BL_DENTRY(void *, xen_adjust_exception_frame) -+BL_DENTRY(void *, paravirt_nop) -+BL_DENTRY(void *, ist_enter) -+BL_DENTRY(void *, rcu_nmi_enter) -+BL_DENTRY(void *, rcu_dynticks_curr_cpu_in_eqs) -+BL_DENTRY(void *, rcu_dynticks_eqs_exit) -+BL_DENTRY(void *, rcu_nmi_exit) -+BL_DENTRY(void *, rcu_dynticks_eqs_enter) -+BL_DENTRY(void *, ist_exit) -+ -+/* -+ * Functions used in page fault handling. -+ */ -+BL_DENTRY(void *, do_page_fault) -+BL_DENTRY(void *, __do_page_fault) -+BL_DENTRY(void *, down_read_trylock) -+BL_DENTRY(void *, __get_user_pages_fast) -+BL_DENTRY(void *, gup_pud_range) -+BL_DENTRY(void *, gup_huge_pud) -+BL_DENTRY(void *, gup_pmd_range) -+BL_DENTRY(void *, gup_huge_pmd) -+BL_DENTRY(void *, gup_pte_range) -+BL_DENTRY(void *, pte_mfn_to_pfn) -+ -+/* -+ * Functions used under 4.19 idr_find -+ */ -+BL_DENTRY(void *, idr_find) -+BL_DENTRY(void *, find_next_bit) -+BL_DENTRY(void *, _find_next_bit) -+BL_DENTRY(void *, radix_tree_lookup) -+BL_DENTRY(void *, __radix_tree_lookup) -+BL_DENTRY(void *, radix_tree_load_root) -+BL_DENTRY(void *, radix_tree_descend) -+BL_DENTRY(void *, is_sibling_entry) -diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c -index a412d8edbcd2..49482af77b9f 100644 ---- a/arch/arm64/kernel/probes/uprobes.c -+++ b/arch/arm64/kernel/probes/uprobes.c -@@ -179,7 +179,8 @@ static int uprobe_single_step_handler(struct pt_regs *regs, - { - struct uprobe_task *utask = current->utask; - -- WARN_ON(utask && (instruction_pointer(regs) != utask->xol_vaddr + 4)); -+ WARN_ON(utask && utask->active_uprobe && -+ (instruction_pointer(regs) != utask->xol_vaddr + 4)); - if (uprobe_post_sstep_notifier(regs)) - 
return DBG_HOOK_HANDLED; - -diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c -index d5ffaaab31a7..4563f0a4d0db 100644 ---- a/arch/arm64/kernel/sys.c -+++ b/arch/arm64/kernel/sys.c -@@ -55,7 +55,7 @@ asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused) - #undef __SYSCALL - #define __SYSCALL(nr, sym) [nr] = __arm64_##sym, - --const syscall_fn_t sys_call_table[__NR_syscalls] = { -+CONST_SYS_CALL_TABLE syscall_fn_t sys_call_table[__NR_syscalls] = { - [0 ... __NR_syscalls - 1] = __arm64_sys_ni_syscall, - #include <asm/unistd.h> - }; -diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S -index ffa68d5713f1..f331be1a84ec 100644 ---- a/arch/arm64/kvm/hyp/hyp-entry.S -+++ b/arch/arm64/kvm/hyp/hyp-entry.S -@@ -74,6 +74,7 @@ el1_sync: // Guest trapped into EL2 - - eret - sb -+ENDPROC(el1_sync) - - el1_hvc_guest: - /* -@@ -125,21 +126,25 @@ wa_epilogue: - add sp, sp, #16 - eret - sb -+ENDPROC(el1_hvc_guest) - - el1_trap: - get_vcpu_ptr x1, x0 - mov x0, #ARM_EXCEPTION_TRAP - b __guest_exit -+ENDPROC(el1_trap) - - el1_irq: - get_vcpu_ptr x1, x0 - mov x0, #ARM_EXCEPTION_IRQ - b __guest_exit -+ENDPROC(el1_irq) - - el1_error: - get_vcpu_ptr x1, x0 - mov x0, #ARM_EXCEPTION_EL1_SERROR - b __guest_exit -+ENDPROC(el1_error) - - el2_sync: - /* Check for illegal exception return, otherwise panic */ -@@ -179,6 +184,7 @@ el2_error: - mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) - eret - sb -+ENDPROC(el2_error) - - ENTRY(__hyp_do_panic) - mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ -diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c -index 85566d32958f..9753c396a4cb 100644 ---- a/arch/arm64/mm/fault.c -+++ b/arch/arm64/mm/fault.c -@@ -14,6 +14,7 @@ - #include <linux/mm.h> - #include <linux/hardirq.h> - #include <linux/init.h> -+#include <linux/kdebug.h> - #include <linux/kprobes.h> - #include <linux/uaccess.h> - #include <linux/page-flags.h> -@@ -61,6 +62,19 @@ static inline const struct fault_info 
*esr_to_debug_fault_info(unsigned int esr) - return debug_fault_info + DBG_ESR_EVT(esr); - } - -+#ifdef CONFIG_DTRACE -+static int dtrace_fault(struct pt_regs *regs, unsigned long addr) -+{ -+ preempt_disable(); -+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, addr, 14, -+ SIGKILL) == NOTIFY_STOP) -+ return 1; -+ preempt_enable(); -+ -+ return 0; -+} -+#endif -+ - static void data_abort_decode(unsigned int esr) - { - pr_alert("Data abort info:\n"); -@@ -450,6 +464,10 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, - - if (kprobe_page_fault(regs, esr)) - return 0; -+#ifdef CONFIG_DTRACE -+ if (dtrace_fault(regs, addr)) -+ return 0; -+#endif - - /* - * If we're in an interrupt or have no user context, we must not take -diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h -index f46e0ca0169c..b27885982066 100644 ---- a/include/linux/uprobes.h -+++ b/include/linux/uprobes.h -@@ -124,6 +124,7 @@ extern void uprobe_copy_process(struct task_struct *t, unsigned long flags); - extern int uprobe_post_sstep_notifier(struct pt_regs *regs); - extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); - extern void uprobe_notify_resume(struct pt_regs *regs); -+extern bool uprobe_return_addr_is_hijacked(unsigned long addr); - extern bool uprobe_deny_signal(void); - extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); - extern void uprobe_clear_state(struct mm_struct *mm); -diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c -index ece7e13f6e4a..295b2f3cacb3 100644 ---- a/kernel/events/uprobes.c -+++ b/kernel/events/uprobes.c -@@ -1846,6 +1846,16 @@ static unsigned long get_trampoline_vaddr(void) - return trampoline_vaddr; - } - -+/* -+ * Verify whether a return address is a trampoline address or a regular return -+ * address. This is used by stack unwinders to determine whether a return -+ * address in a stack trace needs to be adjusted. 
-+ */ -+bool uprobe_return_addr_is_hijacked(unsigned long addr) -+{ -+ return addr == get_trampoline_vaddr(); -+} -+ - static void cleanup_return_instances(struct uprobe_task *utask, bool chained, - struct pt_regs *regs) - { -diff --git a/scripts/dtrace_sdt_arm64.sh b/scripts/dtrace_sdt_arm64.sh -new file mode 100755 -index 000000000000..f710c08fa824 ---- /dev/null -+++ b/scripts/dtrace_sdt_arm64.sh -@@ -0,0 +1,425 @@ -+#!/bin/sh -+ -+LANG=C -+export LANG -+ -+# -+# Syntax: -+# dtrace_sdt_arm64.sh sdtinfo <S-file> <l-file> <o-file> -+# This is used to generate DTrace SDT probe definitions for a -+# linked kernel image file <l-file>, based on relocation info -+# from the kernel object file <o-file>. The output is written -+# to <S-file>. -+# -+ -+opr="$1" -+shift -+if [ -z "$opr" ]; then -+ echo "ERROR: Missing operation" > /dev/stderr -+ exit 1 -+elif [ "$opr" != "sdtinfo" ]; then -+ echo "ERROR: Invalid operation: ${opr}" > /dev/stderr -+ exit 1 -+fi -+ -+tfn="$1" -+shift -+if [ -z "$tfn" ]; then -+ echo "ERROR: Missing target filename" > /dev/stderr -+ exit 1 -+fi -+ -+lfn="$1" -+ofn="$2" -+ -+if [ -z "$lfn" ]; then -+ echo "ERROR: Missing linked kernel file argument" > /dev/stderr -+ exit 1 -+elif [ -z "$ofn" ]; then -+ echo "ERROR: Missing kernel object file argument" > /dev/stderr -+ exit 1 -+fi -+ -+# For arm64, the kernel is built using "-ffunction-sections -fdata-sections" -+# which due to the linked bug conflicts with "--emit-relocs". Probe discovery -+# therefore is a bit more complicated. -+# -+# First we collect the VMA address of all the code sections in the linked -+# kernel image. -+# -+# Subsequently, we go through the list of symbols in the linked kernel image, -+# and write out records for some select symbols that are used in the processing -+# of probe locations: -+# -+# <section> <address> B <name> -+# Named identifier at a specific address (global variable). 
-+# -+# We also process any function symbols, and build a lookup map for section-name -+# pairs and just name. Due to the possibility of having symbols with identical -+# names (in the same section, e.g. global and/or one or more local), we append -+# -<n> to every 2nd and later copy of the same symbol name in the current -+# section. -+# section and name -+# name -+# (If multiple symbols map to any of the above combinations, that specific -+# combination is omitted from the mapping.) -+# -+# Next, we process the list of function symbols, and for any function that -+# is not located in a section that starts with .exit.text, .init.text, or -+# .meminit.text) we determine its in-section offset and output a record: -+# -+# <section> <offset> F <name> <address> <section-base-address> -+# Named function at a specific address. -+# -+# Finally, each relocation record from a non-init or exit section that relates -+# to SDT probes is written to the output stream: -+# -+# <section> <address> R <value> -+# Relocation within a section at a specific address -+# -+# Probes are identified in the relocation records as symbols with either a -+# __dtrace_probe_ or __dtrace_isenabled_ prefix. -+# -+# All these records are sorted by section and offset, and any SDT probe -+# location relocation records (R) result in writing out an entry that records -+# its offset relative to the _stext symbol, along with the name of the function -+# it was found in, and the probe name. 
-+ -+( -+ objdump -ht ${lfn} -+ objdump -tr ${ofn} -+) | \ -+ awk 'function subl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) { -+ tmp = $0; -+ if (length(v0) > 8) { -+ d = length(v0); -+ v0h = strtonum("0x"substr(v0, 1, d - 8)); -+ v0l = strtonum("0x"substr(v0, d - 8 + 1)); -+ d = length(v1); -+ v1h = strtonum("0x"substr(v1, 1, d - 8)); -+ v1l = strtonum("0x"substr(v1, d - 8 + 1)); -+ -+ if (v0l >= v1l) { -+ if (v0h >= v1h) { -+ d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); -+ } else { -+ printf "ERROR: [stage 1.a] Invalid addresses: %s - %s\n", v0, v1; -+ d = 0; -+ errc++; -+ } -+ } else { -+ if (v0h > v1h) { -+ v0h--; -+ v0l += 4294967296; -+ d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); -+ } else { -+ printf "ERROR: [stage 1.b] Invalid addresses: %s - %s\n", v0, v1; -+ d = 0; -+ errc++; -+ } -+ } -+ } else { -+ v0 = strtonum("0x"v0); -+ v1 = strtonum("0x"v1); -+ d = sprintf("%016x", v0 - v1); -+ } -+ $0 = tmp; -+ -+ return d; -+ } -+ -+ BEGIN { -+ phase = 0; -+ } -+ -+ /^SYMBOL / { -+ phase++; -+ delete scnt; -+ next; -+ } -+ -+ phase == 0 && /^ *[1-9][0-9]* / { -+ snam = $2; -+ addr = $4; -+ getline; -+ if (/CODE/) -+ secs[snam] = addr; -+ -+ next; -+ } -+ -+ phase == 1 && $NF ~ /_(stext|_init_(begin|end))$/ { -+ print ". 
" $1 " B " $NF; -+ next; -+ } -+ -+ phase == 1 && / F / { -+ if ($4 ~ /^\.(exit|init|meminit)\.text/) -+ next; -+ -+ off = subl($1, secs[$4]); -+ -+ sym = $NF; -+ scnt[sym]++; -+ if (scnt[sym] > 1) -+ sym = sym"-"(scnt[sym] - 1); -+ -+ # section and name -+ id = $4 " " sym; -+ if (id in smap) { -+ if (smap[id] != $1) -+ smap[id] = 0; -+ } else -+ smap[id] = $1; -+ -+ # name -+ id = sym; -+ if (id in smap) { -+ if (smap[id] != $1) -+ smap[id] = 0; -+ } else -+ smap[id] = $1; -+ -+ next; -+ } -+ -+ phase == 2 && / F / { -+ if ($4 ~ /^\.(exit|init|meminit)\.text/) -+ next; -+ -+ sym = $NF; -+ scnt[sym]++; -+ if (scnt[sym] > 1) -+ sym = sym"-"(scnt[sym] - 1); -+ -+ # section and name -+ id = $4 " " sym; -+ if (!(id in smap)) -+ id = sym; -+ # name -+ if (id in smap) { -+ addr = smap[id]; -+ if (!addr) -+ print "ERROR: Non-unique symbol: " $4 " " $6 " " $1 " ["sym"]"; -+ } else { -+ print "ERROR: Could not find " $4 " " $6 " " $1 " ["sym"]"; -+ addr = 0; -+ } -+ -+ print $4 " " $1 " F " $6 " " addr " " secs[$4]; -+ next; -+ } -+ -+ /^RELOC/ { -+ sect = substr($4, 2, length($4) - 3); -+ next; -+ } -+ -+ sect ~ /^\.(exit|init|meminit)\.text/ { -+ next; -+ } -+ -+ sect && /__dtrace_probe_/ { -+ $3 = substr($3, 16); -+ sub(/[\-+].*$/, "", $3); -+ print sect " " $1 " R " $3; -+ next; -+ } -+ -+ sect && /__dtrace_isenabled_/ { -+ $3 = substr($3, 20); -+ sub(/[\-+].*$/, "", $3); -+ print sect " " $1 " R ?" 
$3; -+ next; -+ }' | \ -+ sort -u | \ -+ awk 'function addl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) { -+ tmp = $0; -+ if (length(v0) > 8 || length(v1) > 8) { -+ d = length(v0); -+ v0h = strtonum("0x"substr(v0, 1, d - 8)); -+ v0l = strtonum("0x"substr(v0, d - 8 + 1)); -+ d = length(v1); -+ v1h = strtonum("0x"substr(v1, 1, d - 8)); -+ v1l = strtonum("0x"substr(v1, d - 8 + 1)); -+ -+ v0l += v1l; -+ v0h += v1h; -+ d = sprintf("%x", v0l); -+ if (length(d) > 8) { -+ v0h++; -+ v0l -= 4294967296; -+ } -+ d = sprintf("%x", v0h); -+ if (length(d) <= 8) { -+ d = sprintf("%08x%08x", v0h, v0l); -+ } else { -+ printf "#error [stage 2.a] Invalid addresses: %s + %s\n", v0, v1 \ -+ >"/dev/stderr"; -+ errc++; -+ } -+ } else { -+ v0 = strtonum("0x"v0); -+ v1 = strtonum("0x"v1); -+ d = sprintf("%016x", v0 + v1); -+ } -+ $0 = tmp; -+ -+ return d; -+ } -+ -+ function subl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) { -+ tmp = $0; -+ if (length(v0) > 8) { -+ d = length(v0); -+ v0h = strtonum("0x"substr(v0, 1, d - 8)); -+ v0l = strtonum("0x"substr(v0, d - 8 + 1)); -+ d = length(v1); -+ v1h = strtonum("0x"substr(v1, 1, d - 8)); -+ v1l = strtonum("0x"substr(v1, d - 8 + 1)); -+ -+ if (v0l >= v1l) { -+ if (v0h >= v1h) { -+ d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); -+ } else { -+ printf "#error [stage 2.b] Invalid addresses: %s - %s\n", v0, v1 \ -+ >"/dev/stderr"; -+ errc++; -+ } -+ } else { -+ if (v0h > v1h) { -+ v0h--; -+ v0l += 4294967296; -+ d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); -+ } else { -+ printf "#error [stage 2.c] Invalid addresses: %s - %s\n", v0, v1 \ -+ >"/dev/stderr"; -+ errc++; -+ } -+ } -+ } else { -+ v0 = strtonum("0x"v0); -+ v1 = strtonum("0x"v1); -+ d = sprintf("%016x", v0 - v1); -+ } -+ $0 = tmp; -+ -+ return d; -+ } -+ -+ function map_string(str, off) { -+ if (str in strmap) -+ off = strmap[str]; -+ else { -+ off = strsz; -+ strmap[str] = strsz; -+ strv[strc++] = str; -+ strsz += length(str) + 1; -+ } -+ -+ return off; -+ } -+ -+ BEGIN { -+ print "#include 
<asm/types.h>"; -+ print "#if BITS_PER_LONG == 64"; -+ print "# define PTR .quad"; -+ if (arch == "aarch64") -+ print "# define ALGN .align 3"; -+ else -+ print "# define ALGN .align 8"; -+ print "#else"; -+ print "# define PTR .long"; -+ if (arch == "aarch64") -+ print "# define ALGN .align 2"; -+ else -+ print "# define ALGN .align 4"; -+ print "#endif"; -+ -+ print "\t.section .rodata, \042a\042"; -+ print ""; -+ -+ print ".globl dtrace_sdt_probes"; -+ print "\tALGN"; -+ print "dtrace_sdt_probes:"; -+ -+ probec = 0; -+ stroff = 0; -+ strc = 0; -+ } -+ -+ $1 == "ERROR:" { -+ next; -+ } -+ -+ $4 == "_stext" { -+ stext = $2; -+ next; -+ } -+ -+ $4 == "__init_begin" { -+ init_beg = $2; -+ next; -+ } -+ -+ $4 == "__init_end" { -+ init_end = $2; -+ next; -+ } -+ -+ $3 == "F" { -+ fnam = $4; -+ sub(/\..*$/, "", fnam); -+ foff = $2; -+ fadr = $5; -+ -+ if (fadr != padr) -+ funcc++; -+ padr = fadr; -+ -+ next; -+ } -+ -+ $3 == "R" { -+ addr = addl(fadr, subl($2, foff)); -+ if (addr >= init_beg && addr <= init_end) -+ next; -+ addr = subl(addr, stext); -+ -+ print "/*"; -+ print " * " $1 " " foff " F " fnam " " fadr; -+ print " * " $0; -+ print " * (" fadr " + (" $2 " - " foff ")) - " stext; -+ print " */"; -+ printf "\tPTR\t_stext + 0x%s\n", addr; -+ printf "\tPTR\t%d\n", map_string($4); -+ printf "\tPTR\t%d\n", map_string(fnam); -+ -+ probec++; -+ -+ next; -+ } -+ -+ END { -+ print ""; -+ print ".globl dtrace_sdt_strings"; -+ print "\tALGN"; -+ print "dtrace_sdt_strings:"; -+ -+ -+ for (i = 0; i < strc; i++) -+ printf "\t.asciz\t\042%s\042\n", strv[i]; -+ -+ print ""; -+ print ".globl dtrace_sdt_nprobes"; -+ print ".globl dtrace_fbt_nfuncs"; -+ print "\tALGN"; -+ print "dtrace_sdt_nprobes:"; -+ printf "\tPTR\t%d\n", probec; -+ print "dtrace_fbt_nfuncs:"; -+ printf "\tPTR\t%d\n", funcc; -+ -+ exit(errc == 0 ? 0 : 1); -+ }' > ${tfn} -+ -+exit $? 
-diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh -index 86540859ba94..024cce3b3c3d 100755 ---- a/scripts/link-vmlinux.sh -+++ b/scripts/link-vmlinux.sh -@@ -59,7 +59,12 @@ sdtinfo() - { - info SDTINF ${2} - -- ${srctree}/scripts/dtrace_sdt.sh sdtinfo .tmp_sdtinfo.S ${1} -+ if [ -n "${CONFIG_ARM64}" ]; then -+ ${srctree}/scripts/dtrace_sdt_arm64.sh sdtinfo .tmp_sdtinfo.S \ -+ ${1} ${3} -+ else -+ ${srctree}/scripts/dtrace_sdt.sh sdtinfo .tmp_sdtinfo.S ${1} -+ fi - - local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ - ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" -@@ -358,16 +363,14 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then - - # step 1 - if [ -n "${CONFIG_DTRACE}" ]; then -- sdtinfo vmlinux.o ${sdtinfoo} -+ sdtinfo vmlinux.o ${sdtinfoo} vmlinux.o - fi - - kallsyms_step 1 - - if [ -n "${CONFIG_DTRACE}" ]; then -- if [ -n "${CONFIG_ARM64}" ]; then -- kallsyms_step 1 -- else -- kallsyms_step 1 -r -+ if [ -n "${CONFIG_X86_64}" ]; then -+ kallsyms_step 1 --emit-relocs - fi - sdtinfo ${kallsyms_vmlinux} ${sdtinfoo} vmlinux.o - fi --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0017-dtrace-add-SDT-probes.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0017-dtrace-add-SDT-probes.patch deleted file mode 100644 index 26c66b35599c..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0017-dtrace-add-SDT-probes.patch +++ /dev/null @@ -1,3303 +0,0 @@ -From 63473b4888e3b4a6dbd0d9209ca7faade23ef3b9 Mon Sep 17 00:00:00 2001 -From: Kris Van Hees <kris.van.hees@oracle.com> -Date: Thu, 8 Nov 2018 18:59:39 +0000 -Subject: [PATCH 17/20] dtrace: add SDT probes - -This adds a variety of SDT probes. 
- -XXX add documentation here from the commit messages - -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> -Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> -Signed-off-by: Tomas Jedlicka <tomas.jedlicka@oracle.com> -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> -Signed-off-by: Alan Maguire <alan.maguire@oracle.com> -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Signed-off-by: Vincent Lim <vincent.lim@oracle.com> ---- - block/bio.c | 7 ++ - block/blk-core.c | 4 + - fs/exec.c | 11 +- - fs/nfs/internal.h | 14 +++ - fs/nfs/read.c | 3 + - fs/nfs/write.c | 2 + - fs/xfs/xfs_buf.c | 19 ++++ - include/linux/rwlock_api_smp.h | 38 +++++++ - include/linux/spinlock_api_smp.h | 12 ++ - kernel/exit.c | 5 + - kernel/fork.c | 3 + - kernel/locking/mutex.c | 52 ++++++++- - kernel/locking/qrwlock.c | 24 +++- - kernel/locking/qspinlock.c | 20 +++- - kernel/sched/core.c | 31 ++++- - kernel/signal.c | 31 ++++- - kernel/time/timer.c | 4 + - net/ipv4/ip_input.c | 66 +++++++++-- - net/ipv4/ip_output.c | 71 +++++++++++- - net/ipv4/raw.c | 49 ++++++-- - net/ipv4/tcp.c | 26 +++++ - net/ipv4/tcp_input.c | 43 +++++++ - net/ipv4/tcp_ipv4.c | 114 ++++++++++++++++++- - net/ipv4/tcp_minisocks.c | 15 +++ - net/ipv4/tcp_output.c | 29 +++++ - net/ipv4/udp.c | 26 ++++- - net/ipv6/ip6_input.c | 103 ++++++++++++++--- - net/ipv6/ip6_output.c | 187 ++++++++++++++++++++++++++----- - net/ipv6/mcast.c | 72 +++++++++--- - net/ipv6/ndisc.c | 9 ++ - net/ipv6/output_core.c | 9 ++ - net/ipv6/raw.c | 38 ++++++- - net/ipv6/tcp_ipv6.c | 106 +++++++++++++++++- - net/ipv6/udp.c | 26 ++++- - 34 files changed, 1156 insertions(+), 113 deletions(-) - -diff --git a/block/bio.c b/block/bio.c -index 94d697217887..f09aeed3822e 100644 ---- a/block/bio.c -+++ b/block/bio.c -@@ -1024,7 +1024,11 @@ int submit_bio_wait(struct bio *bio) - bio->bi_end_io = submit_bio_wait_endio; - bio->bi_opf |= REQ_SYNC; - submit_bio(bio); -+ DTRACE_IO(wait__start, struct bio * : (bufinfo_t *, devinfo_t *), bio, -+ struct 
file * : fileinfo_t *, NULL); - wait_for_completion_io(&done); -+ DTRACE_IO(wait__done, struct bio * : (bufinfo_t *, devinfo_t *), bio, -+ struct file * : fileinfo_t *, NULL); - - return blk_status_to_errno(bio->bi_status); - } -@@ -1866,6 +1870,9 @@ void bio_endio(struct bio *bio) - } - - blk_throtl_bio_endio(bio); -+ DTRACE_IO(done, struct bio * : -+ (bufinfo_t *, devinfo_t *), bio, -+ struct file * : fileinfo_t *, NULL); - /* release cgroup info */ - bio_uninit(bio); - if (bio->bi_end_io) -diff --git a/block/blk-core.c b/block/blk-core.c -index 60dc9552ef8d..196a726e7b6e 100644 ---- a/block/blk-core.c -+++ b/block/blk-core.c -@@ -974,11 +974,15 @@ generic_make_request_checks(struct bio *bio) - */ - bio_set_flag(bio, BIO_TRACE_COMPLETION); - } -+ DTRACE_IO(start, struct bio * : (bufinfo_t *, devinfo_t *), bio, -+ struct file * : fileinfo_t *, NULL); - return true; - - not_supported: - status = BLK_STS_NOTSUPP; - end_io: -+ DTRACE_IO(start, struct bio * : (bufinfo_t *, devinfo_t *), bio, -+ struct file * : fileinfo_t *, NULL); - bio->bi_status = status; - bio_endio(bio); - return false; -diff --git a/fs/exec.c b/fs/exec.c -index 4bc77cee8f77..a853b39e9693 100644 ---- a/fs/exec.c -+++ b/fs/exec.c -@@ -62,6 +62,7 @@ - #include <linux/oom.h> - #include <linux/compat.h> - #include <linux/vmalloc.h> -+#include <linux/sdt.h> - #include <linux/dtrace_os.h> - - #include <linux/uaccess.h> -@@ -1764,8 +1765,10 @@ static int __do_execve_file(int fd, struct filename *filename, - check_unsafe_exec(bprm); - current->in_execve = 1; - -- if (!file) -+ if (!file) { - file = do_open_execat(fd, filename, flags); -+ DTRACE_PROC(exec, char *, filename->name); -+ } - retval = PTR_ERR(file); - if (IS_ERR(file)) - goto out_unmark; -@@ -1843,6 +1846,8 @@ static int __do_execve_file(int fd, struct filename *filename, - putname(filename); - if (displaced) - put_files_struct(displaced); -+ -+ DTRACE_PROC(exec__success); - return retval; - - out: -@@ -1863,8 +1868,10 @@ static int 
__do_execve_file(int fd, struct filename *filename, - if (displaced) - reset_files_struct(displaced); - out_ret: -- if (filename) -+ if (filename) { - putname(filename); -+ DTRACE_PROC(exec__failure, int, retval); -+ } - return retval; - } - -diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h -index f80c47d5ff27..b6931f8c3025 100644 ---- a/fs/nfs/internal.h -+++ b/fs/nfs/internal.h -@@ -10,6 +10,7 @@ - #include <linux/sunrpc/addr.h> - #include <linux/nfs_page.h> - #include <linux/wait_bit.h> -+#include <linux/sdt.h> - - #define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) - -@@ -790,3 +791,16 @@ static inline void nfs_set_port(struct sockaddr *sap, int *port, - - rpc_set_port(sap, *port); - } -+ -+#define DTRACE_IO_NFS(name, rw, size, inode) \ -+ if (DTRACE_IO_ENABLED(name)) { \ -+ struct bio bio __maybe_unused = { \ -+ .bi_opf = rw, \ -+ .bi_flags = (1 << BIO_USER_MAPPED), \ -+ .bi_iter.bi_size = size, \ -+ .bi_iter.bi_sector = NFS_FILEID(inode), \ -+ }; \ -+ DTRACE_IO(name, struct bio * : (bufinfo_t *, \ -+ devinfo_t *), &bio, \ -+ struct file * : fileinfo_t *, NULL); \ -+} -diff --git a/fs/nfs/read.c b/fs/nfs/read.c -index 34bb9add2302..6b830d9943ac 100644 ---- a/fs/nfs/read.c -+++ b/fs/nfs/read.c -@@ -212,6 +212,8 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr, - struct inode *inode = hdr->inode; - int swap_flags = IS_SWAPFILE(inode) ? 
NFS_RPC_SWAPFLAGS : 0; - -+ DTRACE_IO_NFS(start, REQ_OP_READ, hdr->args.count, hdr->inode); -+ - task_setup_data->flags |= swap_flags; - rpc_ops->read_setup(hdr, msg); - trace_nfs_initiate_read(hdr); -@@ -243,6 +245,7 @@ static int nfs_readpage_done(struct rpc_task *task, - struct inode *inode) - { - int status = NFS_PROTO(inode)->read_done(task, hdr); -+ DTRACE_IO_NFS(done, REQ_OP_READ, hdr->res.count, hdr->inode); - if (status != 0) - return status; - -diff --git a/fs/nfs/write.c b/fs/nfs/write.c -index 38abd130528a..e74c009104e7 100644 ---- a/fs/nfs/write.c -+++ b/fs/nfs/write.c -@@ -1416,6 +1416,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, - task_setup_data->priority = priority; - rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client); - trace_nfs_initiate_write(hdr); -+ DTRACE_IO_NFS(start, REQ_OP_WRITE, hdr->args.count, hdr->inode); - } - - /* If a nfs_flush_* function fails, it should remove reqs from @head and -@@ -1575,6 +1576,7 @@ static int nfs_writeback_done(struct rpc_task *task, - * depend on tighter cache coherency when writing. - */ - status = NFS_PROTO(inode)->write_done(task, hdr); -+ DTRACE_IO_NFS(done, REQ_OP_WRITE, hdr->res.count, hdr->inode); - if (status != 0) - return status; - -diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c -index 217e4f82a44a..a69039b6174e 100644 ---- a/fs/xfs/xfs_buf.c -+++ b/fs/xfs/xfs_buf.c -@@ -49,6 +49,21 @@ static kmem_zone_t *xfs_buf_zone; - * b_lock (trylock due to inversion) - */ - -+#define DTRACE_IO_XFS_WAIT(name, bp, is_write) \ -+ if (DTRACE_IO_ENABLED(name)) { \ -+ struct bio bio __maybe_unused = { \ -+ .bi_iter.bi_sector = (bp)->b_bn, \ -+ .bi_iter.bi_size = (bp)->b_length, \ -+ .bi_opf = is_write ? 
\ -+ REQ_OP_WRITE : REQ_OP_READ, \ -+ .bi_disk = (bp)->b_target->bt_bdev->bd_disk, \ -+ .bi_partno = (bp)->b_target->bt_bdev->bd_partno,\ -+ }; \ -+ DTRACE_IO(name, struct bio * : (bufinfo_t *, \ -+ devinfo_t *), &bio, \ -+ struct file * : fileinfo_t *, NULL); \ -+ } -+ - static inline int - xfs_buf_is_vmapped( - struct xfs_buf *bp) -@@ -1448,10 +1463,14 @@ static int - xfs_buf_iowait( - struct xfs_buf *bp) - { -+ int orig_flags __attribute__((unused)) = bp->b_flags; -+ - ASSERT(!(bp->b_flags & XBF_ASYNC)); - - trace_xfs_buf_iowait(bp, _RET_IP_); -+ DTRACE_IO_XFS_WAIT(wait__start, bp, orig_flags & XBF_WRITE); - wait_for_completion(&bp->b_iowait); -+ DTRACE_IO_XFS_WAIT(wait__done, bp, orig_flags & XBF_WRITE); - trace_xfs_buf_iowait_done(bp, _RET_IP_); - - return bp->b_error; -diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h -index abfb53ab11be..2531929ccb58 100644 ---- a/include/linux/rwlock_api_smp.h -+++ b/include/linux/rwlock_api_smp.h -@@ -5,6 +5,8 @@ - # error "please don't include this file directly" - #endif - -+#include <linux/sdt.h> -+ - /* - * include/linux/rwlock_api_smp.h - * -@@ -119,6 +121,8 @@ static inline int __raw_read_trylock(rwlock_t *lock) - preempt_disable(); - if (do_raw_read_trylock(lock)) { - rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_); -+ DTRACE_LOCKSTAT(rw__acquire, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_READER); - return 1; - } - preempt_enable(); -@@ -130,6 +134,8 @@ static inline int __raw_write_trylock(rwlock_t *lock) - preempt_disable(); - if (do_raw_write_trylock(lock)) { - rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ DTRACE_LOCKSTAT(rw__acquire, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_WRITER); - return 1; - } - preempt_enable(); -@@ -148,6 +154,8 @@ static inline void __raw_read_lock(rwlock_t *lock) - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock); -+ DTRACE_LOCKSTAT(rw__acquire, 
struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_READER); - } - - static inline unsigned long __raw_read_lock_irqsave(rwlock_t *lock) -@@ -159,6 +167,8 @@ static inline unsigned long __raw_read_lock_irqsave(rwlock_t *lock) - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, do_raw_read_trylock, do_raw_read_lock, - do_raw_read_lock_flags, &flags); -+ DTRACE_LOCKSTAT(rw__acquire, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_READER); - return flags; - } - -@@ -168,6 +178,8 @@ static inline void __raw_read_lock_irq(rwlock_t *lock) - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock); -+ DTRACE_LOCKSTAT(rw__acquire, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_READER); - } - - static inline void __raw_read_lock_bh(rwlock_t *lock) -@@ -175,6 +187,8 @@ static inline void __raw_read_lock_bh(rwlock_t *lock) - __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock); -+ DTRACE_LOCKSTAT(rw__acquire, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_READER); - } - - static inline unsigned long __raw_write_lock_irqsave(rwlock_t *lock) -@@ -186,6 +200,8 @@ static inline unsigned long __raw_write_lock_irqsave(rwlock_t *lock) - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, do_raw_write_trylock, do_raw_write_lock, - do_raw_write_lock_flags, &flags); -+ DTRACE_LOCKSTAT(rw__acquire, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_WRITER); - return flags; - } - -@@ -195,6 +211,8 @@ static inline void __raw_write_lock_irq(rwlock_t *lock) - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock); -+ DTRACE_LOCKSTAT(rw__acquire, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_WRITER); - } - - static inline void __raw_write_lock_bh(rwlock_t 
*lock) -@@ -202,6 +220,8 @@ static inline void __raw_write_lock_bh(rwlock_t *lock) - __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock); -+ DTRACE_LOCKSTAT(rw__acquire, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_WRITER); - } - - static inline void __raw_write_lock(rwlock_t *lock) -@@ -209,6 +229,8 @@ static inline void __raw_write_lock(rwlock_t *lock) - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock); -+ DTRACE_LOCKSTAT(rw__acquire, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_WRITER); - } - - #endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */ -@@ -217,6 +239,8 @@ static inline void __raw_write_unlock(rwlock_t *lock) - { - rwlock_release(&lock->dep_map, _RET_IP_); - do_raw_write_unlock(lock); -+ DTRACE_LOCKSTAT(rw__release, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_WRITER); - preempt_enable(); - } - -@@ -224,6 +248,8 @@ static inline void __raw_read_unlock(rwlock_t *lock) - { - rwlock_release(&lock->dep_map, _RET_IP_); - do_raw_read_unlock(lock); -+ DTRACE_LOCKSTAT(rw__release, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_READER); - preempt_enable(); - } - -@@ -232,6 +258,8 @@ __raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) - { - rwlock_release(&lock->dep_map, _RET_IP_); - do_raw_read_unlock(lock); -+ DTRACE_LOCKSTAT(rw__release, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_READER); - local_irq_restore(flags); - preempt_enable(); - } -@@ -240,6 +268,8 @@ static inline void __raw_read_unlock_irq(rwlock_t *lock) - { - rwlock_release(&lock->dep_map, _RET_IP_); - do_raw_read_unlock(lock); -+ DTRACE_LOCKSTAT(rw__release, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_READER); - local_irq_enable(); - preempt_enable(); - } -@@ -248,6 +278,8 @@ static inline void __raw_read_unlock_bh(rwlock_t *lock) - { - 
rwlock_release(&lock->dep_map, _RET_IP_); - do_raw_read_unlock(lock); -+ DTRACE_LOCKSTAT(rw__release, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_READER); - __local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); - } - -@@ -256,6 +288,8 @@ static inline void __raw_write_unlock_irqrestore(rwlock_t *lock, - { - rwlock_release(&lock->dep_map, _RET_IP_); - do_raw_write_unlock(lock); -+ DTRACE_LOCKSTAT(rw__release, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_WRITER); - local_irq_restore(flags); - preempt_enable(); - } -@@ -264,6 +298,8 @@ static inline void __raw_write_unlock_irq(rwlock_t *lock) - { - rwlock_release(&lock->dep_map, _RET_IP_); - do_raw_write_unlock(lock); -+ DTRACE_LOCKSTAT(rw__release, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_WRITER); - local_irq_enable(); - preempt_enable(); - } -@@ -272,6 +308,8 @@ static inline void __raw_write_unlock_bh(rwlock_t *lock) - { - rwlock_release(&lock->dep_map, _RET_IP_); - do_raw_write_unlock(lock); -+ DTRACE_LOCKSTAT(rw__release, struct rwlock *, lock, int, -+ DTRACE_LOCKSTAT_RW_WRITER); - __local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); - } - -diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h -index 19a9be9d97ee..e7601d01b87d 100644 ---- a/include/linux/spinlock_api_smp.h -+++ b/include/linux/spinlock_api_smp.h -@@ -5,6 +5,8 @@ - # error "please don't include this file directly" - #endif - -+#include <linux/sdt.h> -+ - /* - * include/linux/spinlock_api_smp.h - * -@@ -88,6 +90,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) - preempt_disable(); - if (do_raw_spin_trylock(lock)) { - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ DTRACE_LOCKSTAT(spin__acquire, spinlock_t *, lock); - return 1; - } - preempt_enable(); -@@ -118,6 +121,7 @@ static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock) - #else - do_raw_spin_lock_flags(lock, &flags); - #endif -+ DTRACE_LOCKSTAT(spin__acquire, spinlock_t *, lock); - return flags; - } - -@@ -127,6 
+131,7 @@ static inline void __raw_spin_lock_irq(raw_spinlock_t *lock) - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); -+ DTRACE_LOCKSTAT(spin__acquire, spinlock_t *, lock); - } - - static inline void __raw_spin_lock_bh(raw_spinlock_t *lock) -@@ -134,6 +139,7 @@ static inline void __raw_spin_lock_bh(raw_spinlock_t *lock) - __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); -+ DTRACE_LOCKSTAT(spin__acquire, spinlock_t *, lock); - } - - static inline void __raw_spin_lock(raw_spinlock_t *lock) -@@ -141,6 +147,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); -+ DTRACE_LOCKSTAT(spin__acquire, spinlock_t *, lock); - } - - #endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */ -@@ -149,6 +156,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) - { - spin_release(&lock->dep_map, _RET_IP_); - do_raw_spin_unlock(lock); -+ DTRACE_LOCKSTAT(spin__release, spinlock_t *, lock); - preempt_enable(); - } - -@@ -157,6 +165,7 @@ static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock, - { - spin_release(&lock->dep_map, _RET_IP_); - do_raw_spin_unlock(lock); -+ DTRACE_LOCKSTAT(spin__release, spinlock_t *, lock); - local_irq_restore(flags); - preempt_enable(); - } -@@ -165,6 +174,7 @@ static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock) - { - spin_release(&lock->dep_map, _RET_IP_); - do_raw_spin_unlock(lock); -+ DTRACE_LOCKSTAT(spin__release, spinlock_t *, lock); - local_irq_enable(); - preempt_enable(); - } -@@ -173,6 +183,7 @@ static inline void __raw_spin_unlock_bh(raw_spinlock_t *lock) - { - spin_release(&lock->dep_map, _RET_IP_); - do_raw_spin_unlock(lock); -+ DTRACE_LOCKSTAT(spin__release, 
spinlock_t *, lock); - __local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); - } - -@@ -181,6 +192,7 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) - __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); - if (do_raw_spin_trylock(lock)) { - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ DTRACE_LOCKSTAT(spin__acquire, spinlock_t *, lock); - return 1; - } - __local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); -diff --git a/kernel/exit.c b/kernel/exit.c -index 145e1fbe63c9..26ad800572a0 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -63,6 +63,7 @@ - #include <linux/random.h> - #include <linux/rcuwait.h> - #include <linux/compat.h> -+#include <linux/sdt.h> - #include <linux/dtrace_os.h> - - #include <linux/uaccess.h> -@@ -786,6 +787,10 @@ void __noreturn do_exit(long code) - tsk->exit_code = code; - taskstats_exit(tsk, group_dead); - -+ DTRACE_PROC(lwp__exit); -+ if (group_dead) -+ DTRACE_PROC(exit, int, code & 0x80 ? 3 : code & 0x7f ? 2 : 1); -+ - /* Remove DTrace state for this task */ - dtrace_task_free(tsk); - -diff --git a/kernel/fork.c b/kernel/fork.c -index d6e085e5b6d7..a4a3b11bf23d 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -94,6 +94,7 @@ - #include <linux/thread_info.h> - #include <linux/stackleak.h> - #include <linux/kasan.h> -+#include <linux/sdt.h> - #include <linux/dtrace_task_impl.h> - - #include <asm/pgtable.h> -@@ -2487,6 +2488,8 @@ long _do_fork(struct kernel_clone_args *args) - } - - put_pid(pid); -+ DTRACE_PROC(lwp__create, struct task_struct * : (lwpsinfo_t *, psinfo_t *), p); -+ DTRACE_PROC(create, struct task_struct * : psinfo_t *, p); - return nr; - } - -diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c -index 5352ce50a97e..e784dd89d924 100644 ---- a/kernel/locking/mutex.c -+++ b/kernel/locking/mutex.c -@@ -29,6 +29,7 @@ - #include <linux/interrupt.h> - #include <linux/debug_locks.h> - #include <linux/osq_lock.h> -+#include <linux/sdt.h> - - #ifdef CONFIG_DEBUG_MUTEXES - # include "mutex-debug.h" -@@ 
-282,6 +283,7 @@ void __sched mutex_lock(struct mutex *lock) - - if (!__mutex_trylock_fast(lock)) - __mutex_lock_slowpath(lock); -+ DTRACE_LOCKSTAT(adaptive__acquire, struct mutex *, lock); - } - EXPORT_SYMBOL(mutex_lock); - #endif -@@ -734,10 +736,14 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne - void __sched mutex_unlock(struct mutex *lock) - { - #ifndef CONFIG_DEBUG_LOCK_ALLOC -- if (__mutex_unlock_fast(lock)) -+ if (__mutex_unlock_fast(lock)) { -+ DTRACE_LOCKSTAT(adaptive__release, struct mutex *, lock); - return; -+ } - #endif -+ - __mutex_unlock_slowpath(lock, _RET_IP_); -+ DTRACE_LOCKSTAT(adaptive__release, struct mutex *, lock); - } - EXPORT_SYMBOL(mutex_unlock); - -@@ -927,6 +933,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, - struct lockdep_map *nest_lock, unsigned long ip, - struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) - { -+ u64 spinstart = 0, spinend, spintotal = 0; -+ u64 waitstart, waitend, waittotal = 0; - struct mutex_waiter waiter; - bool first = false; - struct ww_mutex *ww; -@@ -958,9 +966,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, - if (__mutex_trylock(lock) || - mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) { - /* got the lock, yay! 
*/ -+ - lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx && ww_ctx) - ww_mutex_set_context_fastpath(ww, ww_ctx); -+ DTRACE_LOCKSTAT(adaptive__acquire, struct mutex *, lock); - preempt_enable(); - return 0; - } -@@ -1003,6 +1013,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, - waiter.task = current; - - set_current_state(state); -+ if (DTRACE_LOCKSTAT_ENABLED(adaptive__spin)) -+ spinstart = dtrace_gethrtime_ns(); -+ - for (;;) { - /* - * Once we hold wait_lock, we're serialized against -@@ -1030,7 +1043,15 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, - } - - spin_unlock(&lock->wait_lock); -- schedule_preempt_disabled(); -+ -+ if (DTRACE_LOCKSTAT_ENABLED(adaptive__block)) { -+ waitstart = dtrace_gethrtime_ns(); -+ schedule_preempt_disabled(); -+ waitend = dtrace_gethrtime_ns(); -+ if (waitend > waitstart) -+ waittotal += waitend - waitstart; -+ } else -+ schedule_preempt_disabled(); - - /* - * ww_mutex needs to always recheck its position since its waiter -@@ -1082,6 +1103,19 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, - ww_mutex_lock_acquired(ww, ww_ctx); - - spin_unlock(&lock->wait_lock); -+ -+ if (DTRACE_LOCKSTAT_ENABLED(adaptive__spin) && spinstart) { -+ spinend = dtrace_gethrtime_ns(); -+ spintotal = (spinend > spinstart) ? (spinend - spinstart) : 0; -+ spintotal = (spintotal > waittotal) ? 
-+ (spintotal - waittotal) : 0; -+ DTRACE_LOCKSTAT(adaptive__spin, struct mutex *, lock, -+ uint64_t, spintotal); -+ } -+ if (DTRACE_LOCKSTAT_ENABLED(adaptive__block) && waittotal) -+ DTRACE_LOCKSTAT(adaptive__block, struct mutex *, lock, -+ uint64_t, waittotal); -+ DTRACE_LOCKSTAT(adaptive__acquire, struct mutex *, lock); - preempt_enable(); - return 0; - -@@ -1092,6 +1126,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, - spin_unlock(&lock->wait_lock); - debug_mutex_free_waiter(&waiter); - mutex_release(&lock->dep_map, ip); -+ DTRACE_LOCKSTAT(adaptive__acquire__error, struct mutex *, lock, -+ int, ret); - preempt_enable(); - return ret; - } -@@ -1307,8 +1343,10 @@ int __sched mutex_lock_interruptible(struct mutex *lock) - { - might_sleep(); - -- if (__mutex_trylock_fast(lock)) -+ if (__mutex_trylock_fast(lock)) { -+ DTRACE_LOCKSTAT(adaptive__acquire, struct mutex *, lock); - return 0; -+ } - - return __mutex_lock_interruptible_slowpath(lock); - } -@@ -1331,8 +1369,10 @@ int __sched mutex_lock_killable(struct mutex *lock) - { - might_sleep(); - -- if (__mutex_trylock_fast(lock)) -+ if (__mutex_trylock_fast(lock)) { -+ DTRACE_LOCKSTAT(adaptive__acquire, struct mutex *, lock); - return 0; -+ } - - return __mutex_lock_killable_slowpath(lock); - } -@@ -1416,8 +1456,10 @@ int __sched mutex_trylock(struct mutex *lock) - #endif - - locked = __mutex_trylock(lock); -- if (locked) -+ if (locked) { - mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ DTRACE_LOCKSTAT(adaptive__acquire, struct mutex *, lock); -+ } - - return locked; - } -diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c -index fe9ca92faa2a..6b1511be7805 100644 ---- a/kernel/locking/qrwlock.c -+++ b/kernel/locking/qrwlock.c -@@ -11,6 +11,7 @@ - #include <linux/cpumask.h> - #include <linux/percpu.h> - #include <linux/hardirq.h> -+#include <linux/sdt.h> - #include <linux/spinlock.h> - #include <asm/qrwlock.h> - -@@ -20,9 +21,13 @@ - */ - void 
queued_read_lock_slowpath(struct qrwlock *lock) - { -+ u64 spinstart = 0, spinend, spintime; -+ - /* - * Readers come here when they cannot get the lock without waiting - */ -+ if (DTRACE_LOCKSTAT_ENABLED(rw__spin)) -+ spinstart = dtrace_gethrtime_ns(); - if (unlikely(in_interrupt())) { - /* - * Readers in interrupt context will get the lock immediately -@@ -31,7 +36,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock) - * without waiting in the queue. - */ - atomic_cond_read_acquire(&lock->cnts, !(VAL & _QW_LOCKED)); -- return; -+ goto done; - } - atomic_sub(_QR_BIAS, &lock->cnts); - -@@ -52,6 +57,13 @@ void queued_read_lock_slowpath(struct qrwlock *lock) - * Signal the next one in queue to become queue head - */ - arch_spin_unlock(&lock->wait_lock); -+done: -+ if (DTRACE_LOCKSTAT_ENABLED(rw__spin) && spinstart) { -+ spinend = dtrace_gethrtime_ns(); -+ spintime = spinend > spinstart ? spinend - spinstart : 0; -+ DTRACE_LOCKSTAT(rw__spin, rwlock_t *, lock, uint64_t, spintime, -+ int, DTRACE_LOCKSTAT_RW_READER); -+ } - } - EXPORT_SYMBOL(queued_read_lock_slowpath); - -@@ -61,7 +73,11 @@ EXPORT_SYMBOL(queued_read_lock_slowpath); - */ - void queued_write_lock_slowpath(struct qrwlock *lock) - { -+ u64 spinstart = 0, spinend, spintime; -+ - /* Put the writer into the wait queue */ -+ if (DTRACE_LOCKSTAT_ENABLED(rw__spin)) -+ spinstart = dtrace_gethrtime_ns(); - arch_spin_lock(&lock->wait_lock); - - /* Try to acquire the lock directly if no reader is present */ -@@ -79,5 +95,11 @@ void queued_write_lock_slowpath(struct qrwlock *lock) - _QW_LOCKED) != _QW_WAITING); - unlock: - arch_spin_unlock(&lock->wait_lock); -+ if (DTRACE_LOCKSTAT_ENABLED(rw__spin) && spinstart) { -+ spinend = dtrace_gethrtime_ns(); -+ spintime = spinend > spinstart ? 
spinend - spinstart : 0; -+ DTRACE_LOCKSTAT(rw__spin, rwlock_t *, lock, uint64_t, spintime, -+ int, DTRACE_LOCKSTAT_RW_WRITER); -+ } - } - EXPORT_SYMBOL(queued_write_lock_slowpath); -diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c -index b9515fcc9b29..71b393a65fb5 100644 ---- a/kernel/locking/qspinlock.c -+++ b/kernel/locking/qspinlock.c -@@ -20,6 +20,7 @@ - #include <linux/hardirq.h> - #include <linux/mutex.h> - #include <linux/prefetch.h> -+#include <linux/sdt.h> - #include <asm/byteorder.h> - #include <asm/qspinlock.h> - -@@ -315,16 +316,20 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, - void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) - { - struct mcs_spinlock *prev, *next, *node; -+ u64 spinstart = 0, spinend, spintime; - u32 old, tail; - int idx; - - BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); - -+ if (DTRACE_LOCKSTAT_ENABLED(spin__spin)) -+ spinstart = dtrace_gethrtime_ns(); -+ - if (pv_enabled()) - goto pv_queue; - - if (virt_spin_lock(lock)) -- return; -+ goto out; - - /* - * Wait for in-progress pending->locked hand-overs with a bounded -@@ -388,7 +393,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) - */ - clear_pending_set_locked(lock); - lockevent_inc(lock_pending); -- return; -+ goto out; - - /* - * End of pending bit optimistic spinning and beginning of MCS -@@ -558,6 +563,17 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) - * release the node - */ - __this_cpu_dec(qnodes[0].mcs.count); -+ -+ /* -+ * Fire spin-spin probe to note time waiting for a lock. -+ */ -+out: -+ if (DTRACE_LOCKSTAT_ENABLED(spin__spin)) { -+ spinend = dtrace_gethrtime_ns(); -+ spintime = spinend > spinstart ? 
spinend - spinstart : 0; -+ DTRACE_LOCKSTAT(spin__spin, spinlock_t *, lock, -+ uint64_t, spintime); -+ } - } - EXPORT_SYMBOL(queued_spin_lock_slowpath); - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 4c2014cc77cd..0d09eae0c8ef 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -1291,6 +1291,9 @@ static inline void init_uclamp(void) { } - - static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) - { -+ DTRACE_SCHED(enqueue, struct task_struct * : (lwpsinfo_t *, -+ psinfo_t *), p, -+ cpuinfo_t *, rq->dtrace_cpu_info); - if (!(flags & ENQUEUE_NOCLOCK)) - update_rq_clock(rq); - -@@ -1305,6 +1308,10 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) - - static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) - { -+ DTRACE_SCHED(dequeue, struct task_struct * : (lwpsinfo_t *, -+ psinfo_t *), p, -+ cpuinfo_t *, rq->dtrace_cpu_info, -+ int, 0); - if (!(flags & DEQUEUE_NOCLOCK)) - update_rq_clock(rq); - -@@ -2546,6 +2553,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) - goto unlock; - - trace_sched_waking(p); -+ DTRACE_SCHED(wakeup, struct task_struct * : (lwpsinfo_t *, -+ psinfo_t *), p); - - /* We're going to change ->state: */ - success = 1; -@@ -3148,6 +3157,8 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, - sched_info_switch(rq, prev, next); - perf_event_task_sched_out(prev, next); - rseq_preempt(prev); -+ DTRACE_SCHED(off__cpu, struct task_struct * : (lwpsinfo_t *, -+ psinfo_t *), next); - fire_sched_out_preempt_notifiers(prev, next); - prepare_task(next); - prepare_arch_switch(next); -@@ -3221,6 +3232,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) - finish_arch_post_lock_switch(); - kcov_finish_switch(current); - -+ DTRACE_SCHED(on__cpu); - fire_sched_in_preempt_notifiers(current); - /* - * When switching through a kernel thread, the loop in -@@ -3321,6 +3333,8 @@ asmlinkage __visible 
void schedule_tail(struct task_struct *prev) - put_user(task_pid_vnr(current), current->set_child_tid); - - calculate_sigpending(); -+ DTRACE_PROC(start); -+ DTRACE_PROC(lwp__start); - } - - /* -@@ -4034,6 +4048,7 @@ static void __sched notrace __schedule(bool preempt) - - switch_count = &prev->nivcsw; - if (!preempt && prev->state) { -+ DTRACE_SCHED(sleep); - if (signal_pending_state(prev->state, prev)) { - prev->state = TASK_RUNNING; - } else { -@@ -4045,7 +4060,8 @@ static void __sched notrace __schedule(bool preempt) - } - } - switch_count = &prev->nvcsw; -- } -+ } else -+ DTRACE_SCHED(preempt); - - next = pick_next_task(rq, prev, &rf); - clear_tsk_need_resched(prev); -@@ -4080,6 +4096,7 @@ static void __sched notrace __schedule(bool preempt) - rq = context_switch(rq, prev, next, &rf); - } else { - rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); -+ DTRACE_SCHED(remain__cpu); - rq_unlock_irq(rq, &rf); - } - -@@ -4538,6 +4555,9 @@ void set_user_nice(struct task_struct *p, long nice) - old_prio = p->prio; - p->prio = effective_prio(p); - -+ DTRACE_SCHED(change__pri, struct task_struct * : (lwpsinfo_t *, -+ psinfo_t *), p, -+ int, old_prio); - if (queued) - enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); - if (running) -@@ -5604,6 +5624,9 @@ static void do_sched_yield(void) - rq_unlock(rq, &rf); - sched_preempt_enable_no_resched(); - -+ DTRACE_SCHED(surrender, -+ struct task_struct * : (lwpsinfo_t *, psinfo_t *), -+ current); - schedule(); - } - -@@ -5750,8 +5773,12 @@ int __sched yield_to(struct task_struct *p, bool preempt) - out_irq: - local_irq_restore(flags); - -- if (yielded > 0) -+ if (yielded > 0) { -+ DTRACE_SCHED(surrender, -+ struct task_struct * : (lwpsinfo_t *, psinfo_t *), -+ curr); - schedule(); -+ } - - return yielded; - } -diff --git a/kernel/signal.c b/kernel/signal.c -index 9abf962bbde4..d6f3f2b83c50 100644 ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -49,6 +49,7 @@ - - #define CREATE_TRACE_POINTS - #include 
<trace/events/signal.h> -+#include <linux/sdt.h> - - #include <asm/param.h> - #include <linux/uaccess.h> -@@ -1078,8 +1079,12 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc - assert_spin_locked(&t->sighand->siglock); - - result = TRACE_SIGNAL_IGNORED; -- if (!prepare_signal(sig, t, force)) -+ if (!prepare_signal(sig, t, force)) { -+ DTRACE_PROC(signal__discard, -+ struct task_struct * : (lwpsinfo_t *, psinfo_t *), t, -+ int, sig); - goto ret; -+ } - - pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending; - /* -@@ -1178,6 +1183,9 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc - } - - complete_signal(sig, t, type); -+ DTRACE_PROC(signal__send, -+ struct task_struct * : (lwpsinfo_t *, psinfo_t *), t, -+ int, sig); - ret: - trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result); - return ret; -@@ -1882,6 +1890,9 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type) - list_add_tail(&q->list, &pending->list); - sigaddset(&pending->signal, sig); - complete_signal(sig, t, type); -+ DTRACE_PROC(signal__send, -+ struct task_struct * : (lwpsinfo_t *, psinfo_t *), t, -+ int, sig); - result = TRACE_SIGNAL_DELIVERED; - out: - trace_signal_generate(sig, &q->info, t, type != PIDTYPE_PID, result); -@@ -2591,6 +2602,11 @@ bool get_signal(struct ksignal *ksig) - trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO, - &sighand->action[SIGKILL - 1]); - recalc_sigpending(); -+ DTRACE_PROC(signal__handle, -+ int, signal->group_exit_code -+ ? signal->group_exit_code -+ : signr, -+ siginfo_t *, NULL, void (*)(void), NULL); - goto fatal; - } - -@@ -2643,6 +2659,15 @@ bool get_signal(struct ksignal *ksig) - - ka = &sighand->action[signr-1]; - -+ DTRACE_PROC(signal__handle, -+ int, signal->group_exit_code -+ ? signal->group_exit_code -+ : signr, -+ siginfo_t *, ksig->ka.sa.sa_handler != SIG_DFL -+ ? 
NULL -+ : &ksig->info, -+ void (*)(void), ksig->ka.sa.sa_handler); -+ - /* Trace actually delivered signals. */ - trace_signal_deliver(signr, &ksig->info, ka); - -@@ -3483,8 +3508,10 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info, - } - spin_unlock_irq(&tsk->sighand->siglock); - -- if (sig) -+ if (sig) { -+ DTRACE_PROC(signal__clear, int, sig); - return sig; -+ } - return ret ? -EINTR : -EAGAIN; - } - -diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index 4820823515e9..fc611aea27d3 100644 ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -43,6 +43,7 @@ - #include <linux/sched/debug.h> - #include <linux/slab.h> - #include <linux/compat.h> -+#include <linux/sdt.h> - - #include <linux/uaccess.h> - #include <asm/unistd.h> -@@ -1720,6 +1721,9 @@ void update_process_times(int user_tick) - { - struct task_struct *p = current; - -+ DTRACE_SCHED(tick, struct task_struct * : (lwpsinfo_t *, psinfo_t *), -+ p); -+ - /* Note: this timer irq context must be accounted for as well. */ - account_process_tick(p, user_tick); - run_local_timers(); -diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c -index aa438c6758a7..acaf9ccd1dd3 100644 ---- a/net/ipv4/ip_input.c -+++ b/net/ipv4/ip_input.c -@@ -141,6 +141,7 @@ - #include <linux/mroute.h> - #include <linux/netlink.h> - #include <net/dst_metadata.h> -+#include <linux/sdt.h> - - /* - * Process Router Attention IP option (RFC 2113) -@@ -239,16 +240,26 @@ static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_b - */ - int ip_local_deliver(struct sk_buff *skb) - { -+ struct iphdr *iph = ip_hdr(skb); -+ - /* - * Reassemble IP fragments. 
- */ - struct net *net = dev_net(skb->dev); - -- if (ip_is_fragment(ip_hdr(skb))) { -+ if (ip_is_fragment(iph)) { - if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER)) - return 0; - } - -+ DTRACE_IP(receive, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, iph, -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, iph, -+ struct ipv6hdr * : ipv6info_t *, NULL); -+ - return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, - net, NULL, skb, skb->dev, NULL, - ip_local_deliver_finish); -@@ -257,7 +268,8 @@ int ip_local_deliver(struct sk_buff *skb) - static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) - { - struct ip_options *opt; -- const struct iphdr *iph; -+ const struct iphdr *iph = NULL; -+ const char *dropreason; - - /* It looks as overkill, because not all - IP options require packet mangling. -@@ -267,6 +279,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) - --ANK (980813) - */ - if (skb_cow(skb, skb_headroom(skb))) { -+ dropreason = "copy-on-write failed"; - __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS); - goto drop; - } -@@ -276,6 +289,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) - opt->optlen = iph->ihl*4 - sizeof(struct iphdr); - - if (ip_options_compile(dev_net(dev), opt, skb)) { -+ dropreason = "invalid options"; - __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS); - goto drop; - } -@@ -289,16 +303,28 @@ static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) - net_info_ratelimited("source route option %pI4 -> %pI4\n", - &iph->saddr, - &iph->daddr); -+ dropreason = "invalid source route options"; - goto drop; - } - } - -- if (ip_options_rcv_srr(skb, dev)) -+ if (ip_options_rcv_srr(skb, dev)) { -+ dropreason = "invalid options"; - goto drop; -+ } - } - - return false; - drop: -+ DTRACE_IP(drop__in, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct 
sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, iph, -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, iph, -+ struct ipv6hdr * : ipv6info_t *, NULL, -+ const char * : string, dropreason); -+ - return true; - } - -@@ -432,27 +458,35 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) - /* - * Main IP Receive routine. - */ --static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) -+static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net, -+ struct net_device *dev -+ __attribute__((__unused__))) - { -- const struct iphdr *iph; -+ const struct iphdr *iph = NULL; - u32 len; -+ const char *dropreason = "header invalid"; - - /* When the interface is in promisc. mode, drop all the crap - * that it receives, do not try to analyse it. - */ -- if (skb->pkt_type == PACKET_OTHERHOST) -+ if (skb->pkt_type == PACKET_OTHERHOST) { -+ dropreason = "for other host"; - goto drop; -+ } - - __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); - - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) { -+ dropreason = "could not clone shared buffer"; - __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); -- goto out; -+ goto drop; - } - -- if (!pskb_may_pull(skb, sizeof(struct iphdr))) -+ if (!pskb_may_pull(skb, sizeof(struct iphdr))) { -+ dropreason = "could not pull skb"; - goto inhdr_error; -+ } - - iph = ip_hdr(skb); - -@@ -487,6 +521,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) - - len = ntohs(iph->tot_len); - if (skb->len < len) { -+ dropreason = "packet too short"; - __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); - goto drop; - } else if (len < (iph->ihl*4)) -@@ -497,6 +532,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) - * Note this now means skb->len holds ntohs(iph->tot_len). 
- */ - if (pskb_trim_rcsum(skb, len)) { -+ dropreason = "could not trim buffer"; - __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); - goto drop; - } -@@ -515,11 +551,19 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) - - csum_error: - __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); -+ dropreason = "checksum error"; - inhdr_error: - __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); - drop: -+ DTRACE_IP(drop__in, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb ? skb->sk : NULL, -+ void_ip_t * : ipinfo_t *, iph, -+ struct net_device * : ifinfo_t *, dev, -+ struct iphdr * : ipv4info_t *, iph, -+ void * : ipv6info_t *, NULL, -+ const char * : string, dropreason); - kfree_skb(skb); --out: - return NULL; - } - -@@ -531,7 +575,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, - { - struct net *net = dev_net(dev); - -- skb = ip_rcv_core(skb, net); -+ skb = ip_rcv_core(skb, net, dev); - if (skb == NULL) - return NET_RX_DROP; - -@@ -622,7 +666,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt, - struct net *net = dev_net(dev); - - skb_list_del_init(skb); -- skb = ip_rcv_core(skb, net); -+ skb = ip_rcv_core(skb, net, dev); - if (skb == NULL) - continue; - -diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c -index d84819893db9..164127d36458 100644 ---- a/net/ipv4/ip_output.c -+++ b/net/ipv4/ip_output.c -@@ -81,6 +81,7 @@ - #include <linux/netfilter_bridge.h> - #include <linux/netlink.h> - #include <linux/tcp.h> -+#include <linux/sdt.h> - - static int - ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, -@@ -111,6 +112,14 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) - - skb->protocol = htons(ETH_P_IP); - -+ DTRACE_IP(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ void_ip_t * : ipinfo_t *, iph, -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, iph, -+ struct 
ipv6hdr * : ipv6info_t *, NULL); -+ - return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, skb_dst(skb)->dev, - dst_output); -@@ -975,6 +984,7 @@ static int __ip_append_data(struct sock *sk, - unsigned int wmem_alloc_delta = 0; - bool paged, extra_uref = false; - u32 tskey = 0; -+ const char *dropreason; - - skb = skb_peek_tail(queue); - -@@ -993,9 +1003,13 @@ static int __ip_append_data(struct sock *sk, - maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; - - if (cork->length + length > maxnonfragsize - fragheaderlen) { -+ struct iphdr *iph __attribute__((unused)) = ip_hdr(skb); -+ - ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, - mtu - (opt ? opt->optlen : 0)); -- return -EMSGSIZE; -+ dropreason = "packet too big"; -+ err = -EMSGSIZE; -+ goto error2; - } - - /* -@@ -1095,8 +1109,10 @@ static int __ip_append_data(struct sock *sk, - 2 * sk->sk_sndbuf) - skb = alloc_skb(alloclen + hh_len + 15, - sk->sk_allocation); -- if (unlikely(!skb)) -+ if (unlikely(!skb)) { -+ dropreason = "no buffers"; - err = -ENOBUFS; -+ } - } - if (!skb) - goto error; -@@ -1130,7 +1146,9 @@ static int __ip_append_data(struct sock *sk, - copy = datalen - transhdrlen - fraggap - pagedlen; - if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { - err = -EFAULT; -+ dropreason = "could not fragment packet"; - kfree_skb(skb); -+ skb = NULL; - goto error; - } - -@@ -1173,6 +1191,7 @@ static int __ip_append_data(struct sock *sk, - if (getfrag(from, skb_put(skb, copy), - offset, copy, off, skb) < 0) { - __skb_trim(skb, off); -+ dropreason = "could not fragment packet"; - err = -EFAULT; - goto error; - } -@@ -1180,14 +1199,18 @@ static int __ip_append_data(struct sock *sk, - int i = skb_shinfo(skb)->nr_frags; - - err = -ENOMEM; -- if (!sk_page_frag_refill(sk, pfrag)) -+ if (!sk_page_frag_refill(sk, pfrag)) { -+ dropreason = "no memory"; - goto error; -+ } - - if (!skb_can_coalesce(skb, i, pfrag->page, - pfrag->offset)) { - err = 
-EMSGSIZE; -- if (i == MAX_SKB_FRAGS) -+ if (i == MAX_SKB_FRAGS) { -+ dropreason = "too many fragments"; - goto error; -+ } - - __skb_fill_page_desc(skb, i, pfrag->page, - pfrag->offset, 0); -@@ -1197,8 +1220,10 @@ static int __ip_append_data(struct sock *sk, - copy = min_t(int, copy, pfrag->size - pfrag->offset); - if (getfrag(from, - page_address(pfrag->page) + pfrag->offset, -- offset, copy, skb->len, skb) < 0) -+ offset, copy, skb->len, skb) < 0) { -+ dropreason = "could not framgent packet"; - goto error_efault; -+ } - - pfrag->offset += copy; - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); -@@ -1227,6 +1252,16 @@ static int __ip_append_data(struct sock *sk, - cork->length -= length; - IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); - refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); -+error2: -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb ? skb->sk : NULL, -+ void_ip_t * : ipinfo_t *, skb ? ip_hdr(skb) : NULL, -+ struct net_device * : ifinfo_t *, skb ? skb->dev : NULL, -+ struct iphdr * : ipv4info_t *, skb ? 
ip_hdr(skb) : NULL, -+ struct ipv6hdr * : ipv6info_t *, NULL, -+ const char * : string, dropreason); -+ - return err; - } - -@@ -1330,6 +1365,8 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, - int len; - int err; - unsigned int maxfraglen, fragheaderlen, fraggap, maxnonfragsize; -+ struct iphdr *iph; -+ const char *dropreason; - - if (inet->hdrincl) - return -EPERM; -@@ -1383,6 +1420,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, - alloclen = fragheaderlen + hh_len + fraggap + 15; - skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation); - if (unlikely(!skb)) { -+ dropreason = "no buffers"; - err = -ENOBUFS; - goto error; - } -@@ -1422,6 +1460,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, - len = size; - - if (skb_append_pagefrags(skb, page, offset, len)) { -+ dropreason = "packet too big"; - err = -EMSGSIZE; - goto error; - } -@@ -1444,6 +1483,16 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, - error: - cork->length -= size; - IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); -+ iph = skb ? ip_hdr(skb) : NULL; -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb ? skb->sk : NULL, -+ void_ip_t * : ipinfo_t *, iph, -+ struct net_device * : ifinfo_t *, skb ? 
skb->dev : NULL, -+ struct iphdr * : ipv4info_t *, iph, -+ struct ipv6hdr * : ipv6info_t *, NULL, -+ const char * : string, dropreason); -+ - return err; - } - -@@ -1561,8 +1610,18 @@ int ip_send_skb(struct net *net, struct sk_buff *skb) - if (err) { - if (err > 0) - err = net_xmit_errno(err); -- if (err) -+ if (err) { - IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); -+ /* skb may have been freed */ -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, NULL, -+ struct sock * : csinfo_t *, NULL, -+ void_ip_t * : ipinfo_t *, NULL, -+ struct net_device * : ifinfo_t *, NULL, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, NULL, -+ char * : string, "packet too short"); -+ } - } - - return err; -diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c -index 3183413ebc6c..b9e851337468 100644 ---- a/net/ipv4/raw.c -+++ b/net/ipv4/raw.c -@@ -75,6 +75,7 @@ - #include <linux/netfilter_ipv4.h> - #include <linux/compat.h> - #include <linux/uio.h> -+#include <linux/sdt.h> - - struct raw_frag_vec { - struct msghdr *msg; -@@ -348,19 +349,25 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, - struct inet_sock *inet = inet_sk(sk); - struct net *net = sock_net(sk); - struct iphdr *iph; -- struct sk_buff *skb; -+ struct sk_buff *skb = NULL; - unsigned int iphlen; - int err; - struct rtable *rt = *rtp; - int hlen, tlen; -+ const char *dropreason; - - if (length > rt->dst.dev->mtu) { - ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, - rt->dst.dev->mtu); -- return -EMSGSIZE; -+ dropreason = "packet too big"; -+ err = -EMSGSIZE; -+ goto trace_drop; -+ } -+ if (length < sizeof(struct iphdr)) { -+ dropreason = "packet too short"; -+ err = -EINVAL; -+ goto trace_drop; - } -- if (length < sizeof(struct iphdr)) -- return -EINVAL; - - if (flags&MSG_PROBE) - goto out; -@@ -370,8 +377,10 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, - skb = sock_alloc_send_skb(sk, - length + hlen + tlen + 15, - flags & MSG_DONTWAIT, &err); -- if 
(!skb) -+ if (!skb) { -+ dropreason = "out of memory"; - goto error; -+ } - skb_reserve(skb, hlen); - - skb->priority = sk->sk_priority; -@@ -393,8 +402,10 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, - - skb->transport_header = skb->network_header; - err = -EFAULT; -- if (memcpy_from_msg(iph, msg, length)) -+ if (memcpy_from_msg(iph, msg, length)) { -+ dropreason = "could not copy msg"; - goto error_free; -+ } - - iphlen = iph->ihl * 4; - -@@ -406,8 +417,10 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, - * in, reject the frame as invalid - */ - err = -EINVAL; -- if (iphlen > length) -+ if (iphlen > length) { -+ dropreason = "IP header too big"; - goto error_free; -+ } - - if (iphlen >= sizeof(*iph)) { - if (!iph->saddr) -@@ -425,20 +438,40 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, - skb_transport_header(skb))->type); - } - -+ DTRACE_IP(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ void_ip_t * : ipinfo_t *, iph, -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, iph, -+ struct ipv6hdr * : ipv6info_t *, NULL); -+ - err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, rt->dst.dev, - dst_output); - if (err > 0) - err = net_xmit_errno(err); -- if (err) -+ if (err) { -+ dropreason = "device dropping packets of this priority"; - goto error; -+ } - out: - return 0; - - error_free: - kfree_skb(skb); -+ skb = NULL; - error: - IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); -+trace_drop: -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb ? skb->sk : NULL, -+ void_ip_t * : ipinfo_t *, skb ? ip_hdr(skb) : NULL, -+ struct net_device * : ifinfo_t *, skb ? skb->dev : NULL, -+ struct iphdr * : ipv4info_t *, skb ? 
ip_hdr(skb) : NULL, -+ struct ipv6hdr * : ipv6info_t *, NULL, -+ const char * : string, dropreason); - if (err == -ENOBUFS && !inet->recverr) - err = 0; - return err; -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index 06aad5e09459..054ad38e6283 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -267,6 +267,7 @@ - #include <linux/slab.h> - #include <linux/errqueue.h> - #include <linux/static_key.h> -+#include <linux/sdt.h> - - #include <net/icmp.h> - #include <net/inet_common.h> -@@ -2220,6 +2221,19 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, - } - EXPORT_SYMBOL(tcp_recvmsg); - -+/* We wish to avoid instrumenting TCP state transitions to SYN_SENT as we trace -+ * those state changes later once the destination address is committed to the -+ * sk. We also need to deal with the fact that separate timewait sockets are -+ * used to handle the TIME_WAIT state. We do not want to trace direct -+ * transitions from CLOSING/FIN_WAIT2 -> CLOSE since they do not represent -+ * connection close, rather a transition to using the timewait socket. -+ * Accordingly skip instrumentation of transitions from CLOSING/FIN_WAIT2 to -+ * CLOSE. -+ */ -+#define REAL_STATE_CHANGE(old, new) \ -+ (old != new && new != TCP_SYN_SENT && \ -+ ((old != TCP_CLOSING && old != TCP_FIN_WAIT2) || new != TCP_CLOSE)) -+ - void tcp_set_state(struct sock *sk, int state) - { - int oldstate = sk->sk_state; -@@ -2272,6 +2286,18 @@ void tcp_set_state(struct sock *sk, int state) - * socket sitting in hash tables. 
- */ - inet_sk_state_store(sk, state); -+ -+ if (DTRACE_TCP_ENABLED(state__change) && -+ REAL_STATE_CHANGE(oldstate, state)) -+ DTRACE_TCP_NOCHECK(state__change, -+ struct sk_buff * : pktinfo_t *, NULL, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, NULL, -+ struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk), -+ struct tcphdr * : tcpinfo_t *, NULL, -+ int : tcplsinfo_t *, oldstate, -+ int : int, state, -+ int : int, DTRACE_NET_PROBE_OUTBOUND); - } - EXPORT_SYMBOL_GPL(tcp_set_state); - -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index e17d396102ce..7557a39692cc 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -80,6 +80,7 @@ - #include <linux/jump_label_ratelimit.h> - #include <net/busy_poll.h> - #include <net/mptcp.h> -+#include <linux/sdt.h> - - int sysctl_tcp_max_orphans __read_mostly = NR_FILE; - -@@ -5771,6 +5772,14 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) - struct tcp_sock *tp = tcp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); - -+ DTRACE_TCP(connect__established, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb), -+ struct tcp_sock * : tcpsinfo_t *, tp, -+ struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), -+ int : tcplsinfo_t *, TCP_ESTABLISHED, -+ int, TCP_ESTABLISHED, int, DTRACE_NET_PROBE_INBOUND); - tcp_set_state(sk, TCP_ESTABLISHED); - icsk->icsk_ack.lrcvtime = tcp_jiffies32; - -@@ -5935,6 +5944,17 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - */ - - if (th->rst) { -+ DTRACE_TCP(connect__refused, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, -+ ip_hdr(skb), -+ struct tcp_sock * : tcpsinfo_t *, tp, -+ struct tcphdr * : tcpinfo_t *, th, -+ int : tcplsinfo_t *, -+ sk ? sk->sk_state : TCP_CLOSE, -+ int, sk ? 
sk->sk_state : TCP_CLOSE, -+ int, DTRACE_NET_PROBE_INBOUND); - tcp_reset(sk); - goto discard; - } -@@ -6259,6 +6279,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) - WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); - } - smp_mb(); -+ -+ DTRACE_TCP(accept__established, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb), -+ struct tcp_sock * : tcpsinfo_t *, tp, -+ struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), -+ int : tcplsinfo_t *, TCP_ESTABLISHED, -+ int, TCP_ESTABLISHED, -+ int, DTRACE_NET_PROBE_INBOUND); - tcp_set_state(sk, TCP_ESTABLISHED); - sk->sk_state_change(sk); - -@@ -6720,6 +6750,19 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, - af_ops->send_synack(sk, dst, &fl, req, &foc, - !want_cookie ? TCP_SYNACK_NORMAL : - TCP_SYNACK_COOKIE); -+ /* Do not pass in tcp sock as ports/addresses are not yet -+ * populated - instead translators will fill them in from -+ * skb data. -+ */ -+ DTRACE_TCP(state__change, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb), -+ struct tcp_sock * : tcpsinfo_t *, NULL, -+ struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), -+ int : tcplsinfo_t *, TCP_LISTEN, -+ int, TCP_SYN_RECV, int, DTRACE_NET_PROBE_INBOUND); -+ - if (want_cookie) { - reqsk_free(req); - return 0; -diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c -index df1166b76126..778465091297 100644 ---- a/net/ipv4/tcp_ipv4.c -+++ b/net/ipv4/tcp_ipv4.c -@@ -79,6 +79,7 @@ - - #include <crypto/hash.h> - #include <linux/scatterlist.h> -+#include <linux/sdt.h> - - #include <trace/events/tcp.h> - -@@ -633,6 +634,21 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) - } - EXPORT_SYMBOL(tcp_v4_send_check); - -+/* Since we want to trace send events in TCP prior to pushing the segment to -+ * IP - where the IP header is added - we need to construct an argument -+ * containing relevant IP info so 
that TCP probe consumers can utilize it. -+ */ -+static inline void dtrace_tcp_build_iphdr(__be32 saddr, __be32 daddr, -+ struct iphdr *iph) -+{ -+ iph->version = 4; -+ iph->ihl = 5; -+ iph->tot_len = 5; -+ iph->protocol = IPPROTO_TCP; -+ iph->saddr = saddr; -+ iph->daddr = daddr; -+} -+ - /* - * This routine will send an RST to the other tcp. - * -@@ -791,6 +807,39 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) - inet_twsk(sk)->tw_priority : sk->sk_priority; - transmit_time = tcp_transmit_time(sk); - } -+ -+ if (DTRACE_TCP_ENABLED(send) || -+ DTRACE_TCP_ENABLED(accept__refused)) { -+ struct iphdr iph; -+ -+ dtrace_tcp_build_iphdr(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, -+ &iph); -+ -+ DTRACE_TCP_NOCHECK(send, -+ struct sk_buff * : pktinfo_t *, NULL, -+ struct sock * : csinfo_t *, NULL, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, &iph, -+ struct tcp_sock * : tcpsinfo_t *, NULL, -+ struct tcphdr * : tcpinfo_t *, &rep.th, -+ int : tcplsinfo_t *, TCP_CLOSE, -+ int : int, TCP_CLOSE, -+ int : int, DTRACE_NET_PROBE_OUTBOUND); -+ if (th->syn && rep.th.seq == 0) -+ DTRACE_TCP_NOCHECK(accept__refused, -+ struct sk_buff * : pktinfo_t *, NULL, -+ struct sock * : csinfo_t *, NULL, -+ __dtrace_tcp_void_ip_t * : -+ ipinfo_t *, &iph, -+ struct tcp_sock * : tcpsinfo_t *, -+ NULL, -+ struct tcphdr * : tcpinfo_t *, -+ &rep.th, -+ int : tcplsinfo_t *, TCP_CLOSE, -+ int : int, TCP_CLOSE, -+ int : int, -+ DTRACE_NET_PROBE_OUTBOUND); -+ } -+ - ip_send_unicast_reply(ctl_sk, - skb, &TCP_SKB_CB(skb)->header.h4.opt, - ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, -@@ -887,6 +936,24 @@ static void tcp_v4_send_ack(const struct sock *sk, - ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? 
- inet_twsk(sk)->tw_priority : sk->sk_priority; - transmit_time = tcp_transmit_time(sk); -+ -+ if (DTRACE_TCP_ENABLED(send)) { -+ struct iphdr iph; -+ -+ dtrace_tcp_build_iphdr(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, -+ &iph); -+ -+ DTRACE_TCP_NOCHECK(send, -+ struct sk_buff * : pktinfo_t *, NULL, -+ struct sock * : csinfo_t *, NULL, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, &iph, -+ struct tcp_sock * : tcpsinfo_t *, NULL, -+ struct tcphdr * : tcpinfo_t *, &rep, -+ int : tcplsinfo_t *, TCP_CLOSE, -+ int : int, TCP_CLOSE, -+ int : int, DTRACE_NET_PROBE_OUTBOUND); -+ } -+ - ip_send_unicast_reply(ctl_sk, - skb, &TCP_SKB_CB(skb)->header.h4.opt, - ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, -@@ -972,6 +1039,30 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, - if (skb) { - __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); - -+ if (DTRACE_TCP_ENABLED(send)) { -+ struct iphdr iph; -+ -+ dtrace_tcp_build_iphdr(ireq->ir_loc_addr, -+ ireq->ir_rmt_addr, &iph); -+ -+ /* Do not supply tcp sk - addresses/ports are not -+ * committed yet - instead translators will fill them -+ * in from skb/IP info. 
-+ */ -+ DTRACE_TCP_NOCHECK(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : -+ ipinfo_t *, &iph, -+ struct tcp_sock * : tcpsinfo_t *, -+ NULL, -+ struct tcphdr * : tcpinfo_t *, -+ tcp_hdr(skb), -+ int : tcplsinfo_t *, TCP_LISTEN, -+ int, TCP_LISTEN, -+ int, DTRACE_NET_PROBE_OUTBOUND); -+ } -+ - rcu_read_lock(); - err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, - ireq->ir_rmt_addr, -@@ -1869,7 +1960,7 @@ int tcp_v4_rcv(struct sk_buff *skb) - const struct iphdr *iph; - const struct tcphdr *th; - bool refcounted; -- struct sock *sk; -+ struct sock *sk = NULL; - int ret; - - if (skb->pkt_type != PACKET_HOST) -@@ -1904,6 +1995,15 @@ int tcp_v4_rcv(struct sk_buff *skb) - if (!sk) - goto no_tcp_socket; - -+ DTRACE_TCP(receive, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb), -+ struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk), -+ struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), -+ int : tcplsinfo_t *, sk ? sk->sk_state : TCP_CLOSE, -+ int, sk ? sk->sk_state : TCP_CLOSE, -+ int, DTRACE_NET_PROBE_INBOUND); - process: - if (sk->sk_state == TCP_TIME_WAIT) - goto do_time_wait; -@@ -2031,6 +2131,18 @@ int tcp_v4_rcv(struct sk_buff *skb) - - discard_it: - /* Discard frame. */ -+ if (DTRACE_TCP_ENABLED(receive) && skb->pkt_type == PACKET_HOST) -+ DTRACE_TCP_NOCHECK(receive, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, -+ ip_hdr(skb), -+ struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk), -+ struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), -+ int : tcplsinfo_t *, -+ sk ? sk->sk_state : TCP_CLOSE, -+ int, sk ? 
sk->sk_state : TCP_CLOSE, -+ int, DTRACE_NET_PROBE_INBOUND); - kfree_skb(skb); - return 0; - -diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c -index ad3b56d9fa71..e678b548010c 100644 ---- a/net/ipv4/tcp_minisocks.c -+++ b/net/ipv4/tcp_minisocks.c -@@ -24,6 +24,7 @@ - #include <linux/slab.h> - #include <linux/sysctl.h> - #include <linux/workqueue.h> -+#include <linux/sdt.h> - #include <linux/static_key.h> - #include <net/tcp.h> - #include <net/inet_common.h> -@@ -328,6 +329,20 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) - */ - inet_twsk_hashdance(tw, sk, &tcp_hashinfo); - local_bh_enable(); -+ -+ if (DTRACE_TCP_ENABLED(state__change) && -+ state != sk->sk_state) -+ DTRACE_TCP_NOCHECK(state__change, -+ struct sk_buff * : pktinfo_t *, NULL, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : -+ ipinfo_t *, NULL, -+ struct tcp_sock * : tcpsinfo_t *, -+ tcp_sk(sk), -+ struct tcphdr * : tcpinfo_t *, NULL, -+ int : tcplsinfo_t *, sk->sk_state, -+ int, state, -+ int, DTRACE_NET_PROBE_OUTBOUND); - } else { - /* Sorry, if we're out of memory, just CLOSE this - * socket up. 
We've got bigger problems than -diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index 2f45cde168c4..f8701d061ae9 100644 ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -44,6 +44,7 @@ - #include <linux/gfp.h> - #include <linux/module.h> - #include <linux/static_key.h> -+#include <linux/sdt.h> - - #include <trace/events/tcp.h> - -@@ -1218,6 +1219,27 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, - tp->bytes_sent += skb->len - tcp_header_size; - } - -+ DTRACE_TCP(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, NULL, -+ struct tcp_sock * : tcpsinfo_t *, tp, -+ struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), -+ int : tcplsinfo_t *, sk->sk_state, int, sk->sk_state, -+ int, DTRACE_NET_PROBE_OUTBOUND); -+ if (DTRACE_TCP_ENABLED(connect__request) && th->syn && -+ th->ack_seq == 0) -+ DTRACE_TCP_NOCHECK(connect__request, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, -+ ip_hdr(skb), -+ struct tcp_sock * : tcpsinfo_t *, tp, -+ struct tcphdr * : tcpinfo_t *, th, -+ int : tcplsinfo_t *, sk->sk_state, -+ int, sk->sk_state, -+ int, DTRACE_NET_PROBE_OUTBOUND); -+ - if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) - TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, - tcp_skb_pcount(skb)); -@@ -3658,6 +3680,13 @@ int tcp_connect(struct sock *sk) - tp->retrans_stamp = tcp_time_stamp(tp); - tcp_connect_queue_skb(sk, buff); - tcp_ecn_send_syn(sk, buff); -+ DTRACE_TCP(state__change, struct sk_buff * : pktinfo_t *, NULL, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(buff), -+ struct tcp_sock * : tcpsinfo_t *, tp, -+ struct tcphdr * : tcpinfo_t *, tcp_hdr(buff), -+ int : tcplsinfo_t *, TCP_CLOSE, -+ int, sk->sk_state, int, DTRACE_NET_PROBE_OUTBOUND); - tcp_rbtree_insert(&sk->tcp_rtx_queue, buff); - - /* Send off SYN; include data in Fast Open. 
*/ -diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c -index 08a41f1e1cd2..043c641d1b8c 100644 ---- a/net/ipv4/udp.c -+++ b/net/ipv4/udp.c -@@ -106,6 +106,7 @@ - #include <net/xfrm.h> - #include <trace/events/udp.h> - #include <linux/static_key.h> -+#include <linux/sdt.h> - #include <trace/events/skb.h> - #include <net/busy_poll.h> - #include "udp_impl.h" -@@ -888,6 +889,13 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, - uh->check = CSUM_MANGLED_0; - - send: -+ DTRACE_UDP(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ void_ip_t * : ipinfo_t *, ip_hdr(skb), -+ struct udp_sock * : udpsinfo_t *, udp_sk(sk), -+ struct udphdr * : udpinfo_t *, uh); -+ - err = ip_send_skb(sock_net(sk), skb); - if (err) { - if (err == -ENOBUFS && !inet->recverr) { -@@ -1786,9 +1794,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, - return err; - } - -- if (!peeking) -+ if (!peeking) { -+ DTRACE_UDP(receive, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ void_ip_t * : ipinfo_t *, ip_hdr(skb), -+ struct udp_sock * : udpsinfo_t *, udp_sk(sk), -+ struct udphdr * : udpinfo_t *, udp_hdr(skb)); - UDP_INC_STATS(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); -+ } - - sock_recv_ts_and_drops(msg, sk, skb); - -@@ -2033,6 +2048,15 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) - - ret = encap_rcv(sk, skb); - if (ret <= 0) { -+ DTRACE_UDP(receive, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ void_ip_t * : ipinfo_t *, -+ ip_hdr(skb), -+ struct udp_sock * : udpsinfo_t *, -+ udp_sk(sk), -+ struct udphdr * : udpinfo_t *, -+ udp_hdr(skb)); - __UDP_INC_STATS(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); -diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c -index 7b089d0ac8cd..561cb89dafd3 100644 ---- a/net/ipv6/ip6_input.c -+++ b/net/ipv6/ip6_input.c -@@ -43,6 +43,7 @@ - #include <net/xfrm.h> - #include 
<net/inet_ecn.h> - #include <net/dst_metadata.h> -+#include <linux/sdt.h> - - INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); - INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *)); -@@ -145,13 +146,14 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk, - static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, - struct net *net) - { -- const struct ipv6hdr *hdr; -+ const struct ipv6hdr *hdr = NULL; - u32 pkt_len; - struct inet6_dev *idev; -+ const char *dropreason; - - if (skb->pkt_type == PACKET_OTHERHOST) { -- kfree_skb(skb); -- return NULL; -+ dropreason = "for other host"; -+ goto trace_drop; - } - - rcu_read_lock(); -@@ -163,6 +165,7 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || - !idev || unlikely(idev->cnf.disable_ipv6)) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); -+ dropreason = "could not clone shared buffer"; - goto drop; - } - -@@ -181,13 +184,18 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, - */ - IP6CB(skb)->iif = skb_valid_dst(skb) ? 
ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex; - -- if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) -+ if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) { -+ hdr = ipv6_hdr(skb); -+ dropreason = "could not pull skb"; - goto err; -+ } - - hdr = ipv6_hdr(skb); - -- if (hdr->version != 6) -+ if (hdr->version != 6) { -+ dropreason = "header invalid"; - goto err; -+ } - - __IP6_ADD_STATS(net, idev, - IPSTATS_MIB_NOECTPKTS + -@@ -203,8 +211,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, - if ((ipv6_addr_loopback(&hdr->saddr) || - ipv6_addr_loopback(&hdr->daddr)) && - !(dev->flags & IFF_LOOPBACK) && -- !netif_is_l3_master(dev)) -+ !netif_is_l3_master(dev)) { -+ dropreason = "loopback destination received on interface"; - goto err; -+ } - - /* RFC4291 Errata ID: 3480 - * Interface-Local scope spans only a single interface on a -@@ -215,8 +225,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, - if (!(skb->pkt_type == PACKET_LOOPBACK || - dev->flags & IFF_LOOPBACK) && - ipv6_addr_is_multicast(&hdr->daddr) && -- IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) -+ IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) { -+ dropreason = "interface-local scope received from other node"; - goto err; -+ } - - /* If enabled, drop unicast packets that were encapsulated in link-layer - * multicast or broadcast to protected against the so-called "hole-196" -@@ -225,8 +237,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, - if (!ipv6_addr_is_multicast(&hdr->daddr) && - (skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) && -- idev->cnf.drop_unicast_in_l2_multicast) -+ idev->cnf.drop_unicast_in_l2_multicast) { -+ dropreason = "unicast packet encapsulated in multi/broadcast"; - goto err; -+ } - - /* RFC4291 2.7 - * Nodes must not originate a packet to a multicast address whose scope -@@ -234,16 +248,21 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct 
net_device *dev, - * must be silently dropped. - */ - if (ipv6_addr_is_multicast(&hdr->daddr) && -- IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0) -+ IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0) { -+ dropreason = -+ "packet to multicast address with reserved scope 0"; - goto err; -+ } - - /* - * RFC4291 2.7 - * Multicast addresses must not be used as source addresses in IPv6 - * packets or appear in any Routing header. - */ -- if (ipv6_addr_is_multicast(&hdr->saddr)) -+ if (ipv6_addr_is_multicast(&hdr->saddr)) { -+ dropreason = "multicast source address in IPv6 packet"; - goto err; -+ } - - /* While RFC4291 is not explicit about v4mapped addresses - * in IPv6 headers, it seems clear linux dual-stack -@@ -253,7 +272,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, - * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02 - */ - if (ipv6_addr_v4mapped(&hdr->saddr)) -+ { -+ dropreason = "v4-mapped address in IPv6 packet"; - goto err; -+ } - - skb->transport_header = skb->network_header + sizeof(*hdr); - IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); -@@ -265,10 +287,12 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, - if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { - __IP6_INC_STATS(net, - idev, IPSTATS_MIB_INTRUNCATEDPKTS); -+ dropreason = "truncated packet"; - goto drop; - } - if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); -+ dropreason = "could not trim buffer"; - goto drop; - } - hdr = ipv6_hdr(skb); -@@ -276,9 +300,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, - - if (hdr->nexthdr == NEXTHDR_HOP) { - if (ipv6_parse_hopopts(skb) < 0) { -- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); -- rcu_read_unlock(); -- return NULL; -+ dropreason = "could not parse hop opts"; -+ /* do not free skb */ -+ skb = NULL; -+ goto err; - } - } - -@@ -292,6 +317,15 @@ static struct 
sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - drop: - rcu_read_unlock(); -+trace_drop: -+ DTRACE_IP(drop__in, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb ? skb->sk : NULL, -+ void_ip_t * : ipinfo_t *, hdr, -+ struct net_device * : ifinfo_t *, skb ? skb->dev : NULL, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, hdr, -+ const char * : string, dropreason); - kfree_skb(skb); - return NULL; - } -@@ -364,6 +398,8 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, - struct inet6_dev *idev; - unsigned int nhoff; - bool raw; -+ const struct ipv6hdr *hdr; -+ const char *dropreason; - - /* - * Parse extension headers -@@ -373,8 +409,10 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, - idev = ip6_dst_idev(skb_dst(skb)); - nhoff = IP6CB(skb)->nhoff; - if (!have_final) { -- if (!pskb_pull(skb, skb_transport_offset(skb))) -+ if (!pskb_pull(skb, skb_transport_offset(skb))) { -+ dropreason = "could not pull skb"; - goto discard; -+ } - nexthdr = skb_network_header(skb)[nhoff]; - } - -@@ -391,10 +429,10 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, - * ones. This allows foo in UDP encapsulation - * to work. - */ -+ dropreason = "non-final protocol"; - goto discard; - } - } else if (ipprot->flags & INET6_PROTO_FINAL) { -- const struct ipv6hdr *hdr; - int sdif = inet6_sdif(skb); - struct net_device *dev; - -@@ -413,8 +451,10 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, - /* skb->dev passed may be master dev for vrfs. 
*/ - if (sdif) { - dev = dev_get_by_index_rcu(net, sdif); -- if (!dev) -+ if (!dev) { -+ dropreason = "device disappeared"; - goto discard; -+ } - } else { - dev = skb->dev; - } -@@ -422,12 +462,16 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, - if (ipv6_addr_is_multicast(&hdr->daddr) && - !ipv6_chk_mcast_addr(dev, &hdr->daddr, - &hdr->saddr) && -- !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) -+ !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) { -+ dropreason = "destination is multicast"; - goto discard; -+ } - } - if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && -- !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) -+ !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { -+ dropreason = "policy failure"; - goto discard; -+ } - - ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv, - skb); -@@ -453,6 +497,8 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, - IPSTATS_MIB_INUNKNOWNPROTOS); - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_UNK_NEXTHDR, nhoff); -+ dropreason = "policy failure"; -+ goto trace_drop; - } - kfree_skb(skb); - } else { -@@ -464,6 +510,17 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, - - discard: - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); -+trace_drop: -+ hdr = ipv6_hdr(skb); -+ DTRACE_IP(drop__in, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, hdr, -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, hdr, -+ const char * : string, dropreason); -+ rcu_read_unlock(); - kfree_skb(skb); - } - -@@ -479,6 +536,16 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk - - int ip6_input(struct sk_buff *skb) - { -+ struct ipv6hdr *hdr = ipv6_hdr(skb); -+ -+ DTRACE_IP(receive, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ 
void_ip_t * : ipinfo_t *, hdr, -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, hdr); -+ - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, - dev_net(skb->dev), NULL, skb, skb->dev, NULL, - ip6_input_finish); -diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c -index 087304427bbb..aa6903a2a39b 100644 ---- a/net/ipv6/ip6_output.c -+++ b/net/ipv6/ip6_output.c -@@ -54,6 +54,7 @@ - #include <linux/mroute6.h> - #include <net/l3mdev.h> - #include <net/lwtunnel.h> -+#include <linux/sdt.h> - - static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) - { -@@ -61,7 +62,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * - struct net_device *dev = dst->dev; - const struct in6_addr *nexthop; - struct neighbour *neigh; -- int ret; -+ const char *dropreason; -+ int ret = 0; - - if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { - struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); -@@ -82,10 +84,11 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * - dev_loopback_xmit); - - if (ipv6_hdr(skb)->hop_limit == 0) { -+ dropreason = "hoplimit exceeded"; -+ - IP6_INC_STATS(net, idev, - IPSTATS_MIB_OUTDISCARDS); -- kfree_skb(skb); -- return 0; -+ goto drop; - } - } - -@@ -94,8 +97,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * - if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= - IPV6_ADDR_SCOPE_NODELOCAL && - !(dev->flags & IFF_LOOPBACK)) { -- kfree_skb(skb); -- return 0; -+ dropreason = "invalid scope"; -+ goto drop; - } - } - -@@ -119,9 +122,20 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * - } - rcu_read_unlock_bh(); - -+ dropreason = "no route to host"; - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); -+ ret = -EINVAL; -+drop: -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t 
*, skb->sk, -+ void_ip_t * : ipinfo_t *, ipv6_hdr(skb), -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, ipv6_hdr(skb), -+ const char * : string, dropreason); - kfree_skb(skb); -- return -EINVAL; -+ return ret; - } - - static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) -@@ -167,6 +181,15 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) - skb->dev = dev; - - if (unlikely(idev->cnf.disable_ipv6)) { -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, NULL, -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, NULL, -+ const char * : string, "IPv6 is disabled"); -+ - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); - return 0; -@@ -203,8 +226,10 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, - struct ipv6hdr *hdr; - u8 proto = fl6->flowi6_proto; - int seg_len = skb->len; -+ const char *dropreason; - int hlimit = -1; - u32 mtu; -+ int err; - - head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); - if (opt) -@@ -213,10 +238,12 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, - if (unlikely(skb_headroom(skb) < head_room)) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (!skb2) { -+ dropreason = "out of memory"; - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); -- return -ENOBUFS; -+ err = -ENOBUFS; -+ goto drop; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); -@@ -273,6 +300,14 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, - if (unlikely(!skb)) - return 0; - -+ DTRACE_IP(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, hdr, -+ 
struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, hdr); -+ - /* hooks should never assume socket lock is held. - * we promote our socket to non const - */ -@@ -287,9 +322,21 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, - */ - ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); - -+ dropreason = "fragmentation failure"; - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); -+ err = -EMSGSIZE; -+drop: -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, NULL, -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, NULL, -+ const char * : string, dropreason); -+ - kfree_skb(skb); -- return -EMSGSIZE; -+ return err; - } - EXPORT_SYMBOL(ip6_xmit); - -@@ -424,22 +471,33 @@ int ip6_forward(struct sk_buff *skb) - struct ipv6hdr *hdr = ipv6_hdr(skb); - struct inet6_skb_parm *opt = IP6CB(skb); - struct net *net = dev_net(dst->dev); -+ const char *dropreason; -+ int err = -EINVAL; - u32 mtu; - -- if (net->ipv6.devconf_all->forwarding == 0) -+ if (net->ipv6.devconf_all->forwarding == 0) { -+ dropreason = "forwarding disabled"; - goto error; -+ } - -- if (skb->pkt_type != PACKET_HOST) -+ if (skb->pkt_type != PACKET_HOST) { -+ dropreason = "non-host packet type cannot be forwarded"; - goto drop; -+ } - -- if (unlikely(skb->sk)) -+ if (unlikely(skb->sk)) { -+ dropreason = "socket found for packet to be forwarded"; - goto drop; -+ } - -- if (skb_warn_if_lro(skb)) -+ if (skb_warn_if_lro(skb)) { -+ dropreason = "LRO warning"; - goto drop; -+ } - - if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); -+ dropreason = "forwarding disabled by policy"; - goto drop; - } - -@@ -469,11 +527,13 @@ int ip6_forward(struct sk_buff *skb) - if (hdr->hop_limit <= 1) { - 
/* Force OUTPUT device used as source address */ - skb->dev = dst->dev; -+ - icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - -- kfree_skb(skb); -- return -ETIMEDOUT; -+ dropreason = "hoplimit exceeded"; -+ err = -ETIMEDOUT; -+ goto drop; - } - - /* XXX: idev->cnf.proxy_ndp? */ -@@ -483,6 +543,7 @@ int ip6_forward(struct sk_buff *skb) - if (proxied > 0) - return ip6_input(skb); - else if (proxied < 0) { -+ dropreason = "proxy router cannot forward"; - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); - goto drop; - } -@@ -490,6 +551,7 @@ int ip6_forward(struct sk_buff *skb) - - if (!xfrm6_route_forward(skb)) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); -+ dropreason = "forwarding disabled for destination"; - goto drop; - } - dst = skb_dst(skb); -@@ -529,9 +591,12 @@ int ip6_forward(struct sk_buff *skb) - - /* This check is security critical. */ - if (addrtype == IPV6_ADDR_ANY || -- addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) -+ addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) { -+ dropreason = "invalid address type for forwarding"; - goto error; -+ } - if (addrtype & IPV6_ADDR_LINKLOCAL) { -+ dropreason = "invalid address type for forwarding"; - icmpv6_send(skb, ICMPV6_DEST_UNREACH, - ICMPV6_NOT_NEIGHBOUR, 0); - goto error; -@@ -545,17 +610,20 @@ int ip6_forward(struct sk_buff *skb) - if (ip6_pkt_too_big(skb, mtu)) { - /* Again, force OUTPUT device used as source address */ - skb->dev = dst->dev; -+ - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); - __IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_FRAGFAILS); -- kfree_skb(skb); -- return -EMSGSIZE; -+ dropreason = "packet too big"; -+ err = -EMSGSIZE; -+ goto drop; - } - - if (skb_cow(skb, dst->dev->hard_header_len)) { - __IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_OUTDISCARDS); -+ dropreason = "copy-on-write failed"; - goto drop; - } - -@@ -572,6 
+640,15 @@ int ip6_forward(struct sk_buff *skb) - error: - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); - drop: -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, hdr, -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, hdr, -+ const char * : string, dropreason); -+ - kfree_skb(skb); - return -EINVAL; - } -@@ -1336,6 +1413,7 @@ static int __ip6_append_data(struct sock *sk, - unsigned int maxnonfragsize, headersize; - unsigned int wmem_alloc_delta = 0; - bool paged, extra_uref = false; -+ const char *dropreason; - - skb = skb_peek_tail(queue); - if (!skb) { -@@ -1375,6 +1453,7 @@ static int __ip6_append_data(struct sock *sk, - sk->sk_protocol == IPPROTO_RAW)) { - ipv6_local_rxpmtu(sk, fl6, mtu - headersize + - sizeof(struct ipv6hdr)); -+ dropreason = "fragmentation needed but disabled"; - goto emsgsize; - } - -@@ -1387,7 +1466,9 @@ static int __ip6_append_data(struct sock *sk, - emsgsize: - pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0); - ipv6_local_error(sk, EMSGSIZE, fl6, pmtu); -- return -EMSGSIZE; -+ dropreason = "packet too big"; -+ err = -EMSGSIZE; -+ goto trace_drop; - } - - /* CHECKSUM_PARTIAL only with no extension headers and when -@@ -1402,8 +1483,11 @@ static int __ip6_append_data(struct sock *sk, - - if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) { - uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); -- if (!uarg) -- return -ENOBUFS; -+ if (!uarg) { -+ err = -ENOBUFS; -+ dropreason = "out of memory"; -+ goto error; -+ } - extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ - if (rt->dst.dev->features & NETIF_F_SG && - csummode == CHECKSUM_PARTIAL) { -@@ -1505,6 +1589,7 @@ static int __ip6_append_data(struct sock *sk, - copy = datalen - transhdrlen - fraggap - pagedlen; - if (copy < 0) { - err = -EINVAL; -+ dropreason = "invalid fragment"; - 
goto error; - } - if (transhdrlen) { -@@ -1517,11 +1602,13 @@ static int __ip6_append_data(struct sock *sk, - 2 * sk->sk_sndbuf) - skb = alloc_skb(alloclen + hh_len, - sk->sk_allocation); -- if (unlikely(!skb)) -- err = -ENOBUFS; - } -- if (!skb) -+ if (unlikely(!skb)) { -+ err = -ENOBUFS; -+ dropreason = "out of memory"; - goto error; -+ } -+ - /* - * Fill in the control structures - */ -@@ -1553,7 +1640,9 @@ static int __ip6_append_data(struct sock *sk, - getfrag(from, data + transhdrlen, offset, - copy, fraggap, skb) < 0) { - err = -EFAULT; -+ dropreason = "could not get fragment"; - kfree_skb(skb); -+ skb = NULL; - goto error; - } - -@@ -1597,20 +1686,25 @@ static int __ip6_append_data(struct sock *sk, - offset, copy, off, skb) < 0) { - __skb_trim(skb, off); - err = -EFAULT; -+ dropreason = "could not get fragment"; - goto error; - } - } else if (!uarg || !uarg->zerocopy) { - int i = skb_shinfo(skb)->nr_frags; - - err = -ENOMEM; -- if (!sk_page_frag_refill(sk, pfrag)) -+ if (!sk_page_frag_refill(sk, pfrag)) { -+ dropreason = "out of memory"; - goto error; -+ } - - if (!skb_can_coalesce(skb, i, pfrag->page, - pfrag->offset)) { - err = -EMSGSIZE; -- if (i == MAX_SKB_FRAGS) -+ if (i == MAX_SKB_FRAGS) { -+ dropreason = "too many fragments"; - goto error; -+ } - - __skb_fill_page_desc(skb, i, pfrag->page, - pfrag->offset, 0); -@@ -1620,8 +1714,10 @@ static int __ip6_append_data(struct sock *sk, - copy = min_t(int, copy, pfrag->size - pfrag->offset); - if (getfrag(from, - page_address(pfrag->page) + pfrag->offset, -- offset, copy, skb->len, skb) < 0) -+ offset, copy, skb->len, skb) < 0) { -+ dropreason = "could not get fragment"; - goto error_efault; -+ } - - pfrag->offset += copy; - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); -@@ -1631,8 +1727,10 @@ static int __ip6_append_data(struct sock *sk, - wmem_alloc_delta += copy; - } else { - err = skb_zerocopy_iter_dgram(skb, from, copy); -- if (err < 0) -+ if (err < 0) { -+ dropreason = "skb iteration 
failure\n"; - goto error; -+ } - } - offset += copy; - length -= copy; -@@ -1649,6 +1747,16 @@ static int __ip6_append_data(struct sock *sk, - sock_zerocopy_put_abort(uarg, extra_uref); - cork->length -= length; - IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); -+trace_drop: -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb ? skb->sk : NULL, -+ void_ip_t * : ipinfo_t *, NULL, -+ struct net_device * : ifinfo_t *, skb ? skb->dev : NULL, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, NULL, -+ const char * : string, dropreason); -+ - refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); - return err; - } -@@ -1796,9 +1904,20 @@ int ip6_send_skb(struct sk_buff *skb) - if (err) { - if (err > 0) - err = net_xmit_errno(err); -- if (err) -+ if (err) { -+ /* skb may have been freed */ -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, NULL, -+ struct sock * : csinfo_t *, NULL, -+ void_ip_t * : ipinfo_t *, NULL, -+ struct net_device * : ifinfo_t *, NULL, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, NULL, -+ const char * : string, "out of memory"); -+ - IP6_INC_STATS(net, rt->rt6i_idev, - IPSTATS_MIB_OUTDISCARDS); -+ } - } - - return err; -@@ -1824,9 +1943,19 @@ static void __ip6_flush_pending_frames(struct sock *sk, - struct sk_buff *skb; - - while ((skb = __skb_dequeue_tail(queue)) != NULL) { -- if (skb_dst(skb)) -+ if (skb_dst(skb)) { -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, ipv6_hdr(skb), -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, -+ ipv6_hdr(skb), -+ const char * : string, "flushing pending frames"); - IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); -+ } - kfree_skb(skb); - } - -diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c -index 
eaa4c2cc2fbb..47823557626e 100644 ---- a/net/ipv6/mcast.c -+++ b/net/ipv6/mcast.c -@@ -60,6 +60,8 @@ - - #include <net/ip6_checksum.h> - -+#include <linux/sdt.h> -+ - /* Ensure that we have struct in6_addr aligned on 32bit word. */ - static int __mld2_query_bugs[] __attribute__((__unused__)) = { - BUILD_BUG_ON_ZERO(offsetof(struct mld2_query, mld2q_srcs) % 4), -@@ -1647,6 +1649,7 @@ static void mld_sendpack(struct sk_buff *skb) - int payload_len, mldlen; - struct inet6_dev *idev; - struct net *net = dev_net(skb->dev); -+ const char *dropreason; - int err; - struct flowi6 fl6; - struct dst_entry *dst; -@@ -1676,26 +1679,45 @@ static void mld_sendpack(struct sk_buff *skb) - dst = NULL; - } - skb_dst_set(skb, dst); -- if (err) -- goto err_out; -+ if (err) { -+ kfree_skb(skb); -+ skb = NULL; -+ dropreason = "out of memory"; -+ goto out; -+ } -+ -+ DTRACE_IP(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, ipv6_hdr(skb), -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, ipv6_hdr(skb)); - - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, net->ipv6.igmp_sk, skb, NULL, skb->dev, - dst_output); -+ dropreason = "multicast send error"; - out: - if (!err) { - ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); - ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - } else { -+ /* skb may have been freed */ -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, NULL, -+ struct sock * : csinfo_t *, NULL, -+ void_ip_t * : ipinfo_t *, NULL, -+ struct net_device * : ifinfo_t *, idev->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, NULL, -+ const char * : string, dropreason); -+ - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - } - - rcu_read_unlock(); - return; -- --err_out: -- kfree_skb(skb); -- goto out; - } - - static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel) -@@ -1982,7 
+2004,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) - { - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.igmp_sk; -- struct inet6_dev *idev; -+ struct inet6_dev *idev = NULL; - struct sk_buff *skb; - struct mld_msg *hdr; - const struct in6_addr *snd_addr, *saddr; -@@ -1993,6 +2015,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) - u8 ra[8] = { IPPROTO_ICMPV6, 0, - IPV6_TLV_ROUTERALERT, 2, 0, 0, - IPV6_TLV_PADN, 0 }; -+ const char *dropreason; - struct flowi6 fl6; - struct dst_entry *dst; - -@@ -2014,10 +2037,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) - - if (!skb) { - rcu_read_lock(); -- IP6_INC_STATS(net, __in6_dev_get(dev), -- IPSTATS_MIB_OUTDISCARDS); -- rcu_read_unlock(); -- return; -+ dropreason = "out of memory"; -+ goto out; - } - skb->priority = TC_PRIO_CONTROL; - skb_reserve(skb, hlen); -@@ -2052,26 +2073,43 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) - dst = icmp6_dst_alloc(skb->dev, &fl6); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); -- goto err_out; -+ kfree_skb(skb); -+ skb = NULL; -+ dropreason = "out of memory"; -+ goto out; - } - - skb_dst_set(skb, dst); -+ DTRACE_IP(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, ipv6_hdr(skb), -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, ipv6_hdr(skb)); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, skb->dev, - dst_output); -+ dropreason = "multicast send error"; - out: - if (!err) { - ICMP6MSGOUT_INC_STATS(net, idev, type); - ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); -- } else -+ } else { -+ /* skb may have been freed */ -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, NULL, -+ struct sock * : csinfo_t *, sk, -+ void_ip_t * : ipinfo_t *, NULL, -+ struct net_device * 
: ifinfo_t *, idev->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, NULL, -+ const char * : string, dropreason); - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); -+ } - - rcu_read_unlock(); - return; -- --err_out: -- kfree_skb(skb); -- goto out; - } - - static void mld_send_initial_cr(struct inet6_dev *idev) -diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c -index 53caf59c591e..06817a86e9c0 100644 ---- a/net/ipv6/ndisc.c -+++ b/net/ipv6/ndisc.c -@@ -68,6 +68,7 @@ - - #include <linux/netfilter.h> - #include <linux/netfilter_ipv6.h> -+#include <linux/sdt.h> - - static u32 ndisc_hash(const void *pkey, - const struct net_device *dev, -@@ -502,6 +503,14 @@ static void ndisc_send_skb(struct sk_buff *skb, - idev = __in6_dev_get(dst->dev); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - -+ DTRACE_IP(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, ipv6_hdr(skb), -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, ipv6_hdr(skb)); -+ - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, dst->dev, - dst_output); -diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c -index af36acc1a644..811a88767a5c 100644 ---- a/net/ipv6/output_core.c -+++ b/net/ipv6/output_core.c -@@ -10,6 +10,7 @@ - #include <net/addrconf.h> - #include <net/secure_seq.h> - #include <linux/netfilter.h> -+#include <linux/sdt.h> - - static u32 __ipv6_select_ident(struct net *net, - const struct in6_addr *dst, -@@ -164,6 +165,14 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) - - skb->protocol = htons(ETH_P_IPV6); - -+ DTRACE_IP(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, ipv6_hdr(skb), -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t 
*, ipv6_hdr(skb)); -+ - return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, skb_dst(skb)->dev, - dst_output); -diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c -index dfe5e603ffe1..cd5f68d12fe9 100644 ---- a/net/ipv6/raw.c -+++ b/net/ipv6/raw.c -@@ -58,6 +58,7 @@ - #include <linux/proc_fs.h> - #include <linux/seq_file.h> - #include <linux/export.h> -+#include <linux/sdt.h> - - #define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ - -@@ -622,26 +623,34 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, - struct ipv6_pinfo *np = inet6_sk(sk); - struct net *net = sock_net(sk); - struct ipv6hdr *iph; -- struct sk_buff *skb; -+ struct sk_buff *skb = NULL; - int err; - struct rt6_info *rt = (struct rt6_info *)*dstp; - int hlen = LL_RESERVED_SPACE(rt->dst.dev); - int tlen = rt->dst.dev->needed_tailroom; -+ const char *dropreason; - - if (length > rt->dst.dev->mtu) { - ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); -- return -EMSGSIZE; -+ dropreason = "packet too big"; -+ err = -EMSGSIZE; -+ goto error_check; -+ } -+ if (length < sizeof(struct ipv6hdr)) { -+ dropreason = "packet too short"; -+ err = -EINVAL; -+ goto error_check; - } -- if (length < sizeof(struct ipv6hdr)) -- return -EINVAL; - if (flags&MSG_PROBE) - goto out; - - skb = sock_alloc_send_skb(sk, - length + hlen + tlen + 15, - flags & MSG_DONTWAIT, &err); -- if (!skb) -+ if (!skb) { -+ dropreason = "out of memory"; - goto error; -+ } - skb_reserve(skb, hlen); - - skb->protocol = htons(ETH_P_IPV6); -@@ -665,7 +674,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, - if (err) { - err = -EFAULT; - kfree_skb(skb); -- goto error; -+ dropreason = "could not copy msg"; -+ goto error_check; - } - - skb_dst_set(skb, &rt->dst); -@@ -684,6 +694,13 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, - */ - rcu_read_lock(); - IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); -+ 
DTRACE_IP(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb->sk, -+ void_ip_t * : ipinfo_t *, ipv6_hdr(skb), -+ struct net_device * : ifinfo_t *, skb->dev, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, ipv6_hdr(skb)); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, - NULL, rt->dst.dev, dst_output); - if (err > 0) -@@ -691,6 +708,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, - if (err) { - IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); - rcu_read_unlock(); -+ dropreason = "raw send error"; - goto error_check; - } - rcu_read_unlock(); -@@ -700,6 +718,14 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, - error: - IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); - error_check: -+ DTRACE_IP(drop__out, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, skb ? skb->sk : NULL, -+ void_ip_t * : ipinfo_t *, skb ? ipv6_hdr(skb) : NULL, -+ struct net_device * : ifinfo_t *, skb ? skb->dev : NULL, -+ struct iphdr * : ipv4info_t *, NULL, -+ struct ipv6hdr * : ipv6info_t *, skb ? ipv6_hdr(skb) : NULL, -+ const char * : string, dropreason); - if (err == -ENOBUFS && !np->recverr) - err = 0; - return err; -diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c -index eaf09e6b7844..7286e3103e7e 100644 ---- a/net/ipv6/tcp_ipv6.c -+++ b/net/ipv6/tcp_ipv6.c -@@ -65,6 +65,7 @@ - - #include <crypto/hash.h> - #include <linux/scatterlist.h> -+#include <linux/sdt.h> - - #include <trace/events/tcp.h> - -@@ -485,6 +486,20 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - return 0; - } - -+/* Since we want to trace send events in TCP prior to pushing the segment to -+ * IP - where the IP header is added - we need to construct an argument -+ * containing relevant IP info so that TCP probe consumers can utilize it. 
-+ */ -+static inline void dtrace_tcp_build_ipv6hdr(struct in6_addr *saddr, -+ struct in6_addr *daddr, -+ struct ipv6hdr *ip6h) -+{ -+ ip6h->version = 6; -+ ip6h->payload_len = 0; -+ ip6h->nexthdr = IPPROTO_TCP; -+ ip6h->saddr = *saddr; -+ ip6h->daddr = *daddr; -+} - - static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, - struct flowi *fl, -@@ -518,6 +533,32 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, - opt = ireq->ipv6_opt; - if (!opt) - opt = rcu_dereference(np->opt); -+ -+ if (DTRACE_TCP_ENABLED(send)) { -+ struct ipv6hdr ip6h; -+ -+ dtrace_tcp_build_ipv6hdr(&ireq->ir_v6_loc_addr, -+ &ireq->ir_v6_rmt_addr, &ip6h); -+ -+ /* Do not supply tcp sk - addresses/ports are not -+ * committed yet - instead translators will fill them -+ * in from IP/TCP data. -+ */ -+ DTRACE_TCP_NOCHECK(send, -+ struct sk_buff * : pktinfo_t *, -+ NULL, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : -+ ipinfo_t *, &ip6h, -+ struct tcp_sock * : tcpsinfo_t *, -+ NULL, -+ struct tcphdr * : tcpinfo_t *, -+ tcp_hdr(skb), -+ int : tcplsinfo_t *, TCP_LISTEN, -+ int, TCP_LISTEN, -+ int, DTRACE_NET_PROBE_OUTBOUND); -+ } -+ - err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass, - sk->sk_priority); - rcu_read_unlock(); -@@ -947,6 +988,48 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 - dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL); - if (!IS_ERR(dst)) { - skb_dst_set(buff, dst); -+ if (DTRACE_TCP_ENABLED(send) || -+ DTRACE_TCP_ENABLED(accept__refused)) { -+ struct ipv6hdr ip6h; -+ -+ dtrace_tcp_build_ipv6hdr(&fl6.saddr, &fl6.daddr, -+ &ip6h); -+ -+ /* Do not supply tcp sk - addresses/ports are not -+ * committed yet - instead translators will fill them -+ * in from IP/TCP data. 
-+ */ -+ DTRACE_TCP_NOCHECK(send, -+ struct sk_buff * : pktinfo_t *, -+ NULL, -+ struct sock * : csinfo_t *, NULL, -+ __dtrace_tcp_void_ip_t * : -+ ipinfo_t *, &ip6h, -+ struct tcp_sock * : tcpsinfo_t *, -+ NULL, -+ struct tcphdr * : tcpinfo_t *, t1, -+ int : tcplsinfo_t *, TCP_CLOSE, -+ int, TCP_CLOSE, -+ int, DTRACE_NET_PROBE_OUTBOUND); -+ if (rst && th->syn && th->ack == 0) -+ DTRACE_TCP_NOCHECK(accept__refused, -+ struct sk_buff * : -+ pktinfo_t *, NULL, -+ struct sock * : csinfo_t *, -+ NULL, -+ __dtrace_tcp_void_ip_t * : -+ ipinfo_t *, &ip6h, -+ struct tcp_sock * : -+ tcpsinfo_t *, NULL, -+ struct tcphdr * : -+ tcpinfo_t *, t1, -+ int : tcplsinfo_t *, -+ TCP_CLOSE, -+ int, TCP_CLOSE, -+ int, -+ DTRACE_NET_PROBE_OUTBOUND); -+ } -+ - ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass, - priority); - TCP_INC_STATS(net, TCP_MIB_OUTSEGS); -@@ -1544,7 +1627,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) - const struct tcphdr *th; - const struct ipv6hdr *hdr; - bool refcounted; -- struct sock *sk; -+ struct sock *sk = NULL; - int ret; - struct net *net = dev_net(skb->dev); - -@@ -1579,6 +1662,15 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) - if (!sk) - goto no_tcp_socket; - -+ DTRACE_TCP(receive, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, hdr, -+ struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk), -+ struct tcphdr * : tcpinfo_t *, th, -+ int : tcplsinfo_t *, sk ? sk->sk_state : TCP_CLOSE, -+ int, sk ? 
sk->sk_state : TCP_CLOSE, -+ int, DTRACE_NET_PROBE_INBOUND); - process: - if (sk->sk_state == TCP_TIME_WAIT) - goto do_time_wait; -@@ -1698,6 +1790,18 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) - } - - discard_it: -+ if (DTRACE_TCP_ENABLED(receive) && skb->pkt_type == PACKET_HOST) -+ DTRACE_TCP_NOCHECK(receive, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ __dtrace_tcp_void_ip_t * : ipinfo_t *, -+ ipv6_hdr(skb), -+ struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk), -+ struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), -+ int : tcplsinfo_t *, -+ sk ? sk->sk_state : TCP_CLOSE, -+ int, sk ? sk->sk_state : TCP_CLOSE, -+ int, DTRACE_NET_PROBE_INBOUND); - kfree_skb(skb); - return 0; - -diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c -index 5dc439a391fe..e2fff11faf67 100644 ---- a/net/ipv6/udp.c -+++ b/net/ipv6/udp.c -@@ -51,6 +51,7 @@ - - #include <linux/proc_fs.h> - #include <linux/seq_file.h> -+#include <linux/sdt.h> - #include <trace/events/skb.h> - #include "udp_impl.h" - -@@ -331,8 +332,15 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - kfree_skb(skb); - return err; - } -- if (!peeking) -+ if (!peeking) { - SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS); -+ DTRACE_UDP(receive, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ void_ip_t * : ipinfo_t *, ip_hdr(skb), -+ struct udp_sock * : udpsinfo_t *, udp_sk(sk), -+ struct udphdr * : udpinfo_t *, udp_hdr(skb)); -+ } - - sock_recv_ts_and_drops(msg, sk, skb); - -@@ -630,6 +638,15 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) - - ret = encap_rcv(sk, skb); - if (ret <= 0) { -+ DTRACE_UDP(receive, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ void_ip_t * : ipinfo_t *, -+ ip_hdr(skb), -+ struct udp_sock * : udpsinfo_t *, -+ udp_sk(sk), -+ struct udphdr * : udpinfo_t *, -+ udp_hdr(skb)); - __UDP_INC_STATS(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); -@@ -1177,6 +1194,13 @@ 
static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, - err = 0; - } - } else { -+ DTRACE_UDP(send, -+ struct sk_buff * : pktinfo_t *, skb, -+ struct sock * : csinfo_t *, sk, -+ void_ip_t * : ipinfo_t *, ip_hdr(skb), -+ struct udp_sock * : udpsinfo_t *, udp_sk(sk), -+ struct udphdr * : udpinfo_t *, uh); -+ - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_OUTDATAGRAMS, is_udplite); - } --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0018-dtrace-add-rcu_irq_exit-and-rcu_nmi_exit_common-to-F.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0018-dtrace-add-rcu_irq_exit-and-rcu_nmi_exit_common-to-F.patch deleted file mode 100644 index ef931075b21c..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0018-dtrace-add-rcu_irq_exit-and-rcu_nmi_exit_common-to-F.patch +++ /dev/null @@ -1,41 +0,0 @@ -From e0d759914d4fbf64ba64c6a34ea1f1cf607d7bea Mon Sep 17 00:00:00 2001 -From: David Mc Lean <david.mclean@oracle.com> -Date: Fri, 9 Aug 2019 12:47:13 -0700 -Subject: [PATCH 18/20] dtrace: add rcu_irq_exit and rcu_nmi_exit_common to FBT - blacklist - -Additional FBT blacklist entry needed. Without this change, systems under -test were seen to fail at test/stress/dtrace-util/tst.DestructWithModule.sh -and test/stress/fbtsafety/ tests. - -rcu_irq_exit is being added to the blacklist because it inlines -rcu_nmi_exit_common which is unsafe for FBT. - -The symptom seen has been a system hang, requiring a hard reboot. -This failure was seen to be easily repeatable. - -rcu_nmi_exit_common is being added also, just in case it is ever used -without being inlined. 
- -Signed-off-by: David Mc Lean <david.mclean@oracle.com> -Reviewed-by: Kris Van Hees <kris.van.hees@oracle.com> ---- - arch/x86/kernel/fbt_blacklist.h | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/arch/x86/kernel/fbt_blacklist.h b/arch/x86/kernel/fbt_blacklist.h -index fd599859b47e..1663b0b1be28 100644 ---- a/arch/x86/kernel/fbt_blacklist.h -+++ b/arch/x86/kernel/fbt_blacklist.h -@@ -57,6 +57,8 @@ BL_DENTRY(void *, rcu_dynticks_curr_cpu_in_eqs) - BL_DENTRY(void *, rcu_dynticks_eqs_exit) - BL_DENTRY(void *, trace_rcu_dyntick) - BL_DENTRY(void *, rcu_nmi_exit) -+BL_DENTRY(void *, rcu_irq_exit) -+BL_DENTRY(void *, rcu_nmi_exit_common) - BL_DENTRY(void *, rcu_dynticks_eqs_enter) - BL_DENTRY(void *, ist_exit) - --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0019-dtrace-add-sample-script-for-building-DTrace-on-Fedo.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0019-dtrace-add-sample-script-for-building-DTrace-on-Fedo.patch deleted file mode 100644 index 6a2f8476a240..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0019-dtrace-add-sample-script-for-building-DTrace-on-Fedo.patch +++ /dev/null @@ -1,241 +0,0 @@ -From a77da5086114557551892384088c03618fba7abb Mon Sep 17 00:00:00 2001 -From: Eugene Loh <eugene.loh@oracle.com> -Date: Mon, 12 Aug 2019 20:51:06 -0700 -Subject: [PATCH 19/20] dtrace: add sample script for building DTrace on Fedora - -Signed-off-by: Eugene Loh <eugene.loh@oracle.com> ---- - samples/dtrace/DTrace-on-Fedora.sh | 221 +++++++++++++++++++++++++++++ - 1 file changed, 221 insertions(+) - create mode 100755 samples/dtrace/DTrace-on-Fedora.sh - -diff --git a/samples/dtrace/DTrace-on-Fedora.sh b/samples/dtrace/DTrace-on-Fedora.sh -new file mode 100755 -index 000000000000..3857027381a5 ---- /dev/null -+++ b/samples/dtrace/DTrace-on-Fedora.sh -@@ -0,0 +1,221 @@ -+#!/bin/sh -+ -+# Oracle Linux DTrace. -+# Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. 
-+# Licensed under the Universal Permissive License v 1.0 as shown at -+# http://oss.oracle.com/licenses/upl. -+# -+ -+# Oracle has been working in recent years on porting DTrace, the -+# dynamic tracing tool, to Linux. DTrace offers easy-to-use, powerful, -+# safe, and unintrusive tracing. Oracle's initial focus was the Oracle -+# Unbreakable Enterprise Kernel (UEK), but DTrace runs on upstream Linux -+# kernels and other distributions' Linux kernels as well. Note that at -+# the moment, Oracle is in the process of upstreaming DTrace-related work -+# and reimplementing DTrace itself on top of existing kernel infrastructure -+# such as eBPF. -+ -+# This script illustrates how to build DTrace on Fedora on x86. -+# It is intended as a tutorial rather than a robust, turn-key utility. -+# Read and understand the steps as you execute them. The steps are -+# similar to what one does to build DTrace on other Linux distributions. -+ -+# Useful references on building a custom Fedora kernel include: -+# https://fedoraproject.org/wiki/Building_a_custom_kernel -+# https://fedoraproject.org/wiki/Building_a_custom_kernel#Building_Vanilla_upstream_kernel -+# Roughly speaking, you should have about 20 Gbyte of disk space -+# available and expect to wait a few hours for the build to complete. -+ -+# The overall process is: -+# 1. download and build the CTF library for DTrace to use -+# 2. download Fedora kernel source code -+# 3. prepare DTrace patches to apply -+# 4. prepare the kernel source code -+# a. Linux base code -+# b. apply Linux patches (if any) -+# c. apply Fedora patches -+# d. apply DTrace patches -+# e. prepare makefile and config -+# 5. build the kernel -+# 6. reboot -+# 7. download and build the DTrace userspace utility -+ -+ -+# pick one -+# DTrace patches change relatively infrequently. -+# So DTrace_branch might not have to match your Fedora kernel version exactly. 
-+#fedora_release=f29; DTrace_branch=5.2.7 ; num_DTrace_patches=19 -+ fedora_release=f30; DTrace_branch=5.2.7 ; num_DTrace_patches=19 -+ -+# Step 1: download and build the CTF library for DTrace to use -+ -+sudo dnf install -y git -+git clone https://github.com/oracle/libdtrace-ctf.git -+cd libdtrace-ctf -+sudo dnf builddep -y libdtrace-ctf.spec # install dependencies -+make -+sudo make install -+cd .. -+ -+# Step 2: download Fedora kernel source code -+ -+sudo dnf install -y fedora-packager -+fedpkg co -a kernel # anonymous clone of Fedora patches -+cd kernel -+git checkout origin/$fedora_release -+fedpkg sources # download tarballs of kernel sources -+sudo dnf builddep -y kernel.spec # install dependencies -+cd .. -+ -+# Step 3: prepare DTrace patches to apply -+ -+# download DTrace kernel code -+git clone https://github.com/oracle/dtrace-linux-kernel.git -+cd dtrace-linux-kernel/ -+git checkout origin/$DTrace_branch -+ -+# The DTrace patches will be the most recent commits. -+# Make sure you use all of them but nothing before that. -+# Make sure the top patches are DTrace -+# and the next one after them is the Linux baseline you want. -+# E.g., the top commits here are DTrace, and the last one is Linux upstream: -+# [...] -+# 66f76fef08e3 dtrace: modular components and x86 support -+# 25f11bb97fb9 dtrace: core and x86 -+# 3c5af76fa5fb waitfd: new syscall implementing waitpid() over fds -+# e95e4350d02b kallsyms: introduce new /proc/kallmodsyms including builtin modules too -+# 86e43efc644c ctf: generate CTF information for the kernel -+# a3b22b9f11d9 (tag: v5.0-rc7) Linux 5.0-rc7 -+git log -n $(($num_DTrace_patches + 1)) --oneline -+ -+# generate the DTrace patches -+git format-patch -$num_DTrace_patches -+ -+cd .. 
-+ -+# Step 4: prepare the kernel source code -+ -+# Step 4a: Linux base code -+ -+if [ -e kernel/linux-*.xz ]; then -+ /usr/bin/xz -dc kernel/linux-*.tar.xz | /usr/bin/tar -xof - -+else -+ tar xzf kernel/linux-*.tar.gz -+fi -+ -+# make a git repo so patches can be applied -+cd linux-* -+git init -+git config user.email "kernel-team@fedoraproject.org" -+git config user.name "Fedora Kernel Team" -+git config gc.auto 0 -+git add . -+git commit -a -q -m "baseline" -+ -+# Step 4b: apply Linux patches (if any) -+ -+if [ -e ../kernel/patch-*.xz ]; then -+ xzcat ../kernel/patch-*.xz | patch -p1 -F1 -s -+ git commit -a -m "Stable update" -+fi -+ -+# Step 4c: apply Fedora patches -+ -+for x in `awk '/^Patch/ {print $2}' ../kernel/kernel.spec`; do -+ git am ../kernel/$x -+done -+ -+# Step 4d: apply DTrace patches -+ -+for x in ../dtrace-linux-kernel/00*.patch; do -+ git am $x -+ if [ $? -ne 0 ]; then -+ echo DTrace patch did not apply cleanly -+ exit 1 -+ fi -+done -+ -+# Step 4e: prepare makefile and config -+ -+# modify the version tag in the Makefile -+sed -i.old \ -+ 's/^EXTRAVERSION =.*$/EXTRAVERSION = -200.DTrace_'$fedora_release'.x86_64/' \ -+ Makefile -+ -+# use the Fedora config file -+cp ../kernel/kernel-x86_64.config .config -+ -+# modify the config file for DTrace -+sed -i \ -+ -e 's/# CONFIG_UNWINDER_FRAME_POINTER is not set/CONFIG_UNWINDER_FRAME_POINTER=y/' \ -+ -e 's/CONFIG_UNWINDER_ORC=y/# CONFIG_UNWINDER_ORC is not set/' .config -+echo "CONFIG_DTRACE=y" >> .config -+echo "CONFIG_DT_CORE=m" >> .config -+echo "CONFIG_DT_FASTTRAP=m" >> .config -+echo "CONFIG_DT_PROFILE=m" >> .config -+echo "CONFIG_DT_SDT=m" >> .config -+echo "CONFIG_DT_SDT_PERF=y" >> .config -+echo "CONFIG_DT_FBT=m" >> .config -+echo "CONFIG_DT_SYSTRACE=m" >> .config -+echo "CONFIG_DT_DT_TEST=m" >> .config -+echo "CONFIG_DT_DT_PERF=m" >> .config -+echo "CONFIG_DT_DEBUG=y" >> .config -+echo "# CONFIG_DT_DEBUG_MUTEX is not set" >> .config -+ -+# Step 5: build the kernel -+ -+# (might take hours) 
-+make olddefconfig -+make -j4 -+make -j4 ctf -+ -+# install -+sudo make modules_install -+sudo make install -+sudo make INSTALL_HDR_PATH=/usr headers_install -+cd .. -+ -+# Step 6: reboot -+ -+sudo reboot -+ -+# Step 7: download and build the DTrace userspace utility -+ -+git clone https://github.com/oracle/dtrace-utils.git -+cd dtrace-utils -+ -+# The DTrace packages are missing from Fedora repos, -+# and we just built that software ourselves. -+# So eliminate those packages from the .spec file -+# before calling dnf builddep. -+sed -i.old \ -+ -e 's/-devel libdtrace-ctf-devel >= [0-9\.]*/-devel/' \ -+ -e '/^BuildRequires: dtrace-kernel-headers = [0-9\.]*$/d' dtrace-utils.spec -+sudo dnf builddep -y dtrace-utils.spec -+ -+make -+sudo make install -+cd .. -+ -+exit 0 -+ -+# Now, we can use DTrace on Fedora! (Notice that it is installed at /usr/sbin/dtrace. -+# Some other utility is at /usr/bin/dtrace.) You must be logged in as -+# root to use DTrace. The first thing to do is to list the available probes: -+# -+# # /usr/sbin/dtrace -l -+# ID PROVIDER MODULE FUNCTION NAME -+# 1 dtrace BEGIN -+# 2 dtrace END -+# 3 dtrace ERROR -+# 5 fbt isofs isofs_hashi entry -+# 6 fbt isofs isofs_hashi return -+# 7 fbt isofs isofs_statfs entry -+# 8 fbt isofs isofs_statfs return -+# 9 fbt isofs isofs_iget5_test entry -+# 10 fbt isofs isofs_iget5_test return -+# [...thousands of lines omitted...] -+# -+# Next, check out the Oracle DTrace Guide for simple examples and more information. 
-+# https://docs.oracle.com/cd/E52668_01/E38608/html/index.html -+ --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0020-locking-publicize-mutex_owner-and-mutex_owned-again.patch b/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0020-locking-publicize-mutex_owner-and-mutex_owned-again.patch deleted file mode 100644 index 5d56774b3af6..000000000000 --- a/sys-kernel/debian-sources/files/5.6.14/dtrace-patches/0020-locking-publicize-mutex_owner-and-mutex_owned-again.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 3d97a4c07b02433500fa7aa35ef5543ec44887f8 Mon Sep 17 00:00:00 2001 -From: Nick Alcock <nick.alcock@oracle.com> -Date: Mon, 9 Dec 2019 16:51:44 +0000 -Subject: [PATCH 20/20] locking: publicize mutex_owner and mutex_owned again - -DTrace uses both of them. - -Signed-off-by: Nick Alcock <nick.alcock@oracle.com> ---- - dtrace/dtrace_dif.c | 2 +- - include/linux/mutex.h | 16 +++++----------- - kernel/locking/mutex.c | 19 +++++++++++++++++++ - 3 files changed, 25 insertions(+), 12 deletions(-) - -diff --git a/dtrace/dtrace_dif.c b/dtrace/dtrace_dif.c -index ae7f01b4ed9b..798302d322a3 100644 ---- a/dtrace/dtrace_dif.c -+++ b/dtrace/dtrace_dif.c -@@ -2439,7 +2439,7 @@ static void dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, - break; - - #ifdef CONFIG_SMP -- regs[rd] = (uintptr_t)__mutex_owner(&mtx); -+ regs[rd] = (uintptr_t)mutex_owner(&mtx); - #else - regs[rd] = 0; - #endif -diff --git a/include/linux/mutex.h b/include/linux/mutex.h -index f2861d68ade1..b65927b2f9d3 100644 ---- a/include/linux/mutex.h -+++ b/include/linux/mutex.h -@@ -214,16 +214,10 @@ enum mutex_trylock_recursive_enum { - extern /* __deprecated */ __must_check enum mutex_trylock_recursive_enum - mutex_trylock_recursive(struct mutex *lock); - --#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) --static inline int mutex_owned(struct mutex *lock) --{ -- return mutex_is_locked(lock) && __mutex_owner(lock) == current; --} --#else --static inline int 
mutex_owned(struct mutex *lock) --{ -- return mutex_is_locked(lock); --} --#endif -+extern int -+mutex_owned(struct mutex *lock); -+ -+extern struct task_struct * -+mutex_owner(struct mutex *lock); - - #endif /* __LINUX_MUTEX_H */ -diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c -index e784dd89d924..b856d7ffc51c 100644 ---- a/kernel/locking/mutex.c -+++ b/kernel/locking/mutex.c -@@ -97,6 +97,25 @@ mutex_trylock_recursive(struct mutex *lock) - } - EXPORT_SYMBOL(mutex_trylock_recursive); - -+struct task_struct *mutex_owner(struct mutex *lock) -+{ -+ return __mutex_owner (lock); -+} -+EXPORT_SYMBOL(mutex_owner); -+ -+#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) -+int mutex_owned(struct mutex *lock) -+{ -+ return mutex_is_locked(lock) && __mutex_owner(lock) == current; -+} -+#else -+int mutex_owned(struct mutex *lock) -+{ -+ return mutex_is_locked(lock); -+} -+#endif -+EXPORT_SYMBOL(mutex_owned); -+ - static inline unsigned long __owner_flags(unsigned long owner) - { - return owner & MUTEX_FLAGS; --- -2.27.0 - diff --git a/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-fix-bluetooth-polling.patch b/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-fix-bluetooth-polling.patch new file mode 100644 index 000000000000..b7e7ddb275e5 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-fix-bluetooth-polling.patch @@ -0,0 +1,45 @@ +Update the default BLE connection parameters. + +Commit c49a8682fc5d298d44e8d911f4fa14690ea9485e introduced a bounds +check on connection interval update requests, but the default min/max +values were left at 24-40 (30-50ms) which caused problems for devices +that want to negotiate connection intervals outside of those bounds. 
+ +Setting the default min/max connection interval to the full allowable +range in the bluetooth specification restores the default Linux behavior +of allowing remote devices to negotiate their desired connection +interval, while still permitting the system administrator to later +narrow the range. + +The default supervision timeout must also be modified to accommodate +the max connection interval increase. The new default value meets the +requirements of the bluetooth specification and the conditions in +the hci_check_conn_params function. + +The downside to modifying the default supervision timeout is that +it will take longer (about 10 seconds) to detect a link loss condition. + +Fixes c49a8682fc5d: (validate BLE connection interval updates) + +Signed-off-by: Carey Sonsino <csonsino@xxxxxxxxx> + +--- + +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index 04bc79359a17..895d17ec9291 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -3181,10 +3181,10 @@ struct hci_dev *hci_alloc_dev(void) + hdev->le_adv_max_interval = 0x0800; + hdev->le_scan_interval = 0x0060; + hdev->le_scan_window = 0x0030; +- hdev->le_conn_min_interval = 0x0018; +- hdev->le_conn_max_interval = 0x0028; ++ hdev->le_conn_min_interval = 0x0006; ++ hdev->le_conn_max_interval = 0x0c80; + hdev->le_conn_latency = 0x0000; +- hdev->le_supv_timeout = 0x002a; ++ hdev->le_supv_timeout = 0x03ea; + hdev->le_def_tx_len = 0x001b; + hdev->le_def_tx_time = 0x0148; + hdev->le_max_tx_len = 0x001b; diff --git a/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-ikconfig.patch b/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-ikconfig.patch new file mode 100644 index 000000000000..7fd83ab8ddd4 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-ikconfig.patch @@ -0,0 +1,13 @@ +diff -Nuar debian/config/config debian-fixed/config/config +--- debian/config/config 2018-12-16 17:37:36.000000000 +0000 ++++ debian-fixed/config/config 
2018-12-21 19:38:42.417410783 +0000 +@@ -5970,7 +5970,8 @@ + CONFIG_TASK_XACCT=y + CONFIG_TASK_IO_ACCOUNTING=y + CONFIG_CPU_ISOLATION=y +-# CONFIG_IKCONFIG is not set ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y + CONFIG_LOG_BUF_SHIFT=17 + CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 + CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 diff --git a/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-mcelog.patch b/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-mcelog.patch new file mode 100644 index 000000000000..dd219db89ab4 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-mcelog.patch @@ -0,0 +1,14 @@ +diff -Nuar debian/config/config debian-fixed/config/config +--- debian/config/config 2017-10-28 22:51:59.000000000 +0300 ++++ debian-fixed/config/config 2017-11-13 06:47:04.809885320 +0200 +@@ -14,6 +14,10 @@ + CONFIG_STRICT_KERNEL_RWX=y + + ## ++## file: arch/x86/Kconfig ++## ++CONFIG_X86_MCELOG_LEGACY=y ++ + ## file: block/Kconfig + ## + CONFIG_BLOCK=y diff --git a/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-nocerts.patch b/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-nocerts.patch new file mode 100644 index 000000000000..7f5c79b0d802 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-nocerts.patch @@ -0,0 +1,12 @@ +diff -urN debian/config/config debian.fixed/config/config +--- debian/config/config 2019-10-19 18:35:10.000000000 -0400 ++++ debian.fixed/config/config 2019-10-22 01:13:53.262687881 -0400 +@@ -68,7 +68,7 @@ + #. Actually a file containing X.509 certificates, not keys. + #. Whenever the filename changes, this also needs to be updated in + #. debian/featureset-*/config +-CONFIG_SYSTEM_TRUSTED_KEYS="debian/certs/debian-uefi-certs.pem" ++CONFIG_SYSTEM_TRUSTED_KEYS="" + #. Add secondary keyring with keys from UEFI db and MOK. 
+ CONFIG_SECONDARY_TRUSTED_KEYRING=y + CONFIG_SYSTEM_BLACKLIST_KEYRING=y diff --git a/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-xfs-libcrc32c-fix.patch b/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-xfs-libcrc32c-fix.patch new file mode 100644 index 000000000000..da144c5e9b74 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.6.7/debian-sources-5.6.7-xfs-libcrc32c-fix.patch @@ -0,0 +1,27 @@ +diff -Nuar debian/config/config debian-fix/config/config +--- debian/config/config 2014-04-29 01:57:15.000000000 +0000 ++++ debian-fix/config/config 2014-05-16 15:02:38.622819509 +0000 +@@ -1,4 +1,4 @@ +-## ++### + ## file: arch/Kconfig + ## + CONFIG_KPROBES=y +@@ -4710,7 +4710,7 @@ + ## + ## file: fs/xfs/Kconfig + ## +-CONFIG_XFS_FS=m ++CONFIG_XFS_FS=y + CONFIG_XFS_QUOTA=y + CONFIG_XFS_POSIX_ACL=y + CONFIG_XFS_RT=y +@@ -4909,7 +4909,7 @@ + CONFIG_CRC32=y + # CONFIG_CRC32_SELFTEST is not set + CONFIG_CRC7=m +-CONFIG_LIBCRC32C=m ++CONFIG_LIBCRC32C=y + # CONFIG_CRC8 is not set + # CONFIG_RANDOM32_SELFTEST is not set + CONFIG_CORDIC=m diff --git a/sys-kernel/debian-sources/files/5.6.7/export_kernel_fpu_functions_5_3.patch b/sys-kernel/debian-sources/files/5.6.7/export_kernel_fpu_functions_5_3.patch new file mode 100644 index 000000000000..af71d043e612 --- /dev/null +++ b/sys-kernel/debian-sources/files/5.6.7/export_kernel_fpu_functions_5_3.patch @@ -0,0 +1,43 @@ +From 1e010beda2896bdf3082fb37a3e49f8ce20e04d8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= <joerg@thalheim.io> +Date: Thu, 2 May 2019 05:28:08 +0100 +Subject: [PATCH] x86/fpu: Export kernel_fpu_{begin,end}() with + EXPORT_SYMBOL_GPL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We need these symbols in zfs as the fpu implementation breaks userspace: + +https://github.com/zfsonlinux/zfs/issues/9346 +Signed-off-by: Jörg Thalheim <joerg@thalheim.io> +--- + arch/x86/kernel/fpu/core.c | 4 ++-- + 1 file changed, 2 
insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c +index 12c70840980e..352538b3bb5d 100644 +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -102,7 +102,7 @@ void kernel_fpu_begin(void) + } + __cpu_invalidate_fpregs_state(); + } +-EXPORT_SYMBOL_GPL(kernel_fpu_begin); ++EXPORT_SYMBOL(kernel_fpu_begin); + + void kernel_fpu_end(void) + { +@@ -111,7 +111,7 @@ void kernel_fpu_end(void) + this_cpu_write(in_kernel_fpu, false); + preempt_enable(); + } +-EXPORT_SYMBOL_GPL(kernel_fpu_end); ++EXPORT_SYMBOL(kernel_fpu_end); + + /* + * Save the FPU state (mark it for reload if necessary): +-- +2.23.0 + + diff --git a/sys-kernel/debian-sources/files/config-extract b/sys-kernel/debian-sources/files/config-extract index ddbafee3ecce..4860a6459873 100755 --- a/sys-kernel/debian-sources/files/config-extract +++ b/sys-kernel/debian-sources/files/config-extract @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python2 import os,sys,re import getopt @@ -10,7 +10,7 @@ re_item = re.compile("[A-Z_]*='[^']*'") try: f=open("debian/rules.gen","r") except: - print("Unable to open debian/rules.gen; can't continue.") + print "Unable to open debian/rules.gen; can't continue." sys.exit(1) lines=f.readlines() f.close() @@ -27,7 +27,7 @@ while line < len(lines): if not head_match: line += 1 continue - config_name = head_match.group(1) + config_name = head_match.group(1) line += 1 if not re_flav.findall(lines[line]): continue @@ -57,7 +57,7 @@ for config in configlist: archdict[cs[0]][cs[1]] = [] archdict[cs[0]][cs[1]].append(cs[2]) -arches = list(archdict.keys()) +arches = archdict.keys() arches.sort() features = [ None ] @@ -68,7 +68,7 @@ for arch in arches: PROG="config-extract" def usage(): - print("""This work is free software. + print """This work is free software. Copyright 2011 Funtoo Technologies. 
You can redistribute and/or modify it under the terms of the GNU General Public License version 3 as published by the Free @@ -77,7 +77,7 @@ license that has been publicly approved for use with this program by Funtoo Technologies (or its successors, if any.) usage: %s [options] arch [featureset] [subarch] - + -h --help print this usage and exit -l --list list all available kernel configurations -o --outfile specify kernel config outfile -- @@ -101,13 +101,13 @@ pick one (and the program will remind you to do this.) The kernel configuration will be written to ".config" in the current directory, or the location you specified using the -o/--outfile option. -""" % PROG) +""" % PROG sys.exit(2) try: opts, args = getopt.getopt(sys.argv[1:], "o:hl", ["help", "list","outfile="]) -except getopt.GetoptError as err: - print(str(err)) +except getopt.GetoptError, err: + print str(err) usage() mode="run" @@ -124,10 +124,10 @@ for o,a in opts: if mode == "run": if len(args) < 1 or len(args) > 3: if len(args) == 0: - print("Please specify an arch - one of: "+", ".join(arches)) + print "Please specify an arch - one of: "+", ".join(arches) sys.exit(2) else: - print("Too many arguments.") + print "Too many arguments." usage() arch = args[0] if outfile == None: @@ -145,30 +145,30 @@ if mode == "run": # print out optimized list of available kernel configurations: if mode=="list": - print() + print for flav in features: label = flav if label == None: label = "standard" - print("====== %s featureset ======" % label) - print() + print "====== %s featureset ======" % label + print for arch in arches: if flav in archdict[arch]: if len(archdict[arch][flav]) == 1: - print(arch.rjust(12)) + print arch.rjust(12) else: flavlist = archdict[arch][flav] flavlist.sort() - variants = ", ".join(flavlist) - print(arch.rjust(12) + ":", variants) - print() + variants = ", ".join(flavlist) + print arch.rjust(12) + ":", variants + print sys.exit(0) # featureset defaults to None. 
if featureset not in archdict[arch]: - print("Error: There is no '%s' featureset kernel config for arch '%s'. Exiting." % ( featureset, arch )) - print(archdict[arch]) + print "Error: There is no '%s' featureset kernel config for arch '%s'. Exiting." % ( featureset, arch ) + print archdict[arch] sys.exit(2) # If a subarch is not specified (None), then we will auto-pick the subarch if only one is available. @@ -178,13 +178,13 @@ if subarch == None: if len(archdict[arch][featureset]) == 1: subarch = archdict[arch][featureset][0] else: - print("Error: there is more than one 'sub-architecture' for this arch.") - print("Please specify [arch] [featureset] [subarch], with one of these subarches:") - print(", ".join(archdict[arch][featureset])) + print "Error: there is more than one 'sub-architecture' for this arch." + print "Please specify [arch] [featureset] [subarch], with one of these subarches:" + print ", ".join(archdict[arch][featureset]) sys.exit(2) else: if subarch not in archdict[arch][featureset]: - print("Error: specified sub-architecture '%s' is not available for this arch. Exiting." % subarch) + print "Error: specified sub-architecture '%s' is not available for this arch. Exiting." % subarch sys.exit(2) # We've done all our arg processing, now let's construct the master_key that we will use to look up the @@ -200,20 +200,20 @@ if subarch == None: else: master_key += "_%s" % subarch if master_key not in configdict: - print("Master key lookup failed; can't continue. Please report this bug.") + print "Master key lookup failed; can't continue. Please report this bug." sys.exit(1) if "KCONFIG" not in configdict[master_key]: - print("Unable to find KCONFIG option; can't continue. Please report this bug.") + print "Unable to find KCONFIG option; can't continue. Please report this bug." 
sys.exit(1) -cmd = "python debian/bin/kconfig.py '%s' %s" % ( outfile, configdict[master_key]["KCONFIG"] ) +cmd = "python2 debian/bin/kconfig.py '%s' %s" % ( outfile, configdict[master_key]["KCONFIG"] ) if "KCONFIG_OPTIONS" in configdict[master_key]: cmd += " %s" % configdict[master_key]["KCONFIG_OPTIONS"] os.environ["PYTHONPATH"] = "debian/lib/python" retval = os.system(cmd) if retval == 0: - print("Wrote %s kernel configuration to %s." % ( master_key, outfile )) + print "Wrote %s kernel configuration to %s." % ( master_key, outfile ) sys.exit(0) else: - print("There was an error extracting the Debian kernel config.") + print "There was an error extracting the Debian kernel config." sys.exit(1) diff --git a/sys-kernel/debian-sources/metadata.xml b/sys-kernel/debian-sources/metadata.xml index 1bff4ad25c0e..2457d2f16d57 100644 --- a/sys-kernel/debian-sources/metadata.xml +++ b/sys-kernel/debian-sources/metadata.xml @@ -7,5 +7,5 @@ <flag name='binary'>Builds and installs kernel automatically</flag> <flag name='rt'>Applies the CONFIG_PREEMPT_RT patch series</flag> </use> - <origin>xor-overlay</origin> + <origin>bradlyatc-overlay</origin> </pkgmetadata> |
