i3 my balls

i3/.config/i3/config (new file, 126 lines)
@@ -0,0 +1,126 @@
set $mod Mod4

font pango:monospace 8

exec --no-startup-id dex-autostart --autostart --environment i3
exec --no-startup-id xss-lock --transfer-sleep-lock -- i3lock --nofork
exec --no-startup-id nm-applet

# Use pactl to adjust volume in PulseAudio.
set $refresh_i3status killall -SIGUSR1 i3status
bindsym XF86AudioRaiseVolume exec --no-startup-id pactl set-sink-volume @DEFAULT_SINK@ +10% && $refresh_i3status
bindsym XF86AudioLowerVolume exec --no-startup-id pactl set-sink-volume @DEFAULT_SINK@ -10% && $refresh_i3status
bindsym XF86AudioMute exec --no-startup-id pactl set-sink-mute @DEFAULT_SINK@ toggle && $refresh_i3status
bindsym XF86AudioMicMute exec --no-startup-id pactl set-source-mute @DEFAULT_SOURCE@ toggle && $refresh_i3status

# Use Mouse+$mod to drag floating windows to their wanted position
floating_modifier $mod

# move tiling windows via drag & drop by left-clicking into the title bar,
# or left-clicking anywhere into the window while holding the floating modifier.
tiling_drag modifier titlebar

# start a terminal
bindsym $mod+Return exec wezterm

# kill focused window
bindsym $mod+w kill

# start dmenu (a program launcher)
bindsym $mod+space exec --no-startup-id dmenu_run

# change focus
bindsym $mod+h focus left
bindsym $mod+j focus down
bindsym $mod+k focus up
bindsym $mod+l focus right

# move focused window
bindsym $mod+Shift+h move left
bindsym $mod+Shift+j move down
bindsym $mod+Shift+k move up
bindsym $mod+Shift+l move right

# enter fullscreen mode for the focused container
bindsym $mod+f fullscreen toggle

# toggle floating for the focused container
bindsym $mod+v floating toggle

# Define names for default workspaces for which we configure key bindings later on.
# We use variables to avoid repeating the names in multiple places.
set $ws1 "1"
set $ws2 "2"
set $ws3 "3"
set $ws4 "4"
set $ws5 "5"
set $ws6 "6"
set $ws7 "7"
set $ws8 "8"
set $ws9 "9"
set $ws10 "10"

# switch to workspace
bindsym $mod+1 workspace number $ws1
bindsym $mod+2 workspace number $ws2
bindsym $mod+3 workspace number $ws3
bindsym $mod+4 workspace number $ws4
bindsym $mod+5 workspace number $ws5
bindsym $mod+6 workspace number $ws6
bindsym $mod+7 workspace number $ws7
bindsym $mod+8 workspace number $ws8
bindsym $mod+9 workspace number $ws9
bindsym $mod+0 workspace number $ws10

# move focused container to workspace
bindsym $mod+Shift+1 move container to workspace number $ws1
bindsym $mod+Shift+2 move container to workspace number $ws2
bindsym $mod+Shift+3 move container to workspace number $ws3
bindsym $mod+Shift+4 move container to workspace number $ws4
bindsym $mod+Shift+5 move container to workspace number $ws5
bindsym $mod+Shift+6 move container to workspace number $ws6
bindsym $mod+Shift+7 move container to workspace number $ws7
bindsym $mod+Shift+8 move container to workspace number $ws8
bindsym $mod+Shift+9 move container to workspace number $ws9
bindsym $mod+Shift+0 move container to workspace number $ws10

# restart i3 in place (preserves your layout/session, can be used to upgrade i3)
bindsym $mod+Shift+r restart
# exit i3 (logs you out of your X session)
bindsym $mod+Shift+e exec "i3-nagbar -t warning -m 'You pressed the exit shortcut. Do you really want to exit i3? This will end your X session.' -B 'Yes, exit i3' 'i3-msg exit'"

# resize window (you can also use the mouse for that)
mode "resize" {
        # These bindings trigger as soon as you enter the resize mode.
        # Pressing h will shrink the window’s width.
        # Pressing l will grow the window’s width.
        # Pressing k will shrink the window’s height.
        # Pressing j will grow the window’s height.
        bindsym h resize shrink width 10 px or 10 ppt
        bindsym j resize grow height 10 px or 10 ppt
        bindsym k resize shrink height 10 px or 10 ppt
        bindsym l resize grow width 10 px or 10 ppt

        # back to normal: Enter or Escape or $mod+r
        bindsym Return mode "default"
        bindsym Escape mode "default"
        bindsym $mod+r mode "default"
}

bindsym $mod+r mode "resize"

# Start i3bar to display a workspace bar (plus the system information i3status
# finds out, if available)
bar {
        status_command i3status
}

# external programs
bindsym $mod+d exec flatpak run dev.vencord.Vesktop
bindsym $mod+b exec firefox
bindsym $mod+s exec flatpak run org.vinegarhq.Sober
bindsym $mod+g exec steam
bindsym $mod+m exec flatpak run org.prismlauncher.PrismLauncher

bindsym $mod+Shift+f exec nautilus

# select a region with scrot, copy it to the clipboard, and keep a copy at ~/latest.png
bindsym --release $mod+Shift+s exec sh -c "scrot -s /tmp/screenshot.png && xclip -selection clipboard -t image/png -i /tmp/screenshot.png && cp /tmp/screenshot.png ~/latest.png"
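
A quick sanity check before restarting i3 with the config above: i3 can validate a config file without touching the running session, and the $mod+Shift+s screenshot chain can be exercised from a shell first. A minimal sketch, assuming the stock XDG config path:

    # Parse-check the config; prints errors and exits non-zero on failure.
    i3 -C -c ~/.config/i3/config

    # Dry-run the screenshot chain: select a region, copy the PNG to the
    # clipboard, and keep a copy at ~/latest.png.
    scrot -s /tmp/screenshot.png \
      && xclip -selection clipboard -t image/png -i /tmp/screenshot.png \
      && cp /tmp/screenshot.png ~/latest.png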

i3/.config/i3status/config (new file, 61 lines)
@@ -0,0 +1,61 @@
# i3status configuration file.
# see "man i3status" for documentation.

# It is important that this file is edited as UTF-8.
# The following line should contain a sharp s:
# ß
# If the above line is not correctly displayed, fix your editor first!

general {
        colors = true
        interval = 5
}

order += "ipv6"
order += "cpu_temperature 0"
order += "disk /"
order += "wireless _first_"
order += "ethernet _first_"
# order += "battery all"
order += "load"
order += "tztime local"

cpu_temperature 0 {
        format = "Tea: %degrees °C"
        path = "/sys/class/hwmon/hwmon1/temp1_input"
        # max_threshold is in degrees Celsius, not millidegrees.
        max_threshold = 80
}

wireless _first_ {
        # format_up = "W: (%quality at %essid) %ip"
        format_up = "W: (%quality) Leaked IP: %ip"
        format_down = "W: down"
}

ethernet _first_ {
        # if you use %speed, i3status requires root privileges
        # format_up = "E: %ip (%speed)"
        format_up = "E: Leaked IP: %ip (%speed)"
        format_down = "E: down"
}

battery all {
        format = "Fairy Dust: %percentage %status %remaining"
}

tztime local {
        format = "%d %H:%M:%S"
}

load {
        format = "Hot Loads: %1min"
}

disk "/" {
        format = "Penger Folder: %avail"
}

ipv6 {
        format_up = "Useless Protocol: %ipv6"
        format_down = "Useless Protocol: Down"
}
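
One caveat on the cpu_temperature block above: the hwmon index in path is machine-specific and can even change across reboots, so hwmon1 is an assumption about this particular box. A small sketch for locating the right sensor:

    # List hwmon devices with their driver names; pick the CPU one
    # (e.g. "coretemp" on Intel, "k10temp" on AMD).
    for d in /sys/class/hwmon/hwmon*; do
      printf '%s: %s\n' "$d" "$(cat "$d/name")"
    done

    # Raw readings are in millidegrees Celsius; i3status divides by 1000
    # before substituting %degrees.
    cat /sys/class/hwmon/hwmon1/temp1_input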

@@ -1,16 +1,6 @@
-set -g default-terminal "tmux-256color"
-set -ag terminal-overrides ",xterm-256color:RGB"
+unbind C-b
 set -g prefix C-s
-set -g base-index 1
-set -g renumber-windows on
+bind C-s send-prefix
 set -g mode-keys vi
-set -g status-position top
-set -g status-justify absolute-centre
-set -g status-style "bg=default"
-set -g window-status-current-style "fg=blue bold"
-set -g status-right ""
-set -g status-left "#S"
-
-bind r source-file "~/.config/tmux/tmux.conf"
-bind b set -g status
-bind G neww -n "lazygit" lazygit
+set -g status-keys vi
+set -s escape-time 0
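
Note that the slimmed-down tmux config above drops the old `bind r` reload binding, so applying further edits is now done from a shell (or from the command prompt behind the new C-s prefix):

    # Apply the edited config to the running tmux server.
    tmux source-file ~/.config/tmux/tmux.conf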

@@ -23,7 +23,7 @@ config.window_padding = {
 
 config.scrollback_lines = 1000
 
-config.font = wezterm.font("ComicShannsMono Nerd Font")
+-- config.font = wezterm.font("ComicShannsMono")
 config.font_size = 20.0
 
 config.front_end = "WebGpu"
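
With config.font commented out, WezTerm falls back to its bundled default font (JetBrains Mono). Before re-enabling the line, it is worth confirming which Comic Shanns build is actually installed; a sketch using WezTerm's own CLI:

    # List the system fonts WezTerm can resolve and look for Comic Shanns.
    wezterm ls-fonts --list-system | grep -i 'comic'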
Submodule zsh/.local/share/nvim/lazy/cmp-nvim-lsp deleted from bd5a7d6db1
Submodule zsh/.local/share/nvim/lazy/cmp-path deleted from c642487086
Submodule zsh/.local/share/nvim/lazy/gruber-darker.nvim deleted from 98a2e14198
Submodule zsh/.local/share/nvim/lazy/lazy.nvim deleted from 85c7ff3711
Submodule zsh/.local/share/nvim/lazy/mason-lspconfig.nvim deleted from f760507df8
Submodule zsh/.local/share/nvim/lazy/mason.nvim deleted from b3689a41dd
Submodule zsh/.local/share/nvim/lazy/nvim-cmp deleted from 106c4bcc05
Submodule zsh/.local/share/nvim/lazy/nvim-lspconfig deleted from 336b388c27
Submodule zsh/.local/share/nvim/lazy/plenary.nvim deleted from b9fd5226c2
Submodule zsh/.local/share/nvim/lazy/snacks.nvim deleted from 5e0e869852
Submodule zsh/.local/share/nvim/lazy/telescope.nvim deleted from a0bbec2114
Submodule zsh/.local/share/nvim/lazy/vim-fugitive deleted from 61b51c09b7
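
For reference, submodule deletions like the ones above are typically produced with `git rm`, which removes the gitlink, the working tree, and the matching .gitmodules section in one step. A hedged sketch, using one of the paths from this commit:

    # Deregister the submodule, then delete it and its .gitmodules entry.
    git submodule deinit -f zsh/.local/share/nvim/lazy/lazy.nvim
    git rm -f zsh/.local/share/nvim/lazy/lazy.nvim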

@@ -1 +0,0 @@
-../packages/clangd/clangd_21.1.0/bin/clangd

@@ -1 +0,0 @@
-../packages/gopls/gopls

@@ -1 +0,0 @@
-../packages/lua-language-server/lua-language-server

@@ -1 +0,0 @@
-../packages/ols/ols-x86_64-unknown-linux-gnu
@@ -1,279 +0,0 @@
(deleted: vendored LLVM LICENSE.TXT containing the standard Apache License 2.0 with LLVM Exceptions, the third-party-software notice, and the legacy University of Illinois/NCSA license; 279 lines of unmodified upstream license boilerplate omitted)
Binary file not shown.
@@ -1,121 +0,0 @@
(deleted: vendored clang header cuda_builtin_vars.h, which implements the CUDA built-in variables threadIdx, blockIdx, blockDim, gridDim, and warpSize via __declspec(property) getters; 121 lines of unmodified upstream code omitted)
@@ -1,512 +0,0 @@
(deleted: vendored clang header __clang_cuda_cmath.h, device-side overloads of the std math functions for CUDA and OpenMP offloading; 512 lines of unmodified upstream code omitted)
@@ -1,285 +0,0 @@
|
|||||||
/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __CLANG_CUDA_COMPLEX_BUILTINS
#define __CLANG_CUDA_COMPLEX_BUILTINS

// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are
// libgcc functions that clang assumes are available when compiling c99 complex
// operations. (These implementations come from libc++, and have been modified
// to work with CUDA and OpenMP target offloading [in C and C++ mode].)
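// For orientation, a minimal sketch of why these symbols are needed: given
//   float _Complex __x, __y;
// the expression __x * __y is typically lowered by clang to a call to
// __mulsc3(crealf(__x), cimagf(__x), crealf(__y), cimagf(__y)) so the
// NaN/infinity fix-ups required by C99 Annex G happen out of line, and that
// call must resolve to a device function when compiling for the GPU.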

#pragma push_macro("__DEVICE__")
#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
#pragma omp declare target
#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
#else
#define __DEVICE__ __device__ inline
#endif

// To make the algorithms available for C and C++ in CUDA and OpenMP we select
// different but equivalent function versions. TODO: For OpenMP we currently
// select the native builtins as the overload support for templates is lacking.
#if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
#define _ISNANd std::isnan
#define _ISNANf std::isnan
#define _ISINFd std::isinf
#define _ISINFf std::isinf
#define _ISFINITEd std::isfinite
#define _ISFINITEf std::isfinite
#define _COPYSIGNd std::copysign
#define _COPYSIGNf std::copysign
#define _SCALBNd std::scalbn
#define _SCALBNf std::scalbn
#define _ABSd std::abs
#define _ABSf std::abs
#define _LOGBd std::logb
#define _LOGBf std::logb
// Rather than pulling in std::max from <algorithm> every time, use the
// available ::max.
#define _fmaxd max
#define _fmaxf max
#else
#ifdef __AMDGCN__
#define _ISNANd __ocml_isnan_f64
#define _ISNANf __ocml_isnan_f32
#define _ISINFd __ocml_isinf_f64
#define _ISINFf __ocml_isinf_f32
#define _ISFINITEd __ocml_isfinite_f64
#define _ISFINITEf __ocml_isfinite_f32
#define _COPYSIGNd __ocml_copysign_f64
#define _COPYSIGNf __ocml_copysign_f32
#define _SCALBNd __ocml_scalbn_f64
#define _SCALBNf __ocml_scalbn_f32
#define _ABSd __ocml_fabs_f64
#define _ABSf __ocml_fabs_f32
#define _LOGBd __ocml_logb_f64
#define _LOGBf __ocml_logb_f32
#define _fmaxd __ocml_fmax_f64
#define _fmaxf __ocml_fmax_f32
#else
#define _ISNANd __nv_isnand
#define _ISNANf __nv_isnanf
#define _ISINFd __nv_isinfd
#define _ISINFf __nv_isinff
#define _ISFINITEd __nv_isfinited
#define _ISFINITEf __nv_finitef
#define _COPYSIGNd __nv_copysign
#define _COPYSIGNf __nv_copysignf
#define _SCALBNd __nv_scalbn
#define _SCALBNf __nv_scalbnf
#define _ABSd __nv_fabs
#define _ABSf __nv_fabsf
#define _LOGBd __nv_logb
#define _LOGBf __nv_logbf
#define _fmaxd __nv_fmax
#define _fmaxf __nv_fmaxf
#endif
#endif

#if defined(__cplusplus)
extern "C" {
#endif

__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
                                    double __d) {
  double __ac = __a * __c;
  double __bd = __b * __d;
  double __ad = __a * __d;
  double __bc = __b * __c;
  double _Complex z;
  __real__(z) = __ac - __bd;
  __imag__(z) = __ad + __bc;
  if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
    int __recalc = 0;
    if (_ISINFd(__a) || _ISINFd(__b)) {
      __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a);
      __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b);
      if (_ISNANd(__c))
        __c = _COPYSIGNd(0, __c);
      if (_ISNANd(__d))
        __d = _COPYSIGNd(0, __d);
      __recalc = 1;
    }
    if (_ISINFd(__c) || _ISINFd(__d)) {
      __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c);
      __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d);
      if (_ISNANd(__a))
        __a = _COPYSIGNd(0, __a);
      if (_ISNANd(__b))
        __b = _COPYSIGNd(0, __b);
      __recalc = 1;
    }
    if (!__recalc &&
        (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) {
      if (_ISNANd(__a))
        __a = _COPYSIGNd(0, __a);
      if (_ISNANd(__b))
        __b = _COPYSIGNd(0, __b);
      if (_ISNANd(__c))
        __c = _COPYSIGNd(0, __c);
      if (_ISNANd(__d))
        __d = _COPYSIGNd(0, __d);
      __recalc = 1;
    }
    if (__recalc) {
      // Can't use std::numeric_limits<double>::infinity() -- that doesn't have
      // a device overload (and isn't constexpr before C++11, naturally).
      __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
      __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
    }
  }
  return z;
}
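// A worked case for the recalculation path above (per C99 Annex G): for
// (Inf + NaN*i) * (2 + 3*i) the naive products make both parts NaN, so the
// code rewrites the infinite operand as a signed 1, the NaN parts as signed
// 0s, and rescales by __builtin_huge_val() to recover a correctly signed
// infinite result instead of NaN + NaN*i.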

__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
  float __ac = __a * __c;
  float __bd = __b * __d;
  float __ad = __a * __d;
  float __bc = __b * __c;
  float _Complex z;
  __real__(z) = __ac - __bd;
  __imag__(z) = __ad + __bc;
  if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
    int __recalc = 0;
    if (_ISINFf(__a) || _ISINFf(__b)) {
      __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
      __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
      if (_ISNANf(__c))
        __c = _COPYSIGNf(0, __c);
      if (_ISNANf(__d))
        __d = _COPYSIGNf(0, __d);
      __recalc = 1;
    }
    if (_ISINFf(__c) || _ISINFf(__d)) {
      __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
      __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
      if (_ISNANf(__a))
        __a = _COPYSIGNf(0, __a);
      if (_ISNANf(__b))
        __b = _COPYSIGNf(0, __b);
      __recalc = 1;
    }
    if (!__recalc &&
        (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) {
      if (_ISNANf(__a))
        __a = _COPYSIGNf(0, __a);
      if (_ISNANf(__b))
        __b = _COPYSIGNf(0, __b);
      if (_ISNANf(__c))
        __c = _COPYSIGNf(0, __c);
      if (_ISNANf(__d))
        __d = _COPYSIGNf(0, __d);
      __recalc = 1;
    }
    if (__recalc) {
      __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
      __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
    }
  }
  return z;
}

__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
                                    double __d) {
  int __ilogbw = 0;
  // Can't use std::max, because that's defined in <algorithm>, and we don't
  // want to pull that in for every compile. The CUDA headers define
  // ::max(float, float) and ::max(double, double), which is sufficient for us.
  double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d)));
  if (_ISFINITEd(__logbw)) {
    __ilogbw = (int)__logbw;
    __c = _SCALBNd(__c, -__ilogbw);
    __d = _SCALBNd(__d, -__ilogbw);
  }
  double __denom = __c * __c + __d * __d;
  double _Complex z;
  __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw);
  __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw);
  if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
    if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) {
      __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a;
      __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b;
    } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) &&
               _ISFINITEd(__d)) {
      __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a);
      __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b);
      __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
      __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
    } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) &&
               _ISFINITEd(__b)) {
      __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c);
      __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d);
      __real__(z) = 0.0 * (__a * __c + __b * __d);
      __imag__(z) = 0.0 * (__b * __c - __a * __d);
    }
  }
  return z;
}
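// The logb/scalbn steps above are classic range reduction: the divisor is
// scaled down by 2^-__ilogbw so that __c * __c + __d * __d can neither
// overflow nor flush to zero prematurely, and the same power of two is
// folded back into the quotient afterwards.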

__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
  int __ilogbw = 0;
  float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d)));
  if (_ISFINITEf(__logbw)) {
    __ilogbw = (int)__logbw;
    __c = _SCALBNf(__c, -__ilogbw);
    __d = _SCALBNf(__d, -__ilogbw);
  }
  float __denom = __c * __c + __d * __d;
  float _Complex z;
  __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw);
  __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw);
  if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
    if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) {
      __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a;
      __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b;
    } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) &&
               _ISFINITEf(__d)) {
      __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
      __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
      __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
      __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
    } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) &&
               _ISFINITEf(__b)) {
      __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
      __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
      __real__(z) = 0 * (__a * __c + __b * __d);
      __imag__(z) = 0 * (__b * __c - __a * __d);
    }
  }
  return z;
}

#if defined(__cplusplus)
} // extern "C"
#endif

#undef _ISNANd
#undef _ISNANf
#undef _ISINFd
#undef _ISINFf
#undef _COPYSIGNd
#undef _COPYSIGNf
#undef _ISFINITEd
#undef _ISFINITEf
#undef _SCALBNd
#undef _SCALBNf
#undef _ABSd
#undef _ABSf
#undef _LOGBd
#undef _LOGBf
#undef _fmaxd
#undef _fmaxf

#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
#pragma omp end declare target
#endif

#pragma pop_macro("__DEVICE__")

#endif // __CLANG_CUDA_COMPLEX_BUILTINS
File diff suppressed because it is too large
@@ -1,994 +0,0 @@
/*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_CUDA_INTRINSICS_H__
#define __CLANG_CUDA_INTRINSICS_H__
#ifndef __CUDA__
#error "This file is for CUDA compilation only."
#endif

// sm_30 intrinsics: __shfl_{up,down,xor}.

#define __SM_30_INTRINSICS_H__
#define __SM_30_INTRINSICS_HPP__

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300

#pragma push_macro("__MAKE_SHUFFLES")
#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \
                        __Type) \
  inline __device__ int __FnName(int __val, __Type __offset, \
                                 int __width = warpSize) { \
    return __IntIntrinsic(__val, __offset, \
                          ((warpSize - __width) << 8) | (__Mask)); \
  } \
  inline __device__ float __FnName(float __val, __Type __offset, \
                                   int __width = warpSize) { \
    return __FloatIntrinsic(__val, __offset, \
                            ((warpSize - __width) << 8) | (__Mask)); \
  } \
  inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \
                                          int __width = warpSize) { \
    return static_cast<unsigned int>( \
        ::__FnName(static_cast<int>(__val), __offset, __width)); \
  } \
  inline __device__ long long __FnName(long long __val, __Type __offset, \
                                       int __width = warpSize) { \
    struct __Bits { \
      int __a, __b; \
    }; \
    _Static_assert(sizeof(__val) == sizeof(__Bits)); \
    _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \
    __Bits __tmp; \
    memcpy(&__tmp, &__val, sizeof(__val)); \
    __tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \
    __tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \
    long long __ret; \
    memcpy(&__ret, &__tmp, sizeof(__tmp)); \
    return __ret; \
  } \
  inline __device__ long __FnName(long __val, __Type __offset, \
                                  int __width = warpSize) { \
    _Static_assert(sizeof(long) == sizeof(long long) || \
                   sizeof(long) == sizeof(int)); \
    if (sizeof(long) == sizeof(long long)) { \
      return static_cast<long>( \
          ::__FnName(static_cast<long long>(__val), __offset, __width)); \
    } else if (sizeof(long) == sizeof(int)) { \
      return static_cast<long>( \
          ::__FnName(static_cast<int>(__val), __offset, __width)); \
    } \
  } \
  inline __device__ unsigned long __FnName( \
      unsigned long __val, __Type __offset, int __width = warpSize) { \
    return static_cast<unsigned long>( \
        ::__FnName(static_cast<long>(__val), __offset, __width)); \
  } \
  inline __device__ unsigned long long __FnName( \
      unsigned long long __val, __Type __offset, int __width = warpSize) { \
    return static_cast<unsigned long long>( \
        ::__FnName(static_cast<long long>(__val), __offset, __width)); \
  } \
  inline __device__ double __FnName(double __val, __Type __offset, \
                                    int __width = warpSize) { \
    long long __tmp; \
    _Static_assert(sizeof(__tmp) == sizeof(__val)); \
    memcpy(&__tmp, &__val, sizeof(__val)); \
    __tmp = ::__FnName(__tmp, __offset, __width); \
    double __ret; \
    memcpy(&__ret, &__tmp, sizeof(__ret)); \
    return __ret; \
  }

__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int);
// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
// maxLane.
__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0,
                unsigned int);
__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f,
                unsigned int);
__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,
                int);
#pragma pop_macro("__MAKE_SHUFFLES")

#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300

#if CUDA_VERSION >= 9000
#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
// __shfl_sync_* variants available in CUDA-9
#pragma push_macro("__MAKE_SYNC_SHUFFLES")
#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \
                             __Mask, __Type) \
  inline __device__ int __FnName(unsigned int __mask, int __val, \
                                 __Type __offset, int __width = warpSize) { \
    return __IntIntrinsic(__mask, __val, __offset, \
                          ((warpSize - __width) << 8) | (__Mask)); \
  } \
  inline __device__ float __FnName(unsigned int __mask, float __val, \
                                   __Type __offset, int __width = warpSize) { \
    return __FloatIntrinsic(__mask, __val, __offset, \
                            ((warpSize - __width) << 8) | (__Mask)); \
  } \
  inline __device__ unsigned int __FnName(unsigned int __mask, \
                                          unsigned int __val, __Type __offset, \
                                          int __width = warpSize) { \
    return static_cast<unsigned int>( \
        ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \
  } \
  inline __device__ long long __FnName(unsigned int __mask, long long __val, \
                                       __Type __offset, \
                                       int __width = warpSize) { \
    struct __Bits { \
      int __a, __b; \
    }; \
    _Static_assert(sizeof(__val) == sizeof(__Bits)); \
    _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \
    __Bits __tmp; \
    memcpy(&__tmp, &__val, sizeof(__val)); \
    __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \
    __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \
    long long __ret; \
    memcpy(&__ret, &__tmp, sizeof(__tmp)); \
    return __ret; \
  } \
  inline __device__ unsigned long long __FnName( \
      unsigned int __mask, unsigned long long __val, __Type __offset, \
      int __width = warpSize) { \
    return static_cast<unsigned long long>( \
        ::__FnName(__mask, static_cast<long long>(__val), __offset, __width)); \
  } \
  inline __device__ long __FnName(unsigned int __mask, long __val, \
                                  __Type __offset, int __width = warpSize) { \
    _Static_assert(sizeof(long) == sizeof(long long) || \
                   sizeof(long) == sizeof(int)); \
    if (sizeof(long) == sizeof(long long)) { \
      return static_cast<long>(::__FnName( \
          __mask, static_cast<long long>(__val), __offset, __width)); \
    } else if (sizeof(long) == sizeof(int)) { \
      return static_cast<long>( \
          ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \
    } \
  } \
  inline __device__ unsigned long __FnName( \
      unsigned int __mask, unsigned long __val, __Type __offset, \
      int __width = warpSize) { \
    return static_cast<unsigned long>( \
        ::__FnName(__mask, static_cast<long>(__val), __offset, __width)); \
  } \
  inline __device__ double __FnName(unsigned int __mask, double __val, \
                                    __Type __offset, int __width = warpSize) { \
    long long __tmp; \
    _Static_assert(sizeof(__tmp) == sizeof(__val)); \
    memcpy(&__tmp, &__val, sizeof(__val)); \
    __tmp = ::__FnName(__mask, __tmp, __offset, __width); \
    double __ret; \
    memcpy(&__ret, &__tmp, sizeof(__ret)); \
    return __ret; \
  }
__MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,
                     __nvvm_shfl_sync_idx_f32, 0x1f, int);
// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
// maxLane.
__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,
                     __nvvm_shfl_sync_up_f32, 0, unsigned int);
__MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,
                     __nvvm_shfl_sync_down_f32, 0x1f, unsigned int);
__MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,
                     __nvvm_shfl_sync_bfly_f32, 0x1f, int);
#pragma pop_macro("__MAKE_SYNC_SHUFFLES")

inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {
  return __nvvm_bar_warp_sync(mask);
}

inline __device__ void __barrier_sync(unsigned int id) {
  __nvvm_barrier_sync(id);
}

inline __device__ void __barrier_sync_count(unsigned int id,
                                            unsigned int count) {
  __nvvm_barrier_sync_cnt(id, count);
}

inline __device__ int __all_sync(unsigned int mask, int pred) {
  return __nvvm_vote_all_sync(mask, pred);
}

inline __device__ int __any_sync(unsigned int mask, int pred) {
  return __nvvm_vote_any_sync(mask, pred);
}

inline __device__ int __uni_sync(unsigned int mask, int pred) {
  return __nvvm_vote_uni_sync(mask, pred);
}

inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
  return __nvvm_vote_ballot_sync(mask, pred);
}
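// Usage sketch for the vote intrinsics: __ballot_sync returns one bit per
// participating lane, so a warp can, for instance, count how many lanes
// satisfy a predicate with __popc(__ballot_sync(0xffffffff, pred)).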

inline __device__ unsigned int __activemask() {
#if CUDA_VERSION < 9020
  return __nvvm_vote_ballot(1);
#else
  return __nvvm_activemask();
#endif
}

inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {
  return __nvvm_fns(mask, base, offset);
}

#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300

// Define __match* builtins CUDA-9 headers expect to see.
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
inline __device__ unsigned int __match32_any_sync(unsigned int mask,
                                                  unsigned int value) {
  return __nvvm_match_any_sync_i32(mask, value);
}

inline __device__ unsigned int
__match64_any_sync(unsigned int mask, unsigned long long value) {
  return __nvvm_match_any_sync_i64(mask, value);
}

inline __device__ unsigned int
__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {
  return __nvvm_match_all_sync_i32p(mask, value, pred);
}

inline __device__ unsigned int
__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {
  return __nvvm_match_all_sync_i64p(mask, value, pred);
}
#include "crt/sm_70_rt.hpp"

#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
#endif // CUDA_VERSION >= 9000

// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.

// Prevent the vanilla sm_32 intrinsics header from being included.
#define __SM_32_INTRINSICS_H__
#define __SM_32_INTRINSICS_HPP__

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320

inline __device__ char __ldg(const char *ptr) { return __nvvm_ldg_c(ptr); }
inline __device__ short __ldg(const short *ptr) { return __nvvm_ldg_s(ptr); }
inline __device__ int __ldg(const int *ptr) { return __nvvm_ldg_i(ptr); }
inline __device__ long __ldg(const long *ptr) { return __nvvm_ldg_l(ptr); }
inline __device__ long long __ldg(const long long *ptr) {
  return __nvvm_ldg_ll(ptr);
}
inline __device__ unsigned char __ldg(const unsigned char *ptr) {
  return __nvvm_ldg_uc(ptr);
}
inline __device__ signed char __ldg(const signed char *ptr) {
  return __nvvm_ldg_uc((const unsigned char *)ptr);
}
inline __device__ unsigned short __ldg(const unsigned short *ptr) {
  return __nvvm_ldg_us(ptr);
}
inline __device__ unsigned int __ldg(const unsigned int *ptr) {
  return __nvvm_ldg_ui(ptr);
}
inline __device__ unsigned long __ldg(const unsigned long *ptr) {
  return __nvvm_ldg_ul(ptr);
}
inline __device__ unsigned long long __ldg(const unsigned long long *ptr) {
  return __nvvm_ldg_ull(ptr);
}
inline __device__ float __ldg(const float *ptr) { return __nvvm_ldg_f(ptr); }
inline __device__ double __ldg(const double *ptr) { return __nvvm_ldg_d(ptr); }

inline __device__ char2 __ldg(const char2 *ptr) {
  typedef char c2 __attribute__((ext_vector_type(2)));
  // We can assume that ptr is aligned at least to char2's alignment, but the
  // load will assume that ptr is aligned to c2's alignment. This is only
  // safe if alignof(c2) <= alignof(char2).
  c2 rv = __nvvm_ldg_c2(reinterpret_cast<const c2 *>(ptr));
  char2 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  return ret;
}
inline __device__ char4 __ldg(const char4 *ptr) {
  typedef char c4 __attribute__((ext_vector_type(4)));
  c4 rv = __nvvm_ldg_c4(reinterpret_cast<const c4 *>(ptr));
  char4 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  ret.z = rv[2];
  ret.w = rv[3];
  return ret;
}
inline __device__ short2 __ldg(const short2 *ptr) {
  typedef short s2 __attribute__((ext_vector_type(2)));
  s2 rv = __nvvm_ldg_s2(reinterpret_cast<const s2 *>(ptr));
  short2 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  return ret;
}
inline __device__ short4 __ldg(const short4 *ptr) {
  typedef short s4 __attribute__((ext_vector_type(4)));
  s4 rv = __nvvm_ldg_s4(reinterpret_cast<const s4 *>(ptr));
  short4 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  ret.z = rv[2];
  ret.w = rv[3];
  return ret;
}
inline __device__ int2 __ldg(const int2 *ptr) {
  typedef int i2 __attribute__((ext_vector_type(2)));
  i2 rv = __nvvm_ldg_i2(reinterpret_cast<const i2 *>(ptr));
  int2 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  return ret;
}
inline __device__ int4 __ldg(const int4 *ptr) {
  typedef int i4 __attribute__((ext_vector_type(4)));
  i4 rv = __nvvm_ldg_i4(reinterpret_cast<const i4 *>(ptr));
  int4 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  ret.z = rv[2];
  ret.w = rv[3];
  return ret;
}
inline __device__ longlong2 __ldg(const longlong2 *ptr) {
  typedef long long ll2 __attribute__((ext_vector_type(2)));
  ll2 rv = __nvvm_ldg_ll2(reinterpret_cast<const ll2 *>(ptr));
  longlong2 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  return ret;
}

inline __device__ uchar2 __ldg(const uchar2 *ptr) {
  typedef unsigned char uc2 __attribute__((ext_vector_type(2)));
  uc2 rv = __nvvm_ldg_uc2(reinterpret_cast<const uc2 *>(ptr));
  uchar2 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  return ret;
}
inline __device__ uchar4 __ldg(const uchar4 *ptr) {
  typedef unsigned char uc4 __attribute__((ext_vector_type(4)));
  uc4 rv = __nvvm_ldg_uc4(reinterpret_cast<const uc4 *>(ptr));
  uchar4 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  ret.z = rv[2];
  ret.w = rv[3];
  return ret;
}
inline __device__ ushort2 __ldg(const ushort2 *ptr) {
  typedef unsigned short us2 __attribute__((ext_vector_type(2)));
  us2 rv = __nvvm_ldg_us2(reinterpret_cast<const us2 *>(ptr));
  ushort2 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  return ret;
}
inline __device__ ushort4 __ldg(const ushort4 *ptr) {
  typedef unsigned short us4 __attribute__((ext_vector_type(4)));
  us4 rv = __nvvm_ldg_us4(reinterpret_cast<const us4 *>(ptr));
  ushort4 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  ret.z = rv[2];
  ret.w = rv[3];
  return ret;
}
inline __device__ uint2 __ldg(const uint2 *ptr) {
  typedef unsigned int ui2 __attribute__((ext_vector_type(2)));
  ui2 rv = __nvvm_ldg_ui2(reinterpret_cast<const ui2 *>(ptr));
  uint2 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  return ret;
}
inline __device__ uint4 __ldg(const uint4 *ptr) {
  typedef unsigned int ui4 __attribute__((ext_vector_type(4)));
  ui4 rv = __nvvm_ldg_ui4(reinterpret_cast<const ui4 *>(ptr));
  uint4 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  ret.z = rv[2];
  ret.w = rv[3];
  return ret;
}
inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) {
  typedef unsigned long long ull2 __attribute__((ext_vector_type(2)));
  ull2 rv = __nvvm_ldg_ull2(reinterpret_cast<const ull2 *>(ptr));
  ulonglong2 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  return ret;
}

inline __device__ float2 __ldg(const float2 *ptr) {
  typedef float f2 __attribute__((ext_vector_type(2)));
  f2 rv = __nvvm_ldg_f2(reinterpret_cast<const f2 *>(ptr));
  float2 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  return ret;
}
inline __device__ float4 __ldg(const float4 *ptr) {
  typedef float f4 __attribute__((ext_vector_type(4)));
  f4 rv = __nvvm_ldg_f4(reinterpret_cast<const f4 *>(ptr));
  float4 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  ret.z = rv[2];
  ret.w = rv[3];
  return ret;
}
inline __device__ double2 __ldg(const double2 *ptr) {
  typedef double d2 __attribute__((ext_vector_type(2)));
  d2 rv = __nvvm_ldg_d2(reinterpret_cast<const d2 *>(ptr));
  double2 ret;
  ret.x = rv[0];
  ret.y = rv[1];
  return ret;
}

// TODO: Implement these as intrinsics, so the backend can work its magic on
// these. Alternatively, we could implement these as plain C and try to get
// llvm to recognize the relevant patterns.
inline __device__ unsigned __funnelshift_l(unsigned low32, unsigned high32,
                                           unsigned shiftWidth) {
  unsigned result;
  asm("shf.l.wrap.b32 %0, %1, %2, %3;"
      : "=r"(result)
      : "r"(low32), "r"(high32), "r"(shiftWidth));
  return result;
}
inline __device__ unsigned __funnelshift_lc(unsigned low32, unsigned high32,
                                            unsigned shiftWidth) {
  unsigned result;
  asm("shf.l.clamp.b32 %0, %1, %2, %3;"
      : "=r"(result)
      : "r"(low32), "r"(high32), "r"(shiftWidth));
  return result;
}
inline __device__ unsigned __funnelshift_r(unsigned low32, unsigned high32,
                                           unsigned shiftWidth) {
  unsigned result;
  asm("shf.r.wrap.b32 %0, %1, %2, %3;"
      : "=r"(result)
      : "r"(low32), "r"(high32), "r"(shiftWidth));
  return result;
}
inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,
                                            unsigned shiftWidth) {
  unsigned ret;
  asm("shf.r.clamp.b32 %0, %1, %2, %3;"
      : "=r"(ret)
      : "r"(low32), "r"(high32), "r"(shiftWidth));
  return ret;
}
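// Semantics sketch for the funnel shifts above: each returns 32 bits of the
// 64-bit value (high32:low32) shifted by shiftWidth. The .l forms shift left
// and return the upper word; the .r forms shift right and return the lower
// word. .wrap uses shiftWidth % 32, while .clamp limits it to at most 32.
// For example, __funnelshift_r(lo, hi, 8) yields (hi << 24) | (lo >> 8).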

#if defined(__cplusplus) && (__cplusplus >= 201103L)

#pragma push_macro("__INTRINSIC_LOAD")
#define __INTRINSIC_LOAD(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \
                         __Clobber) \
  inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \
    __TmpType __ret; \
    asm(__AsmOp " %0, [%1];" : __AsmType(__ret) : "l"(__ptr)__Clobber); \
    return (__DeclType)__ret; \
  }

#pragma push_macro("__INTRINSIC_LOAD2")
#define __INTRINSIC_LOAD2(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \
                          __Clobber) \
  inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \
    __DeclType __ret; \
    __TmpType __tmp; \
    asm(__AsmOp " {%0,%1}, [%2];" \
        : __AsmType(__tmp.x), __AsmType(__tmp.y) \
        : "l"(__ptr)__Clobber); \
    using __ElementType = decltype(__ret.x); \
    __ret.x = (__ElementType)(__tmp.x); \
    __ret.y = (__ElementType)__tmp.y; \
    return __ret; \
  }

#pragma push_macro("__INTRINSIC_LOAD4")
#define __INTRINSIC_LOAD4(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \
                          __Clobber) \
  inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \
    __DeclType __ret; \
    __TmpType __tmp; \
    asm(__AsmOp " {%0,%1,%2,%3}, [%4];" \
        : __AsmType(__tmp.x), __AsmType(__tmp.y), __AsmType(__tmp.z), \
          __AsmType(__tmp.w) \
        : "l"(__ptr)__Clobber); \
    using __ElementType = decltype(__ret.x); \
    __ret.x = (__ElementType)__tmp.x; \
    __ret.y = (__ElementType)__tmp.y; \
    __ret.z = (__ElementType)__tmp.z; \
    __ret.w = (__ElementType)__tmp.w; \
    return __ret; \
  }

__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", char, unsigned int, "=r", );
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", signed char, unsigned int, "=r", );
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s16", short, unsigned short, "=h", );
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s32", int, unsigned int, "=r", );
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s64", long long, unsigned long long,
                 "=l", );

__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s8", char2, int2, "=r", );
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s8", char4, int4, "=r", );
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s16", short2, short2, "=h", );
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s16", short4, short4, "=h", );
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s32", int2, int2, "=r", );
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s32", int4, int4, "=r", );
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s64 ", longlong2, longlong2, "=l", );

__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u8", unsigned char, unsigned int,
                 "=r", );
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u16", unsigned short, unsigned short,
                 "=h", );
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u32", unsigned int, unsigned int,
                 "=r", );
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u64", unsigned long long,
                 unsigned long long, "=l", );

__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u8", uchar2, int2, "=r", );
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u8", uchar4, int4, "=r", );
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u16", ushort2, ushort2, "=h", );
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u16", ushort4, ushort4, "=h", );
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u32", uint2, uint2, "=r", );
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u32", uint4, uint4, "=r", );
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u64", ulonglong2, ulonglong2,
                  "=l", );

__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f32", float, float, "=f", );
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f64", double, double, "=d", );
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f32", float2, float2, "=f", );
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.f32", float4, float4, "=f", );
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f64", double2, double2, "=d", );

inline __device__ long __ldcg(const long *__ptr) {
  unsigned long __ret;
  if (sizeof(long) == 8) {
    asm("ld.global.cg.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
  } else {
    asm("ld.global.cg.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
  }
  return (long)__ret;
}

__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u8", unsigned char, unsigned int,
                 "=r", : "memory");
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u16", unsigned short, unsigned short,
                 "=h", : "memory");
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u32", unsigned int, unsigned int,
                 "=r", : "memory");
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u64", unsigned long long,
                 unsigned long long, "=l", : "memory");

__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", char, unsigned int,
                 "=r", : "memory");
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", signed char, unsigned int,
                 "=r", : "memory");
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s16", short, unsigned short,
                 "=h", : "memory");
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s32", int, unsigned int,
                 "=r", : "memory");
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s64", long long, unsigned long long,
                 "=l", : "memory");

__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u8", uchar2, uint2,
                  "=r", : "memory");
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u8", uchar4, uint4,
                  "=r", : "memory");
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u16", ushort2, ushort2,
                  "=h", : "memory");
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u16", ushort4, ushort4,
                  "=h", : "memory");
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u32", uint2, uint2,
                  "=r", : "memory");
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u32", uint4, uint4,
                  "=r", : "memory");
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u64", ulonglong2, ulonglong2,
                  "=l", : "memory");

__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s8", char2, int2, "=r", : "memory");
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s8", char4, int4, "=r", : "memory");
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s16", short2, short2,
                  "=h", : "memory");
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s16", short4, short4,
                  "=h", : "memory");
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s32", int2, int2, "=r", : "memory");
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s32", int4, int4, "=r", : "memory");
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s64", longlong2, longlong2,
                  "=l", : "memory");

__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f32", float, float, "=f", : "memory");
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f64", double, double, "=d", : "memory");

__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f32", float2, float2,
                  "=f", : "memory");
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.f32", float4, float4,
                  "=f", : "memory");
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f64", double2, double2,
                  "=d", : "memory");

inline __device__ long __ldcv(const long *__ptr) {
  unsigned long __ret;
  if (sizeof(long) == 8) {
    asm("ld.global.cv.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
  } else {
    asm("ld.global.cv.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
  }
  return (long)__ret;
}

__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", char, unsigned int, "=r", );
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", signed char, signed int, "=r", );
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s16", short, unsigned short, "=h", );
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s32", int, unsigned int, "=r", );
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s64", long long, unsigned long long,
                 "=l", );

__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s8", char2, int2, "=r", );
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s8", char4, int4, "=r", );
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s16", short2, short2, "=h", );
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s16", short4, short4, "=h", );
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s32", int2, int2, "=r", );
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s32", int4, int4, "=r", );
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s64", longlong2, longlong2, "=l", );

__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u8", unsigned char, unsigned int,
                 "=r", );
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u16", unsigned short, unsigned short,
                 "=h", );
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u32", unsigned int, unsigned int,
                 "=r", );
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u64", unsigned long long,
                 unsigned long long, "=l", );

__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u8", uchar2, uint2, "=r", );
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u8", uchar4, uint4, "=r", );
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u16", ushort2, ushort2, "=h", );
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u16", ushort4, ushort4, "=h", );
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u32", uint2, uint2, "=r", );
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u32", uint4, uint4, "=r", );
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u64", ulonglong2, ulonglong2,
                  "=l", );

__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f32", float, float, "=f", );
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f64", double, double, "=d", );
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f32", float2, float2, "=f", );
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.f32", float4, float4, "=f", );
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f64", double2, double2, "=d", );

#pragma pop_macro("__INTRINSIC_LOAD")
#pragma pop_macro("__INTRINSIC_LOAD2")
#pragma pop_macro("__INTRINSIC_LOAD4")

inline __device__ long __ldcs(const long *__ptr) {
  unsigned long __ret;
  if (sizeof(long) == 8) {
    asm("ld.global.cs.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
  } else {
    asm("ld.global.cs.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
  }
  return (long)__ret;
}

#pragma push_macro("__INTRINSIC_STORE")
#define __INTRINSIC_STORE(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType) \
  inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \
    __TmpType __tmp = (__TmpType)__value; \
    asm(__AsmOp " [%0], %1;" ::"l"(__ptr), __AsmType(__tmp) : "memory"); \
  }

#pragma push_macro("__INTRINSIC_STORE2")
#define __INTRINSIC_STORE2(__FnName, __AsmOp, __DeclType, __TmpType, \
                           __AsmType) \
  inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \
    __TmpType __tmp; \
    using __ElementType = decltype(__tmp.x); \
    __tmp.x = (__ElementType)(__value.x); \
    __tmp.y = (__ElementType)(__value.y); \
    asm(__AsmOp " [%0], {%1,%2};" ::"l"(__ptr), __AsmType(__tmp.x), \
        __AsmType(__tmp.y) \
        : "memory"); \
  }

#pragma push_macro("__INTRINSIC_STORE4")
#define __INTRINSIC_STORE4(__FnName, __AsmOp, __DeclType, __TmpType, \
                           __AsmType) \
  inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \
    __TmpType __tmp; \
    using __ElementType = decltype(__tmp.x); \
    __tmp.x = (__ElementType)(__value.x); \
    __tmp.y = (__ElementType)(__value.y); \
    __tmp.z = (__ElementType)(__value.z); \
    __tmp.w = (__ElementType)(__value.w); \
    asm(__AsmOp " [%0], {%1,%2,%3,%4};" ::"l"(__ptr), __AsmType(__tmp.x), \
        __AsmType(__tmp.y), __AsmType(__tmp.z), __AsmType(__tmp.w) \
        : "memory"); \
  }

__INTRINSIC_STORE(__stwt, "st.global.wt.s8", char, int, "r");
__INTRINSIC_STORE(__stwt, "st.global.wt.s8", signed char, int, "r");
__INTRINSIC_STORE(__stwt, "st.global.wt.s16", short, short, "h");
__INTRINSIC_STORE(__stwt, "st.global.wt.s32", int, int, "r");
__INTRINSIC_STORE(__stwt, "st.global.wt.s64", long long, long long, "l");

__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s8", char2, int2, "r");
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s8", char4, int4, "r");
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s16", short2, short2, "h");
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s16", short4, short4, "h");
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s32", int2, int2, "r");
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s32", int4, int4, "r");
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s64", longlong2, longlong2, "l");

__INTRINSIC_STORE(__stwt, "st.global.wt.u8", unsigned char, int, "r");
__INTRINSIC_STORE(__stwt, "st.global.wt.u16", unsigned short, unsigned short,
                  "h");
__INTRINSIC_STORE(__stwt, "st.global.wt.u32", unsigned int, unsigned int, "r");
__INTRINSIC_STORE(__stwt, "st.global.wt.u64", unsigned long long,
                  unsigned long long, "l");

__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u8", uchar2, uchar2, "r");
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u8", uchar4, uint4, "r");
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u16", ushort2, ushort2, "h");
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u16", ushort4, ushort4, "h");
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u32", uint2, uint2, "r");
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u32", uint4, uint4, "r");
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u64", ulonglong2, ulonglong2, "l");

__INTRINSIC_STORE(__stwt, "st.global.wt.f32", float, float, "f");
__INTRINSIC_STORE(__stwt, "st.global.wt.f64", double, double, "d");
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f32", float2, float2, "f");
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.f32", float4, float4, "f");
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f64", double2, double2, "d");

#pragma pop_macro("__INTRINSIC_STORE")
#pragma pop_macro("__INTRINSIC_STORE2")
#pragma pop_macro("__INTRINSIC_STORE4")

#endif // defined(__cplusplus) && (__cplusplus >= 201103L)
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320

#if CUDA_VERSION >= 11000
extern "C" {
__device__ inline size_t __nv_cvta_generic_to_global_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(1))) *)__ptr;
}
__device__ inline size_t __nv_cvta_generic_to_shared_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(3))) *)__ptr;
}
__device__ inline size_t __nv_cvta_generic_to_constant_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(4))) *)__ptr;
}
__device__ inline size_t __nv_cvta_generic_to_local_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(5))) *)__ptr;
}
__device__ inline void *__nv_cvta_global_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(1))) *)__ptr;
}
__device__ inline void *__nv_cvta_shared_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(3))) *)__ptr;
}
__device__ inline void *__nv_cvta_constant_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(4))) *)__ptr;
}
__device__ inline void *__nv_cvta_local_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(5))) *)__ptr;
}
__device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) {
  return __nv_cvta_generic_to_shared_impl(__ptr);
}
} // extern "C"
|
|
||||||
|
|
||||||
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800
|
|
||||||
__device__ inline unsigned __reduce_add_sync(unsigned __mask,
|
|
||||||
unsigned __value) {
|
|
||||||
return __nvvm_redux_sync_add(__value, __mask);
|
|
||||||
}
|
|
||||||
__device__ inline unsigned __reduce_min_sync(unsigned __mask,
|
|
||||||
unsigned __value) {
|
|
||||||
return __nvvm_redux_sync_umin(__value, __mask);
|
|
||||||
}
|
|
||||||
__device__ inline unsigned __reduce_max_sync(unsigned __mask,
|
|
||||||
unsigned __value) {
|
|
||||||
return __nvvm_redux_sync_umax(__value, __mask);
|
|
||||||
}
|
|
||||||
__device__ inline int __reduce_min_sync(unsigned __mask, int __value) {
|
|
||||||
return __nvvm_redux_sync_min(__value, __mask);
|
|
||||||
}
|
|
||||||
__device__ inline int __reduce_max_sync(unsigned __mask, int __value) {
|
|
||||||
return __nvvm_redux_sync_max(__value, __mask);
|
|
||||||
}
|
|
||||||
__device__ inline unsigned __reduce_or_sync(unsigned __mask, unsigned __value) {
|
|
||||||
return __nvvm_redux_sync_or(__value, __mask);
|
|
||||||
}
|
|
||||||
__device__ inline unsigned __reduce_and_sync(unsigned __mask,
|
|
||||||
unsigned __value) {
|
|
||||||
return __nvvm_redux_sync_and(__value, __mask);
|
|
||||||
}
|
|
||||||
__device__ inline unsigned __reduce_xor_sync(unsigned __mask,
|
|
||||||
unsigned __value) {
|
|
||||||
return __nvvm_redux_sync_xor(__value, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline void __nv_memcpy_async_shared_global_4(void *__dst,
|
|
||||||
const void *__src,
|
|
||||||
unsigned __src_size) {
|
|
||||||
__nvvm_cp_async_ca_shared_global_4(
|
|
||||||
(void __attribute__((address_space(3))) *)__dst,
|
|
||||||
(const void __attribute__((address_space(1))) *)__src, __src_size);
|
|
||||||
}
|
|
||||||
__device__ inline void __nv_memcpy_async_shared_global_8(void *__dst,
|
|
||||||
const void *__src,
|
|
||||||
unsigned __src_size) {
|
|
||||||
__nvvm_cp_async_ca_shared_global_8(
|
|
||||||
(void __attribute__((address_space(3))) *)__dst,
|
|
||||||
(const void __attribute__((address_space(1))) *)__src, __src_size);
|
|
||||||
}
|
|
||||||
__device__ inline void __nv_memcpy_async_shared_global_16(void *__dst,
|
|
||||||
const void *__src,
|
|
||||||
unsigned __src_size) {
|
|
||||||
__nvvm_cp_async_ca_shared_global_16(
|
|
||||||
(void __attribute__((address_space(3))) *)__dst,
|
|
||||||
(const void __attribute__((address_space(1))) *)__src, __src_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline void *
|
|
||||||
__nv_associate_access_property(const void *__ptr, unsigned long long __prop) {
|
|
||||||
// TODO: it appears to provide compiler with some sort of a hint. We do not
|
|
||||||
// know what exactly it is supposed to do. However, CUDA headers suggest that
|
|
||||||
// just passing through __ptr should not affect correctness. They do so on
|
|
||||||
// pre-sm80 GPUs where this builtin is not available.
|
|
||||||
return (void*)__ptr;
|
|
||||||
}
|
|
||||||
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800
|
|
||||||
|
|
||||||
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 900
|
|
||||||
__device__ inline unsigned __isCtaShared(const void *ptr) {
|
|
||||||
return __isShared(ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline unsigned __isClusterShared(const void *__ptr) {
|
|
||||||
return __nvvm_isspacep_shared_cluster(__ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline void *__cluster_map_shared_rank(const void *__ptr,
|
|
||||||
unsigned __rank) {
|
|
||||||
return __nvvm_mapa((void *)__ptr, __rank);
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline unsigned __cluster_query_shared_rank(const void *__ptr) {
|
|
||||||
return __nvvm_getctarank((void *)__ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline uint2
|
|
||||||
__cluster_map_shared_multicast(const void *__ptr,
|
|
||||||
unsigned int __cluster_cta_mask) {
|
|
||||||
return make_uint2((unsigned)__cvta_generic_to_shared(__ptr),
|
|
||||||
__cluster_cta_mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline unsigned __clusterDimIsSpecified() {
|
|
||||||
return __nvvm_is_explicit_cluster();
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline dim3 __clusterDim() {
|
|
||||||
return dim3(__nvvm_read_ptx_sreg_cluster_nctaid_x(),
|
|
||||||
__nvvm_read_ptx_sreg_cluster_nctaid_y(),
|
|
||||||
__nvvm_read_ptx_sreg_cluster_nctaid_z());
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline dim3 __clusterRelativeBlockIdx() {
|
|
||||||
return dim3(__nvvm_read_ptx_sreg_cluster_ctaid_x(),
|
|
||||||
__nvvm_read_ptx_sreg_cluster_ctaid_y(),
|
|
||||||
__nvvm_read_ptx_sreg_cluster_ctaid_z());
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline dim3 __clusterGridDimInClusters() {
|
|
||||||
return dim3(__nvvm_read_ptx_sreg_nclusterid_x(),
|
|
||||||
__nvvm_read_ptx_sreg_nclusterid_y(),
|
|
||||||
__nvvm_read_ptx_sreg_nclusterid_z());
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline dim3 __clusterIdx() {
|
|
||||||
return dim3(__nvvm_read_ptx_sreg_clusterid_x(),
|
|
||||||
__nvvm_read_ptx_sreg_clusterid_y(),
|
|
||||||
__nvvm_read_ptx_sreg_clusterid_z());
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline unsigned __clusterRelativeBlockRank() {
|
|
||||||
return __nvvm_read_ptx_sreg_cluster_ctarank();
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline unsigned __clusterSizeInBlocks() {
|
|
||||||
return __nvvm_read_ptx_sreg_cluster_nctarank();
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline void __cluster_barrier_arrive() {
|
|
||||||
__nvvm_barrier_cluster_arrive();
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline void __cluster_barrier_arrive_relaxed() {
|
|
||||||
__nvvm_barrier_cluster_arrive_relaxed();
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ inline void __cluster_barrier_wait() {
|
|
||||||
__nvvm_barrier_cluster_wait();
|
|
||||||
}

__device__ inline void __threadfence_cluster() { __nvvm_fence_sc_cluster(); }

__device__ inline float2 atomicAdd(float2 *__ptr, float2 __val) {
  float2 __ret;
  __asm__("atom.add.v2.f32 {%0, %1}, [%2], {%3, %4};"
          : "=f"(__ret.x), "=f"(__ret.y)
          : "l"(__ptr), "f"(__val.x), "f"(__val.y));
  return __ret;
}

__device__ inline float2 atomicAdd_block(float2 *__ptr, float2 __val) {
  float2 __ret;
  __asm__("atom.cta.add.v2.f32 {%0, %1}, [%2], {%3, %4};"
          : "=f"(__ret.x), "=f"(__ret.y)
          : "l"(__ptr), "f"(__val.x), "f"(__val.y));
  return __ret;
}

__device__ inline float2 atomicAdd_system(float2 *__ptr, float2 __val) {
  float2 __ret;
  __asm__("atom.sys.add.v2.f32 {%0, %1}, [%2], {%3, %4};"
          : "=f"(__ret.x), "=f"(__ret.y)
          : "l"(__ptr), "f"(__val.x), "f"(__val.y));
  return __ret;
}

__device__ inline float4 atomicAdd(float4 *__ptr, float4 __val) {
  float4 __ret;
  __asm__("atom.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};"
          : "=f"(__ret.x), "=f"(__ret.y), "=f"(__ret.z), "=f"(__ret.w)
          : "l"(__ptr), "f"(__val.x), "f"(__val.y), "f"(__val.z), "f"(__val.w));
  return __ret;
}

__device__ inline float4 atomicAdd_block(float4 *__ptr, float4 __val) {
  float4 __ret;
  __asm__(
      "atom.cta.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};"
      : "=f"(__ret.x), "=f"(__ret.y), "=f"(__ret.z), "=f"(__ret.w)
      : "l"(__ptr), "f"(__val.x), "f"(__val.y), "f"(__val.z), "f"(__val.w));
  return __ret;
}

__device__ inline float4 atomicAdd_system(float4 *__ptr, float4 __val) {
  float4 __ret;
  __asm__(
      "atom.sys.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};"
      : "=f"(__ret.x), "=f"(__ret.y), "=f"(__ret.z), "=f"(__ret.w)
      : "l"(__ptr), "f"(__val.x), "f"(__val.y), "f"(__val.z), "f"(__val.w)
      :);
  return __ret;
}
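// Example use of the vector overloads above (illustrative; requires sm_90+,
// per the architecture guard that follows):
//   float4 __old = atomicAdd(&__out[__i], make_float4(1.f, 2.f, 3.f, 4.f));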

#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 900
#endif // CUDA_VERSION >= 11000

#endif // defined(__CLANG_CUDA_INTRINSICS_H__)
@@ -1,468 +0,0 @@
/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__
#define __CLANG_CUDA_LIBDEVICE_DECLARES_H__

#if defined(__cplusplus)
extern "C" {
#endif

#if defined(__OPENMP_NVPTX__)
#define __DEVICE__
#pragma omp begin assumes ext_spmd_amenable no_openmp
#elif defined(__CUDA__)
#define __DEVICE__ __device__
#endif
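// The declarations below let the wrapper headers call functions such as
// __nv_sinf(); the definitions come from NVIDIA's libdevice bitcode, which
// clang links in during device compilation. A minimal illustration:
//   __device__ float __f(float __x) { return __nv_sinf(__x); }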

__DEVICE__ int __nv_abs(int __a);
__DEVICE__ double __nv_acos(double __a);
__DEVICE__ float __nv_acosf(float __a);
__DEVICE__ double __nv_acosh(double __a);
__DEVICE__ float __nv_acoshf(float __a);
__DEVICE__ double __nv_asin(double __a);
__DEVICE__ float __nv_asinf(float __a);
__DEVICE__ double __nv_asinh(double __a);
__DEVICE__ float __nv_asinhf(float __a);
__DEVICE__ double __nv_atan2(double __a, double __b);
__DEVICE__ float __nv_atan2f(float __a, float __b);
__DEVICE__ double __nv_atan(double __a);
__DEVICE__ float __nv_atanf(float __a);
__DEVICE__ double __nv_atanh(double __a);
__DEVICE__ float __nv_atanhf(float __a);
__DEVICE__ int __nv_brev(int __a);
__DEVICE__ long long __nv_brevll(long long __a);
__DEVICE__ int __nv_byte_perm(int __a, int __b, int __c);
__DEVICE__ double __nv_cbrt(double __a);
__DEVICE__ float __nv_cbrtf(float __a);
__DEVICE__ double __nv_ceil(double __a);
__DEVICE__ float __nv_ceilf(float __a);
__DEVICE__ int __nv_clz(int __a);
__DEVICE__ int __nv_clzll(long long __a);
__DEVICE__ double __nv_copysign(double __a, double __b);
__DEVICE__ float __nv_copysignf(float __a, float __b);
__DEVICE__ double __nv_cos(double __a);
__DEVICE__ float __nv_cosf(float __a);
__DEVICE__ double __nv_cosh(double __a);
__DEVICE__ float __nv_coshf(float __a);
__DEVICE__ double __nv_cospi(double __a);
__DEVICE__ float __nv_cospif(float __a);
__DEVICE__ double __nv_cyl_bessel_i0(double __a);
__DEVICE__ float __nv_cyl_bessel_i0f(float __a);
__DEVICE__ double __nv_cyl_bessel_i1(double __a);
__DEVICE__ float __nv_cyl_bessel_i1f(float __a);
__DEVICE__ double __nv_dadd_rd(double __a, double __b);
__DEVICE__ double __nv_dadd_rn(double __a, double __b);
__DEVICE__ double __nv_dadd_ru(double __a, double __b);
__DEVICE__ double __nv_dadd_rz(double __a, double __b);
__DEVICE__ double __nv_ddiv_rd(double __a, double __b);
__DEVICE__ double __nv_ddiv_rn(double __a, double __b);
__DEVICE__ double __nv_ddiv_ru(double __a, double __b);
__DEVICE__ double __nv_ddiv_rz(double __a, double __b);
__DEVICE__ double __nv_dmul_rd(double __a, double __b);
__DEVICE__ double __nv_dmul_rn(double __a, double __b);
__DEVICE__ double __nv_dmul_ru(double __a, double __b);
__DEVICE__ double __nv_dmul_rz(double __a, double __b);
__DEVICE__ float __nv_double2float_rd(double __a);
__DEVICE__ float __nv_double2float_rn(double __a);
__DEVICE__ float __nv_double2float_ru(double __a);
__DEVICE__ float __nv_double2float_rz(double __a);
__DEVICE__ int __nv_double2hiint(double __a);
__DEVICE__ int __nv_double2int_rd(double __a);
__DEVICE__ int __nv_double2int_rn(double __a);
__DEVICE__ int __nv_double2int_ru(double __a);
__DEVICE__ int __nv_double2int_rz(double __a);
__DEVICE__ long long __nv_double2ll_rd(double __a);
__DEVICE__ long long __nv_double2ll_rn(double __a);
__DEVICE__ long long __nv_double2ll_ru(double __a);
__DEVICE__ long long __nv_double2ll_rz(double __a);
__DEVICE__ int __nv_double2loint(double __a);
__DEVICE__ unsigned int __nv_double2uint_rd(double __a);
__DEVICE__ unsigned int __nv_double2uint_rn(double __a);
__DEVICE__ unsigned int __nv_double2uint_ru(double __a);
__DEVICE__ unsigned int __nv_double2uint_rz(double __a);
__DEVICE__ unsigned long long __nv_double2ull_rd(double __a);
__DEVICE__ unsigned long long __nv_double2ull_rn(double __a);
__DEVICE__ unsigned long long __nv_double2ull_ru(double __a);
__DEVICE__ unsigned long long __nv_double2ull_rz(double __a);
__DEVICE__ unsigned long long __nv_double_as_longlong(double __a);
__DEVICE__ double __nv_drcp_rd(double __a);
__DEVICE__ double __nv_drcp_rn(double __a);
__DEVICE__ double __nv_drcp_ru(double __a);
__DEVICE__ double __nv_drcp_rz(double __a);
__DEVICE__ double __nv_dsqrt_rd(double __a);
__DEVICE__ double __nv_dsqrt_rn(double __a);
__DEVICE__ double __nv_dsqrt_ru(double __a);
__DEVICE__ double __nv_dsqrt_rz(double __a);
__DEVICE__ double __nv_dsub_rd(double __a, double __b);
__DEVICE__ double __nv_dsub_rn(double __a, double __b);
__DEVICE__ double __nv_dsub_ru(double __a, double __b);
__DEVICE__ double __nv_dsub_rz(double __a, double __b);
__DEVICE__ double __nv_erfc(double __a);
__DEVICE__ float __nv_erfcf(float __a);
__DEVICE__ double __nv_erfcinv(double __a);
__DEVICE__ float __nv_erfcinvf(float __a);
__DEVICE__ double __nv_erfcx(double __a);
__DEVICE__ float __nv_erfcxf(float __a);
__DEVICE__ double __nv_erf(double __a);
__DEVICE__ float __nv_erff(float __a);
__DEVICE__ double __nv_erfinv(double __a);
__DEVICE__ float __nv_erfinvf(float __a);
__DEVICE__ double __nv_exp10(double __a);
__DEVICE__ float __nv_exp10f(float __a);
__DEVICE__ double __nv_exp2(double __a);
__DEVICE__ float __nv_exp2f(float __a);
__DEVICE__ double __nv_exp(double __a);
__DEVICE__ float __nv_expf(float __a);
__DEVICE__ double __nv_expm1(double __a);
__DEVICE__ float __nv_expm1f(float __a);
__DEVICE__ double __nv_fabs(double __a);
__DEVICE__ float __nv_fabsf(float __a);
__DEVICE__ float __nv_fadd_rd(float __a, float __b);
__DEVICE__ float __nv_fadd_rn(float __a, float __b);
__DEVICE__ float __nv_fadd_ru(float __a, float __b);
__DEVICE__ float __nv_fadd_rz(float __a, float __b);
__DEVICE__ float __nv_fast_cosf(float __a);
__DEVICE__ float __nv_fast_exp10f(float __a);
__DEVICE__ float __nv_fast_expf(float __a);
__DEVICE__ float __nv_fast_fdividef(float __a, float __b);
__DEVICE__ float __nv_fast_log10f(float __a);
__DEVICE__ float __nv_fast_log2f(float __a);
__DEVICE__ float __nv_fast_logf(float __a);
__DEVICE__ float __nv_fast_powf(float __a, float __b);
__DEVICE__ void __nv_fast_sincosf(float __a, float *__s, float *__c);
__DEVICE__ float __nv_fast_sinf(float __a);
__DEVICE__ float __nv_fast_tanf(float __a);
__DEVICE__ double __nv_fdim(double __a, double __b);
__DEVICE__ float __nv_fdimf(float __a, float __b);
__DEVICE__ float __nv_fdiv_rd(float __a, float __b);
__DEVICE__ float __nv_fdiv_rn(float __a, float __b);
__DEVICE__ float __nv_fdiv_ru(float __a, float __b);
__DEVICE__ float __nv_fdiv_rz(float __a, float __b);
__DEVICE__ int __nv_ffs(int __a);
__DEVICE__ int __nv_ffsll(long long __a);
__DEVICE__ int __nv_finitef(float __a);
__DEVICE__ unsigned short __nv_float2half_rn(float __a);
__DEVICE__ int __nv_float2int_rd(float __a);
__DEVICE__ int __nv_float2int_rn(float __a);
__DEVICE__ int __nv_float2int_ru(float __a);
__DEVICE__ int __nv_float2int_rz(float __a);
__DEVICE__ long long __nv_float2ll_rd(float __a);
__DEVICE__ long long __nv_float2ll_rn(float __a);
__DEVICE__ long long __nv_float2ll_ru(float __a);
__DEVICE__ long long __nv_float2ll_rz(float __a);
__DEVICE__ unsigned int __nv_float2uint_rd(float __a);
__DEVICE__ unsigned int __nv_float2uint_rn(float __a);
__DEVICE__ unsigned int __nv_float2uint_ru(float __a);
__DEVICE__ unsigned int __nv_float2uint_rz(float __a);
__DEVICE__ unsigned long long __nv_float2ull_rd(float __a);
__DEVICE__ unsigned long long __nv_float2ull_rn(float __a);
__DEVICE__ unsigned long long __nv_float2ull_ru(float __a);
__DEVICE__ unsigned long long __nv_float2ull_rz(float __a);
__DEVICE__ int __nv_float_as_int(float __a);
__DEVICE__ unsigned int __nv_float_as_uint(float __a);
__DEVICE__ double __nv_floor(double __a);
__DEVICE__ float __nv_floorf(float __a);
__DEVICE__ double __nv_fma(double __a, double __b, double __c);
__DEVICE__ float __nv_fmaf(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_rd(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_rn(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_ru(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_rz(float __a, float __b, float __c);
__DEVICE__ double __nv_fma_rd(double __a, double __b, double __c);
__DEVICE__ double __nv_fma_rn(double __a, double __b, double __c);
__DEVICE__ double __nv_fma_ru(double __a, double __b, double __c);
__DEVICE__ double __nv_fma_rz(double __a, double __b, double __c);
__DEVICE__ double __nv_fmax(double __a, double __b);
__DEVICE__ float __nv_fmaxf(float __a, float __b);
__DEVICE__ double __nv_fmin(double __a, double __b);
__DEVICE__ float __nv_fminf(float __a, float __b);
__DEVICE__ double __nv_fmod(double __a, double __b);
__DEVICE__ float __nv_fmodf(float __a, float __b);
__DEVICE__ float __nv_fmul_rd(float __a, float __b);
__DEVICE__ float __nv_fmul_rn(float __a, float __b);
__DEVICE__ float __nv_fmul_ru(float __a, float __b);
__DEVICE__ float __nv_fmul_rz(float __a, float __b);
__DEVICE__ float __nv_frcp_rd(float __a);
__DEVICE__ float __nv_frcp_rn(float __a);
__DEVICE__ float __nv_frcp_ru(float __a);
__DEVICE__ float __nv_frcp_rz(float __a);
__DEVICE__ double __nv_frexp(double __a, int *__b);
__DEVICE__ float __nv_frexpf(float __a, int *__b);
__DEVICE__ float __nv_frsqrt_rn(float __a);
__DEVICE__ float __nv_fsqrt_rd(float __a);
__DEVICE__ float __nv_fsqrt_rn(float __a);
__DEVICE__ float __nv_fsqrt_ru(float __a);
__DEVICE__ float __nv_fsqrt_rz(float __a);
__DEVICE__ float __nv_fsub_rd(float __a, float __b);
__DEVICE__ float __nv_fsub_rn(float __a, float __b);
__DEVICE__ float __nv_fsub_ru(float __a, float __b);
__DEVICE__ float __nv_fsub_rz(float __a, float __b);
__DEVICE__ int __nv_hadd(int __a, int __b);
__DEVICE__ float __nv_half2float(unsigned short __h);
__DEVICE__ double __nv_hiloint2double(int __a, int __b);
__DEVICE__ double __nv_hypot(double __a, double __b);
__DEVICE__ float __nv_hypotf(float __a, float __b);
__DEVICE__ int __nv_ilogb(double __a);
__DEVICE__ int __nv_ilogbf(float __a);
__DEVICE__ double __nv_int2double_rn(int __a);
__DEVICE__ float __nv_int2float_rd(int __a);
__DEVICE__ float __nv_int2float_rn(int __a);
__DEVICE__ float __nv_int2float_ru(int __a);
__DEVICE__ float __nv_int2float_rz(int __a);
__DEVICE__ float __nv_int_as_float(int __a);
__DEVICE__ int __nv_isfinited(double __a);
__DEVICE__ int __nv_isinfd(double __a);
__DEVICE__ int __nv_isinff(float __a);
__DEVICE__ int __nv_isnand(double __a);
__DEVICE__ int __nv_isnanf(float __a);
__DEVICE__ double __nv_j0(double __a);
__DEVICE__ float __nv_j0f(float __a);
__DEVICE__ double __nv_j1(double __a);
__DEVICE__ float __nv_j1f(float __a);
__DEVICE__ float __nv_jnf(int __a, float __b);
__DEVICE__ double __nv_jn(int __a, double __b);
__DEVICE__ double __nv_ldexp(double __a, int __b);
__DEVICE__ float __nv_ldexpf(float __a, int __b);
__DEVICE__ double __nv_lgamma(double __a);
__DEVICE__ float __nv_lgammaf(float __a);
__DEVICE__ double __nv_ll2double_rd(long long __a);
__DEVICE__ double __nv_ll2double_rn(long long __a);
__DEVICE__ double __nv_ll2double_ru(long long __a);
__DEVICE__ double __nv_ll2double_rz(long long __a);
__DEVICE__ float __nv_ll2float_rd(long long __a);
__DEVICE__ float __nv_ll2float_rn(long long __a);
__DEVICE__ float __nv_ll2float_ru(long long __a);
__DEVICE__ float __nv_ll2float_rz(long long __a);
__DEVICE__ long long __nv_llabs(long long __a);
__DEVICE__ long long __nv_llmax(long long __a, long long __b);
__DEVICE__ long long __nv_llmin(long long __a, long long __b);
__DEVICE__ long long __nv_llrint(double __a);
__DEVICE__ long long __nv_llrintf(float __a);
__DEVICE__ long long __nv_llround(double __a);
__DEVICE__ long long __nv_llroundf(float __a);
__DEVICE__ double __nv_log10(double __a);
__DEVICE__ float __nv_log10f(float __a);
__DEVICE__ double __nv_log1p(double __a);
__DEVICE__ float __nv_log1pf(float __a);
__DEVICE__ double __nv_log2(double __a);
__DEVICE__ float __nv_log2f(float __a);
__DEVICE__ double __nv_logb(double __a);
__DEVICE__ float __nv_logbf(float __a);
__DEVICE__ double __nv_log(double __a);
__DEVICE__ float __nv_logf(float __a);
__DEVICE__ double __nv_longlong_as_double(long long __a);
__DEVICE__ int __nv_max(int __a, int __b);
__DEVICE__ int __nv_min(int __a, int __b);
__DEVICE__ double __nv_modf(double __a, double *__b);
__DEVICE__ float __nv_modff(float __a, float *__b);
__DEVICE__ int __nv_mul24(int __a, int __b);
__DEVICE__ long long __nv_mul64hi(long long __a, long long __b);
__DEVICE__ int __nv_mulhi(int __a, int __b);
__DEVICE__ double __nv_nan(const signed char *__a);
__DEVICE__ float __nv_nanf(const signed char *__a);
__DEVICE__ double __nv_nearbyint(double __a);
__DEVICE__ float __nv_nearbyintf(float __a);
__DEVICE__ double __nv_nextafter(double __a, double __b);
__DEVICE__ float __nv_nextafterf(float __a, float __b);
__DEVICE__ double __nv_norm3d(double __a, double __b, double __c);
__DEVICE__ float __nv_norm3df(float __a, float __b, float __c);
__DEVICE__ double __nv_norm4d(double __a, double __b, double __c, double __d);
__DEVICE__ float __nv_norm4df(float __a, float __b, float __c, float __d);
__DEVICE__ double __nv_normcdf(double __a);
__DEVICE__ float __nv_normcdff(float __a);
__DEVICE__ double __nv_normcdfinv(double __a);
__DEVICE__ float __nv_normcdfinvf(float __a);
__DEVICE__ float __nv_normf(int __a, const float *__b);
__DEVICE__ double __nv_norm(int __a, const double *__b);
__DEVICE__ int __nv_popc(unsigned int __a);
__DEVICE__ int __nv_popcll(unsigned long long __a);
__DEVICE__ double __nv_pow(double __a, double __b);
__DEVICE__ float __nv_powf(float __a, float __b);
__DEVICE__ double __nv_powi(double __a, int __b);
__DEVICE__ float __nv_powif(float __a, int __b);
__DEVICE__ double __nv_rcbrt(double __a);
__DEVICE__ float __nv_rcbrtf(float __a);
__DEVICE__ double __nv_rcp64h(double __a);
__DEVICE__ double __nv_remainder(double __a, double __b);
__DEVICE__ float __nv_remainderf(float __a, float __b);
__DEVICE__ double __nv_remquo(double __a, double __b, int *__c);
__DEVICE__ float __nv_remquof(float __a, float __b, int *__c);
__DEVICE__ int __nv_rhadd(int __a, int __b);
__DEVICE__ double __nv_rhypot(double __a, double __b);
__DEVICE__ float __nv_rhypotf(float __a, float __b);
__DEVICE__ double __nv_rint(double __a);
__DEVICE__ float __nv_rintf(float __a);
__DEVICE__ double __nv_rnorm3d(double __a, double __b, double __c);
__DEVICE__ float __nv_rnorm3df(float __a, float __b, float __c);
__DEVICE__ double __nv_rnorm4d(double __a, double __b, double __c, double __d);
__DEVICE__ float __nv_rnorm4df(float __a, float __b, float __c, float __d);
__DEVICE__ float __nv_rnormf(int __a, const float *__b);
__DEVICE__ double __nv_rnorm(int __a, const double *__b);
__DEVICE__ double __nv_round(double __a);
__DEVICE__ float __nv_roundf(float __a);
__DEVICE__ double __nv_rsqrt(double __a);
__DEVICE__ float __nv_rsqrtf(float __a);
__DEVICE__ int __nv_sad(int __a, int __b, int __c);
__DEVICE__ float __nv_saturatef(float __a);
__DEVICE__ double __nv_scalbn(double __a, int __b);
__DEVICE__ float __nv_scalbnf(float __a, int __b);
__DEVICE__ int __nv_signbitd(double __a);
__DEVICE__ int __nv_signbitf(float __a);
__DEVICE__ void __nv_sincos(double __a, double *__b, double *__c);
__DEVICE__ void __nv_sincosf(float __a, float *__b, float *__c);
__DEVICE__ void __nv_sincospi(double __a, double *__b, double *__c);
__DEVICE__ void __nv_sincospif(float __a, float *__b, float *__c);
__DEVICE__ double __nv_sin(double __a);
__DEVICE__ float __nv_sinf(float __a);
__DEVICE__ double __nv_sinh(double __a);
__DEVICE__ float __nv_sinhf(float __a);
__DEVICE__ double __nv_sinpi(double __a);
__DEVICE__ float __nv_sinpif(float __a);
__DEVICE__ double __nv_sqrt(double __a);
__DEVICE__ float __nv_sqrtf(float __a);
__DEVICE__ double __nv_tan(double __a);
__DEVICE__ float __nv_tanf(float __a);
__DEVICE__ double __nv_tanh(double __a);
__DEVICE__ float __nv_tanhf(float __a);
__DEVICE__ double __nv_tgamma(double __a);
__DEVICE__ float __nv_tgammaf(float __a);
__DEVICE__ double __nv_trunc(double __a);
__DEVICE__ float __nv_truncf(float __a);
__DEVICE__ int __nv_uhadd(unsigned int __a, unsigned int __b);
__DEVICE__ double __nv_uint2double_rn(unsigned int __i);
__DEVICE__ float __nv_uint2float_rd(unsigned int __a);
__DEVICE__ float __nv_uint2float_rn(unsigned int __a);
__DEVICE__ float __nv_uint2float_ru(unsigned int __a);
__DEVICE__ float __nv_uint2float_rz(unsigned int __a);
__DEVICE__ float __nv_uint_as_float(unsigned int __a);
__DEVICE__ double __nv_ull2double_rd(unsigned long long __a);
__DEVICE__ double __nv_ull2double_rn(unsigned long long __a);
__DEVICE__ double __nv_ull2double_ru(unsigned long long __a);
__DEVICE__ double __nv_ull2double_rz(unsigned long long __a);
__DEVICE__ float __nv_ull2float_rd(unsigned long long __a);
__DEVICE__ float __nv_ull2float_rn(unsigned long long __a);
__DEVICE__ float __nv_ull2float_ru(unsigned long long __a);
__DEVICE__ float __nv_ull2float_rz(unsigned long long __a);
__DEVICE__ unsigned long long __nv_ullmax(unsigned long long __a,
                                          unsigned long long __b);
__DEVICE__ unsigned long long __nv_ullmin(unsigned long long __a,
                                          unsigned long long __b);
__DEVICE__ unsigned int __nv_umax(unsigned int __a, unsigned int __b);
__DEVICE__ unsigned int __nv_umin(unsigned int __a, unsigned int __b);
__DEVICE__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b);
__DEVICE__ unsigned long long __nv_umul64hi(unsigned long long __a,
                                            unsigned long long __b);
__DEVICE__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);
__DEVICE__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);
__DEVICE__ unsigned int __nv_usad(unsigned int __a, unsigned int __b,
                                  unsigned int __c);
#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020
__DEVICE__ int __nv_vabs2(int __a);
__DEVICE__ int __nv_vabs4(int __a);
__DEVICE__ int __nv_vabsdiffs2(int __a, int __b);
__DEVICE__ int __nv_vabsdiffs4(int __a, int __b);
__DEVICE__ int __nv_vabsdiffu2(int __a, int __b);
__DEVICE__ int __nv_vabsdiffu4(int __a, int __b);
__DEVICE__ int __nv_vabsss2(int __a);
__DEVICE__ int __nv_vabsss4(int __a);
__DEVICE__ int __nv_vadd2(int __a, int __b);
__DEVICE__ int __nv_vadd4(int __a, int __b);
__DEVICE__ int __nv_vaddss2(int __a, int __b);
__DEVICE__ int __nv_vaddss4(int __a, int __b);
__DEVICE__ int __nv_vaddus2(int __a, int __b);
__DEVICE__ int __nv_vaddus4(int __a, int __b);
__DEVICE__ int __nv_vavgs2(int __a, int __b);
__DEVICE__ int __nv_vavgs4(int __a, int __b);
__DEVICE__ int __nv_vavgu2(int __a, int __b);
__DEVICE__ int __nv_vavgu4(int __a, int __b);
__DEVICE__ int __nv_vcmpeq2(int __a, int __b);
__DEVICE__ int __nv_vcmpeq4(int __a, int __b);
__DEVICE__ int __nv_vcmpges2(int __a, int __b);
__DEVICE__ int __nv_vcmpges4(int __a, int __b);
__DEVICE__ int __nv_vcmpgeu2(int __a, int __b);
__DEVICE__ int __nv_vcmpgeu4(int __a, int __b);
__DEVICE__ int __nv_vcmpgts2(int __a, int __b);
__DEVICE__ int __nv_vcmpgts4(int __a, int __b);
__DEVICE__ int __nv_vcmpgtu2(int __a, int __b);
__DEVICE__ int __nv_vcmpgtu4(int __a, int __b);
__DEVICE__ int __nv_vcmples2(int __a, int __b);
__DEVICE__ int __nv_vcmples4(int __a, int __b);
__DEVICE__ int __nv_vcmpleu2(int __a, int __b);
__DEVICE__ int __nv_vcmpleu4(int __a, int __b);
__DEVICE__ int __nv_vcmplts2(int __a, int __b);
__DEVICE__ int __nv_vcmplts4(int __a, int __b);
__DEVICE__ int __nv_vcmpltu2(int __a, int __b);
__DEVICE__ int __nv_vcmpltu4(int __a, int __b);
__DEVICE__ int __nv_vcmpne2(int __a, int __b);
__DEVICE__ int __nv_vcmpne4(int __a, int __b);
__DEVICE__ int __nv_vhaddu2(int __a, int __b);
__DEVICE__ int __nv_vhaddu4(int __a, int __b);
__DEVICE__ int __nv_vmaxs2(int __a, int __b);
__DEVICE__ int __nv_vmaxs4(int __a, int __b);
__DEVICE__ int __nv_vmaxu2(int __a, int __b);
__DEVICE__ int __nv_vmaxu4(int __a, int __b);
__DEVICE__ int __nv_vmins2(int __a, int __b);
__DEVICE__ int __nv_vmins4(int __a, int __b);
__DEVICE__ int __nv_vminu2(int __a, int __b);
__DEVICE__ int __nv_vminu4(int __a, int __b);
__DEVICE__ int __nv_vneg2(int __a);
__DEVICE__ int __nv_vneg4(int __a);
__DEVICE__ int __nv_vnegss2(int __a);
__DEVICE__ int __nv_vnegss4(int __a);
__DEVICE__ int __nv_vsads2(int __a, int __b);
__DEVICE__ int __nv_vsads4(int __a, int __b);
__DEVICE__ int __nv_vsadu2(int __a, int __b);
__DEVICE__ int __nv_vsadu4(int __a, int __b);
__DEVICE__ int __nv_vseteq2(int __a, int __b);
__DEVICE__ int __nv_vseteq4(int __a, int __b);
__DEVICE__ int __nv_vsetges2(int __a, int __b);
__DEVICE__ int __nv_vsetges4(int __a, int __b);
__DEVICE__ int __nv_vsetgeu2(int __a, int __b);
__DEVICE__ int __nv_vsetgeu4(int __a, int __b);
__DEVICE__ int __nv_vsetgts2(int __a, int __b);
__DEVICE__ int __nv_vsetgts4(int __a, int __b);
__DEVICE__ int __nv_vsetgtu2(int __a, int __b);
__DEVICE__ int __nv_vsetgtu4(int __a, int __b);
__DEVICE__ int __nv_vsetles2(int __a, int __b);
__DEVICE__ int __nv_vsetles4(int __a, int __b);
__DEVICE__ int __nv_vsetleu2(int __a, int __b);
__DEVICE__ int __nv_vsetleu4(int __a, int __b);
__DEVICE__ int __nv_vsetlts2(int __a, int __b);
__DEVICE__ int __nv_vsetlts4(int __a, int __b);
__DEVICE__ int __nv_vsetltu2(int __a, int __b);
__DEVICE__ int __nv_vsetltu4(int __a, int __b);
__DEVICE__ int __nv_vsetne2(int __a, int __b);
__DEVICE__ int __nv_vsetne4(int __a, int __b);
__DEVICE__ int __nv_vsub2(int __a, int __b);
__DEVICE__ int __nv_vsub4(int __a, int __b);
__DEVICE__ int __nv_vsubss2(int __a, int __b);
__DEVICE__ int __nv_vsubss4(int __a, int __b);
__DEVICE__ int __nv_vsubus2(int __a, int __b);
__DEVICE__ int __nv_vsubus4(int __a, int __b);
#endif // CUDA_VERSION
__DEVICE__ double __nv_y0(double __a);
__DEVICE__ float __nv_y0f(float __a);
__DEVICE__ double __nv_y1(double __a);
__DEVICE__ float __nv_y1f(float __a);
__DEVICE__ float __nv_ynf(int __a, float __b);
__DEVICE__ double __nv_yn(int __a, double __b);

#if defined(__OPENMP_NVPTX__)
#pragma omp end assumes ext_spmd_amenable no_openmp
#endif

#if defined(__cplusplus)
} // extern "C"
#endif
#endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__
@@ -1,353 +0,0 @@
/*===---- __clang_cuda_math.h - Device-side CUDA math support --------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_CUDA_MATH_H__
#define __CLANG_CUDA_MATH_H__
#ifndef __CUDA__
#error "This file is for CUDA compilation only."
#endif

// The __CLANG_GPU_DISABLE_MATH_WRAPPERS macro provides a way to let standard
// libcalls reach the link step instead of being eagerly replaced.
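// For example, a device-side compile with
//   clang++ -x cuda -D__CLANG_GPU_DISABLE_MATH_WRAPPERS ...
// skips everything below, so a call like sin(x) stays an unresolved libcall
// for a later link step instead of being lowered to __nv_sin(x) here.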
#ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS

#ifndef __OPENMP_NVPTX__
#if CUDA_VERSION < 9000
#error This file is intended to be used with CUDA-9+ only.
#endif
#endif

// __DEVICE__ is a helper macro with a common set of attributes for the
// wrappers we implement in this file. We need static in order to avoid
// emitting unused functions and __forceinline__ helps inlining these
// wrappers at -O1.
#pragma push_macro("__DEVICE__")
#ifdef __OPENMP_NVPTX__
#if defined(__cplusplus)
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __attribute__((always_inline, nothrow))
#endif
#else
#define __DEVICE__ static __device__ __forceinline__
#endif

// Specialized version of __DEVICE__ for functions with void return type.
// Needed because the OpenMP overlay requires constexpr functions here but
// prior to C++14 void-returning functions could not be constexpr.
#pragma push_macro("__DEVICE_VOID__")
#if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L
#define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
#else
#define __DEVICE_VOID__ __DEVICE__
#endif
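// For instance, under the pre-C++14 OpenMP overlay a 'constexpr void'
// function would be ill-formed, which is why the sincos() family further
// down is declared with __DEVICE_VOID__ rather than __DEVICE__.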

// libdevice provides fast low-precision and slow full-precision
// implementations for some functions. Which one gets selected depends on
// __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if
// -ffast-math or -fgpu-approx-transcendentals are in effect.
#pragma push_macro("__FAST_OR_SLOW")
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
#define __FAST_OR_SLOW(fast, slow) fast
#else
#define __FAST_OR_SLOW(fast, slow) slow
#endif
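// For example, with approximate transcendentals enabled, cosf() below
// expands to __nv_fast_cosf(__a); otherwise it expands to __nv_cosf(__a).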

__DEVICE__ int abs(int __a) { return __nv_abs(__a); }
__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }
__DEVICE__ double acos(double __a) { return __nv_acos(__a); }
__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }
__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }
__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); }
__DEVICE__ double asin(double __a) { return __nv_asin(__a); }
__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); }
__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); }
__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); }
__DEVICE__ double atan(double __a) { return __nv_atan(__a); }
__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }
__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }
__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); }
__DEVICE__ double atanh(double __a) { return __nv_atanh(__a); }
__DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); }
__DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); }
__DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }
__DEVICE__ double ceil(double __a) { return __nv_ceil(__a); }
__DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); }
__DEVICE__ double copysign(double __a, double __b) {
  return __nv_copysign(__a, __b);
}
__DEVICE__ float copysignf(float __a, float __b) {
  return __nv_copysignf(__a, __b);
}
__DEVICE__ double cos(double __a) { return __nv_cos(__a); }
__DEVICE__ float cosf(float __a) {
  return __FAST_OR_SLOW(__nv_fast_cosf, __nv_cosf)(__a);
}
__DEVICE__ double cosh(double __a) { return __nv_cosh(__a); }
__DEVICE__ float coshf(float __a) { return __nv_coshf(__a); }
__DEVICE__ double cospi(double __a) { return __nv_cospi(__a); }
__DEVICE__ float cospif(float __a) { return __nv_cospif(__a); }
__DEVICE__ double cyl_bessel_i0(double __a) { return __nv_cyl_bessel_i0(__a); }
__DEVICE__ float cyl_bessel_i0f(float __a) { return __nv_cyl_bessel_i0f(__a); }
__DEVICE__ double cyl_bessel_i1(double __a) { return __nv_cyl_bessel_i1(__a); }
__DEVICE__ float cyl_bessel_i1f(float __a) { return __nv_cyl_bessel_i1f(__a); }
__DEVICE__ double erf(double __a) { return __nv_erf(__a); }
__DEVICE__ double erfc(double __a) { return __nv_erfc(__a); }
__DEVICE__ float erfcf(float __a) { return __nv_erfcf(__a); }
__DEVICE__ double erfcinv(double __a) { return __nv_erfcinv(__a); }
__DEVICE__ float erfcinvf(float __a) { return __nv_erfcinvf(__a); }
__DEVICE__ double erfcx(double __a) { return __nv_erfcx(__a); }
__DEVICE__ float erfcxf(float __a) { return __nv_erfcxf(__a); }
__DEVICE__ float erff(float __a) { return __nv_erff(__a); }
__DEVICE__ double erfinv(double __a) { return __nv_erfinv(__a); }
__DEVICE__ float erfinvf(float __a) { return __nv_erfinvf(__a); }
__DEVICE__ double exp(double __a) { return __nv_exp(__a); }
__DEVICE__ double exp10(double __a) { return __nv_exp10(__a); }
__DEVICE__ float exp10f(float __a) { return __nv_exp10f(__a); }
__DEVICE__ double exp2(double __a) { return __nv_exp2(__a); }
__DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); }
__DEVICE__ float expf(float __a) { return __nv_expf(__a); }
__DEVICE__ double expm1(double __a) { return __nv_expm1(__a); }
__DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); }
__DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); }
__DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); }
__DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); }
__DEVICE__ double fdivide(double __a, double __b) { return __a / __b; }
__DEVICE__ float fdividef(float __a, float __b) {
#if __FAST_MATH__ && !__CUDA_PREC_DIV
  return __nv_fast_fdividef(__a, __b);
#else
  return __a / __b;
#endif
}
__DEVICE__ double floor(double __f) { return __nv_floor(__f); }
__DEVICE__ float floorf(float __f) { return __nv_floorf(__f); }
__DEVICE__ double fma(double __a, double __b, double __c) {
  return __nv_fma(__a, __b, __c);
}
__DEVICE__ float fmaf(float __a, float __b, float __c) {
  return __nv_fmaf(__a, __b, __c);
}
__DEVICE__ double fmax(double __a, double __b) { return __nv_fmax(__a, __b); }
__DEVICE__ float fmaxf(float __a, float __b) { return __nv_fmaxf(__a, __b); }
__DEVICE__ double fmin(double __a, double __b) { return __nv_fmin(__a, __b); }
__DEVICE__ float fminf(float __a, float __b) { return __nv_fminf(__a, __b); }
__DEVICE__ double fmod(double __a, double __b) { return __nv_fmod(__a, __b); }
__DEVICE__ float fmodf(float __a, float __b) { return __nv_fmodf(__a, __b); }
__DEVICE__ double frexp(double __a, int *__b) { return __nv_frexp(__a, __b); }
__DEVICE__ float frexpf(float __a, int *__b) { return __nv_frexpf(__a, __b); }
__DEVICE__ double hypot(double __a, double __b) { return __nv_hypot(__a, __b); }
__DEVICE__ float hypotf(float __a, float __b) { return __nv_hypotf(__a, __b); }
__DEVICE__ int ilogb(double __a) { return __nv_ilogb(__a); }
__DEVICE__ int ilogbf(float __a) { return __nv_ilogbf(__a); }
__DEVICE__ double j0(double __a) { return __nv_j0(__a); }
__DEVICE__ float j0f(float __a) { return __nv_j0f(__a); }
__DEVICE__ double j1(double __a) { return __nv_j1(__a); }
__DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }
__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
#if defined(__LP64__) || defined(_WIN64)
__DEVICE__ long labs(long __a) { return __nv_llabs(__a); };
#else
__DEVICE__ long labs(long __a) { return __nv_abs(__a); };
#endif
__DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }
__DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }
__DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }
__DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }
__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }
__DEVICE__ long long llmax(long long __a, long long __b) {
  return __nv_llmax(__a, __b);
}
__DEVICE__ long long llmin(long long __a, long long __b) {
  return __nv_llmin(__a, __b);
}
__DEVICE__ long long llrint(double __a) { return __nv_llrint(__a); }
__DEVICE__ long long llrintf(float __a) { return __nv_llrintf(__a); }
__DEVICE__ long long llround(double __a) { return __nv_llround(__a); }
__DEVICE__ long long llroundf(float __a) { return __nv_llroundf(__a); }
__DEVICE__ double round(double __a) { return __nv_round(__a); }
__DEVICE__ float roundf(float __a) { return __nv_roundf(__a); }
__DEVICE__ double log(double __a) { return __nv_log(__a); }
__DEVICE__ double log10(double __a) { return __nv_log10(__a); }
__DEVICE__ float log10f(float __a) { return __nv_log10f(__a); }
__DEVICE__ double log1p(double __a) { return __nv_log1p(__a); }
__DEVICE__ float log1pf(float __a) { return __nv_log1pf(__a); }
__DEVICE__ double log2(double __a) { return __nv_log2(__a); }
__DEVICE__ float log2f(float __a) {
  return __FAST_OR_SLOW(__nv_fast_log2f, __nv_log2f)(__a);
}
__DEVICE__ double logb(double __a) { return __nv_logb(__a); }
__DEVICE__ float logbf(float __a) { return __nv_logbf(__a); }
__DEVICE__ float logf(float __a) {
  return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a);
}
#if defined(__LP64__) || defined(_WIN64)
__DEVICE__ long lrint(double __a) { return llrint(__a); }
__DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); }
__DEVICE__ long lround(double __a) { return llround(__a); }
__DEVICE__ long lroundf(float __a) { return llroundf(__a); }
#else
__DEVICE__ long lrint(double __a) { return (long)rint(__a); }
__DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); }
__DEVICE__ long lround(double __a) { return round(__a); }
__DEVICE__ long lroundf(float __a) { return roundf(__a); }
#endif
__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }
__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }
__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }
__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }
__DEVICE__ double nearbyint(double __a) { return __builtin_nearbyint(__a); }
__DEVICE__ float nearbyintf(float __a) { return __builtin_nearbyintf(__a); }
__DEVICE__ double nextafter(double __a, double __b) {
  return __nv_nextafter(__a, __b);
}
__DEVICE__ float nextafterf(float __a, float __b) {
  return __nv_nextafterf(__a, __b);
}
__DEVICE__ double norm(int __dim, const double *__t) {
  return __nv_norm(__dim, __t);
}
__DEVICE__ double norm3d(double __a, double __b, double __c) {
  return __nv_norm3d(__a, __b, __c);
}
__DEVICE__ float norm3df(float __a, float __b, float __c) {
  return __nv_norm3df(__a, __b, __c);
}
__DEVICE__ double norm4d(double __a, double __b, double __c, double __d) {
  return __nv_norm4d(__a, __b, __c, __d);
}
__DEVICE__ float norm4df(float __a, float __b, float __c, float __d) {
  return __nv_norm4df(__a, __b, __c, __d);
}
__DEVICE__ double normcdf(double __a) { return __nv_normcdf(__a); }
__DEVICE__ float normcdff(float __a) { return __nv_normcdff(__a); }
__DEVICE__ double normcdfinv(double __a) { return __nv_normcdfinv(__a); }
__DEVICE__ float normcdfinvf(float __a) { return __nv_normcdfinvf(__a); }
__DEVICE__ float normf(int __dim, const float *__t) {
  return __nv_normf(__dim, __t);
}
__DEVICE__ double pow(double __a, double __b) { return __nv_pow(__a, __b); }
__DEVICE__ float powf(float __a, float __b) { return __nv_powf(__a, __b); }
__DEVICE__ double powi(double __a, int __b) { return __nv_powi(__a, __b); }
__DEVICE__ float powif(float __a, int __b) { return __nv_powif(__a, __b); }
__DEVICE__ double rcbrt(double __a) { return __nv_rcbrt(__a); }
__DEVICE__ float rcbrtf(float __a) { return __nv_rcbrtf(__a); }
__DEVICE__ double remainder(double __a, double __b) {
  return __nv_remainder(__a, __b);
}
__DEVICE__ float remainderf(float __a, float __b) {
  return __nv_remainderf(__a, __b);
}
__DEVICE__ double remquo(double __a, double __b, int *__c) {
  return __nv_remquo(__a, __b, __c);
}
__DEVICE__ float remquof(float __a, float __b, int *__c) {
  return __nv_remquof(__a, __b, __c);
}
__DEVICE__ double rhypot(double __a, double __b) {
  return __nv_rhypot(__a, __b);
}
__DEVICE__ float rhypotf(float __a, float __b) {
  return __nv_rhypotf(__a, __b);
}
// __nv_rint* in libdevice is buggy and produces incorrect results.
__DEVICE__ double rint(double __a) { return __builtin_rint(__a); }
__DEVICE__ float rintf(float __a) { return __builtin_rintf(__a); }
__DEVICE__ double rnorm(int __a, const double *__b) {
  return __nv_rnorm(__a, __b);
}
__DEVICE__ double rnorm3d(double __a, double __b, double __c) {
  return __nv_rnorm3d(__a, __b, __c);
}
__DEVICE__ float rnorm3df(float __a, float __b, float __c) {
  return __nv_rnorm3df(__a, __b, __c);
}
__DEVICE__ double rnorm4d(double __a, double __b, double __c, double __d) {
  return __nv_rnorm4d(__a, __b, __c, __d);
}
__DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) {
  return __nv_rnorm4df(__a, __b, __c, __d);
}
__DEVICE__ float rnormf(int __dim, const float *__t) {
  return __nv_rnormf(__dim, __t);
}
__DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }
__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }
__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }
__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }
__DEVICE__ double scalbln(double __a, long __b) {
  if (__b > INT_MAX)
    return __a > 0 ? HUGE_VAL : -HUGE_VAL;
  if (__b < INT_MIN)
    return __a > 0 ? 0.0 : -0.0;
  return scalbn(__a, (int)__b);
}
__DEVICE__ float scalblnf(float __a, long __b) {
  if (__b > INT_MAX)
    return __a > 0 ? HUGE_VALF : -HUGE_VALF;
  if (__b < INT_MIN)
    return __a > 0 ? 0.f : -0.f;
  return scalbnf(__a, (int)__b);
}
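// e.g. on an LP64 target, scalbln(1.0, 5000000000L) saturates to HUGE_VAL
// because the exponent does not fit in scalbn's int argument.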
__DEVICE__ double sin(double __a) { return __nv_sin(__a); }
__DEVICE_VOID__ void sincos(double __a, double *__s, double *__c) {
  return __nv_sincos(__a, __s, __c);
}
__DEVICE_VOID__ void sincosf(float __a, float *__s, float *__c) {
  return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __s, __c);
}
__DEVICE_VOID__ void sincospi(double __a, double *__s, double *__c) {
  return __nv_sincospi(__a, __s, __c);
}
__DEVICE_VOID__ void sincospif(float __a, float *__s, float *__c) {
  return __nv_sincospif(__a, __s, __c);
}
__DEVICE__ float sinf(float __a) {
  return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a);
}
__DEVICE__ double sinh(double __a) { return __nv_sinh(__a); }
__DEVICE__ float sinhf(float __a) { return __nv_sinhf(__a); }
__DEVICE__ double sinpi(double __a) { return __nv_sinpi(__a); }
__DEVICE__ float sinpif(float __a) { return __nv_sinpif(__a); }
__DEVICE__ double sqrt(double __a) { return __nv_sqrt(__a); }
__DEVICE__ float sqrtf(float __a) { return __nv_sqrtf(__a); }
__DEVICE__ double tan(double __a) { return __nv_tan(__a); }
__DEVICE__ float tanf(float __a) { return __nv_tanf(__a); }
__DEVICE__ double tanh(double __a) { return __nv_tanh(__a); }
__DEVICE__ float tanhf(float __a) { return __nv_tanhf(__a); }
__DEVICE__ double tgamma(double __a) { return __nv_tgamma(__a); }
__DEVICE__ float tgammaf(float __a) { return __nv_tgammaf(__a); }
__DEVICE__ double trunc(double __a) { return __nv_trunc(__a); }
__DEVICE__ float truncf(float __a) { return __nv_truncf(__a); }
__DEVICE__ unsigned long long ullmax(unsigned long long __a,
                                     unsigned long long __b) {
  return __nv_ullmax(__a, __b);
}
__DEVICE__ unsigned long long ullmin(unsigned long long __a,
                                     unsigned long long __b) {
  return __nv_ullmin(__a, __b);
}
__DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b) {
  return __nv_umax(__a, __b);
}
__DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b) {
  return __nv_umin(__a, __b);
}
__DEVICE__ double y0(double __a) { return __nv_y0(__a); }
__DEVICE__ float y0f(float __a) { return __nv_y0f(__a); }
__DEVICE__ double y1(double __a) { return __nv_y1(__a); }
__DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }
__DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
__DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }

#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__DEVICE_VOID__")
#pragma pop_macro("__FAST_OR_SLOW")

#endif // __CLANG_GPU_DISABLE_MATH_WRAPPERS
#endif // __CLANG_CUDA_MATH_H__
@@ -1,284 +0,0 @@
/*===- __clang_math_forward_declares.h - Prototypes of __device__ math fns --===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG__CUDA_MATH_FORWARD_DECLARES_H__
#define __CLANG__CUDA_MATH_FORWARD_DECLARES_H__
#if !defined(__CUDA__) && !__HIP__
#error "This file is for CUDA/HIP compilation only."
#endif

// This file forward-declares some math functions we (or the CUDA headers)
// will define later. We need to do this, and do it before cmath is included,
// because the standard library may have constexpr math functions. In the
// absence of a prior __device__ decl, those constexpr functions may become
// implicitly host+device. host+device functions can't be overloaded, so that
// would preclude the use of our own __device__ overloads for these functions.
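// An illustrative failure this avoids: if the standard library declares
//   constexpr float sqrt(float);
// before any __device__ sqrt(float) is seen, clang may treat that constexpr
// function as implicitly host+device, and the __device__ overload defined
// later by the CUDA headers would then conflict with it.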
|
|
||||||
|
|
||||||
#pragma push_macro("__DEVICE__")
|
|
||||||
#define __DEVICE__ \
|
|
||||||
static __inline__ __attribute__((always_inline)) __attribute__((device))
|
|
||||||
|
|
||||||
__DEVICE__ long abs(long);
|
|
||||||
__DEVICE__ long long abs(long long);
|
|
||||||
__DEVICE__ double abs(double);
|
|
||||||
__DEVICE__ float abs(float);
|
|
||||||
__DEVICE__ int abs(int);
|
|
||||||
__DEVICE__ double acos(double);
|
|
||||||
__DEVICE__ float acos(float);
|
|
||||||
__DEVICE__ double acosh(double);
|
|
||||||
__DEVICE__ float acosh(float);
|
|
||||||
__DEVICE__ double asin(double);
|
|
||||||
__DEVICE__ float asin(float);
|
|
||||||
__DEVICE__ double asinh(double);
|
|
||||||
__DEVICE__ float asinh(float);
|
|
||||||
__DEVICE__ double atan2(double, double);
|
|
||||||
__DEVICE__ float atan2(float, float);
|
|
||||||
__DEVICE__ double atan(double);
|
|
||||||
__DEVICE__ float atan(float);
|
|
||||||
__DEVICE__ double atanh(double);
|
|
||||||
__DEVICE__ float atanh(float);
|
|
||||||
__DEVICE__ double cbrt(double);
|
|
||||||
__DEVICE__ float cbrt(float);
|
|
||||||
__DEVICE__ double ceil(double);
|
|
||||||
__DEVICE__ float ceil(float);
|
|
||||||
__DEVICE__ double copysign(double, double);
|
|
||||||
__DEVICE__ float copysign(float, float);
|
|
||||||
__DEVICE__ double cos(double);
|
|
||||||
__DEVICE__ float cos(float);
|
|
||||||
__DEVICE__ double cosh(double);
|
|
||||||
__DEVICE__ float cosh(float);
|
|
||||||
__DEVICE__ double erfc(double);
|
|
||||||
__DEVICE__ float erfc(float);
|
|
||||||
__DEVICE__ double erf(double);
|
|
||||||
__DEVICE__ float erf(float);
|
|
||||||
__DEVICE__ double exp2(double);
|
|
||||||
__DEVICE__ float exp2(float);
|
|
||||||
__DEVICE__ double exp(double);
|
|
||||||
__DEVICE__ float exp(float);
|
|
||||||
__DEVICE__ double expm1(double);
|
|
||||||
__DEVICE__ float expm1(float);
|
|
||||||
__DEVICE__ double fabs(double);
|
|
||||||
__DEVICE__ float fabs(float);
|
|
||||||
__DEVICE__ double fdim(double, double);
|
|
||||||
__DEVICE__ float fdim(float, float);
|
|
||||||
__DEVICE__ double floor(double);
|
|
||||||
__DEVICE__ float floor(float);
|
|
||||||
__DEVICE__ double fma(double, double, double);
|
|
||||||
__DEVICE__ float fma(float, float, float);
|
|
||||||
__DEVICE__ double fmax(double, double);
|
|
||||||
__DEVICE__ float fmax(float, float);
|
|
||||||
__DEVICE__ double fmin(double, double);
|
|
||||||
__DEVICE__ float fmin(float, float);
|
|
||||||
__DEVICE__ double fmod(double, double);
|
|
||||||
__DEVICE__ float fmod(float, float);
|
|
||||||
__DEVICE__ int fpclassify(double);
|
|
||||||
__DEVICE__ int fpclassify(float);
|
|
||||||
__DEVICE__ double frexp(double, int *);
|
|
||||||
__DEVICE__ float frexp(float, int *);
|
|
||||||
__DEVICE__ double hypot(double, double);
|
|
||||||
__DEVICE__ float hypot(float, float);
|
|
||||||
__DEVICE__ int ilogb(double);
|
|
||||||
__DEVICE__ int ilogb(float);
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
__DEVICE__ bool isfinite(long double);
|
|
||||||
#endif
|
|
||||||
__DEVICE__ bool isfinite(double);
|
|
||||||
__DEVICE__ bool isfinite(float);
|
|
||||||
__DEVICE__ bool isgreater(double, double);
|
|
||||||
__DEVICE__ bool isgreaterequal(double, double);
|
|
||||||
__DEVICE__ bool isgreaterequal(float, float);
|
|
||||||
__DEVICE__ bool isgreater(float, float);
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
__DEVICE__ bool isinf(long double);
|
|
||||||
#endif
|
|
__DEVICE__ bool isinf(double);
__DEVICE__ bool isinf(float);
__DEVICE__ bool isless(double, double);
__DEVICE__ bool islessequal(double, double);
__DEVICE__ bool islessequal(float, float);
__DEVICE__ bool isless(float, float);
__DEVICE__ bool islessgreater(double, double);
__DEVICE__ bool islessgreater(float, float);
#ifdef _MSC_VER
__DEVICE__ bool isnan(long double);
#endif
__DEVICE__ bool isnan(double);
__DEVICE__ bool isnan(float);
__DEVICE__ bool isnormal(double);
__DEVICE__ bool isnormal(float);
__DEVICE__ bool isunordered(double, double);
__DEVICE__ bool isunordered(float, float);
__DEVICE__ long labs(long);
__DEVICE__ double ldexp(double, int);
__DEVICE__ float ldexp(float, int);
__DEVICE__ double lgamma(double);
__DEVICE__ float lgamma(float);
__DEVICE__ long long llabs(long long);
__DEVICE__ long long llrint(double);
__DEVICE__ long long llrint(float);
__DEVICE__ double log10(double);
__DEVICE__ float log10(float);
__DEVICE__ double log1p(double);
__DEVICE__ float log1p(float);
__DEVICE__ double log2(double);
__DEVICE__ float log2(float);
__DEVICE__ double logb(double);
__DEVICE__ float logb(float);
__DEVICE__ double log(double);
__DEVICE__ float log(float);
__DEVICE__ long lrint(double);
__DEVICE__ long lrint(float);
__DEVICE__ long lround(double);
__DEVICE__ long lround(float);
__DEVICE__ long long llround(float); // No llround(double).
__DEVICE__ double modf(double, double *);
__DEVICE__ float modf(float, float *);
__DEVICE__ double nan(const char *);
__DEVICE__ float nanf(const char *);
__DEVICE__ double nearbyint(double);
__DEVICE__ float nearbyint(float);
__DEVICE__ double nextafter(double, double);
__DEVICE__ float nextafter(float, float);
__DEVICE__ double pow(double, double);
__DEVICE__ double pow(double, int);
__DEVICE__ float pow(float, float);
__DEVICE__ float pow(float, int);
__DEVICE__ double remainder(double, double);
__DEVICE__ float remainder(float, float);
__DEVICE__ double remquo(double, double, int *);
__DEVICE__ float remquo(float, float, int *);
__DEVICE__ double rint(double);
__DEVICE__ float rint(float);
__DEVICE__ double round(double);
__DEVICE__ float round(float);
__DEVICE__ double scalbln(double, long);
__DEVICE__ float scalbln(float, long);
__DEVICE__ double scalbn(double, int);
__DEVICE__ float scalbn(float, int);
#ifdef _MSC_VER
__DEVICE__ bool signbit(long double);
#endif
__DEVICE__ bool signbit(double);
__DEVICE__ bool signbit(float);
__DEVICE__ double sin(double);
__DEVICE__ float sin(float);
__DEVICE__ double sinh(double);
__DEVICE__ float sinh(float);
__DEVICE__ double sqrt(double);
__DEVICE__ float sqrt(float);
__DEVICE__ double tan(double);
__DEVICE__ float tan(float);
__DEVICE__ double tanh(double);
__DEVICE__ float tanh(float);
__DEVICE__ double tgamma(double);
__DEVICE__ float tgamma(float);
__DEVICE__ double trunc(double);
__DEVICE__ float trunc(float);

// Notably missing above is nexttoward, which we don't define on
// the device side because libdevice doesn't give us an implementation, and we
// don't want to be in the business of writing one ourselves.

// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
// picked up by other functions in the standard library (e.g. functions in
// <complex>). Thus the ugliness below.
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_NAMESPACE_STD
#else
namespace std {
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
#endif

using ::abs;
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isinf;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnan;
using ::isnormal;
using ::isunordered;
using ::labs;
using ::ldexp;
using ::lgamma;
using ::llabs;
using ::llrint;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::llround;
using ::modf;
using ::nan;
using ::nanf;
using ::nearbyint;
using ::nextafter;
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;

#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
#endif
} // namespace std
#endif
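// [Editor's sketch -- not part of the original header; names hypothetical.]
// Why the inline namespace above matters: unqualified lookup inside an
// inline namespace (libc++'s std::__1) stops at the first scope that
// declares the name, so an overload added to plain `namespace std` is
// invisible to code such as <complex> that lives in std::__1.
namespace __demo { inline namespace __v1 {
inline int __classify(double) { return 0; }
inline int __caller(float __x) { return __classify(__x); } // sees only __v1's
}} // namespace __demo::__v1
namespace __demo { inline int __classify(float) { return 1; } } // too shallow
// __demo::__v1::__caller(1.0f) returns 0: the float overload is never found.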

#pragma pop_macro("__DEVICE__")

#endif
@@ -1,504 +0,0 @@
/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/*
 * WARNING: This header is intended to be directly -include'd by
 * the compiler and is not supposed to be included by users.
 *
 * CUDA headers are implemented in a way that currently makes it
 * impossible for user code to #include directly when compiling with
 * Clang. They present a different view of CUDA-supplied functions
 * depending on where in NVCC's compilation pipeline the headers are
 * included. Neither of these modes provides function definitions with
 * correct attributes, so we use the preprocessor to force the headers
 * into a form that Clang can use.
 *
 * Similarly to NVCC, which -include's cuda_runtime.h, Clang -include's
 * this file during every CUDA compilation.
 */

#ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__
#define __CLANG_CUDA_RUNTIME_WRAPPER_H__

#if defined(__CUDA__) && defined(__clang__)

// Include some forward declares that must come before cmath.
#include <__clang_cuda_math_forward_declares.h>

// Define __CUDACC__ early as libstdc++ standard headers with GNU extensions
// enabled depend on it to avoid using __float128, which is unsupported in
// CUDA.
#define __CUDACC__

// Include some standard headers to avoid CUDA headers including them
// while some required macros (like __THROW) are in a weird state.
#include <cmath>
#include <cstdlib>
#include <stdlib.h>
#include <string.h>
#undef __CUDACC__

// Preserve common macros that will be changed below by us or by CUDA
// headers.
#pragma push_macro("__THROW")
#pragma push_macro("__CUDA_ARCH__")

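// [Editor's sketch -- illustrative only, not part of the original header.]
// The push_macro/pop_macro pattern used throughout this file: save a macro's
// current definition, redefine it for a region of includes, then restore it.
#pragma push_macro("__EDITOR_DEMO_MODE")
#define __EDITOR_DEMO_MODE 2 // headers needing the temporary value go here
#pragma pop_macro("__EDITOR_DEMO_MODE")
// After pop_macro, __EDITOR_DEMO_MODE has whatever definition (or absence of
// one) it had at the matching push_macro.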
// WARNING: Preprocessor hacks below are based on specific details of
// CUDA-7.x headers and are not expected to work with any other
// version of CUDA headers.
#include "cuda.h"
#if !defined(CUDA_VERSION)
#error "cuda.h did not define CUDA_VERSION"
#elif CUDA_VERSION < 7000
#error "Unsupported CUDA version!"
#endif

#pragma push_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__")
#if CUDA_VERSION >= 10000
#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
#endif

// Make the largest subset of device functions available during host
// compilation.
#ifndef __CUDA_ARCH__
#define __CUDA_ARCH__ 9999
#endif

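// [Editor's note] Why 9999 works: the CUDA headers gate device definitions
// on the architecture version, e.g. `#if __CUDA_ARCH__ >= 350`. Defining
// __CUDA_ARCH__ to an impossibly high value during host compilation
// satisfies every such guard, exposing the largest subset of declarations.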
#include "__clang_cuda_builtin_vars.h"

// No need for device_launch_parameters.h as __clang_cuda_builtin_vars.h above
// has taken care of builtin variables declared in the file.
#define __DEVICE_LAUNCH_PARAMETERS_H__

// {math,device}_functions.h only have declarations of the
// functions. We don't need them as we're going to pull in their
// definitions from .hpp files.
#define __DEVICE_FUNCTIONS_H__
#define __MATH_FUNCTIONS_H__
#define __COMMON_FUNCTIONS_H__
// device_functions_decls is replaced by __clang_cuda_device_functions.h
// included below.
#define __DEVICE_FUNCTIONS_DECLS_H__

#undef __CUDACC__
#if CUDA_VERSION < 9000
#define __CUDABE__
#else
#define __CUDACC__
#define __CUDA_LIBDEVICE__
#endif
// Disables definitions of device-side runtime support stubs in
// cuda_device_runtime_api.h
#include "host_defines.h"
#undef __CUDACC__
#include "driver_types.h"
#include "host_config.h"

// Temporarily replace "nv_weak" with weak, so __attribute__((nv_weak)) in
// cuda_device_runtime_api.h ends up being __attribute__((weak)), which is the
// functional equivalent of what we need.
#pragma push_macro("nv_weak")
#define nv_weak weak
#undef __CUDABE__
#undef __CUDA_LIBDEVICE__
#define __CUDACC__
#include "cuda_runtime.h"

#pragma pop_macro("nv_weak")
#undef __CUDACC__
#define __CUDABE__

// CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does
// not have at the moment. Emulate them with a builtin memcpy/memset.
#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)
#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)

#if CUDA_VERSION < 9000
#include "crt/device_runtime.h"
#endif
#include "crt/host_runtime.h"
// device_runtime.h defines __cxa_* macros that will conflict with
// cxxabi.h.
// FIXME: redefine these as __device__ functions.
#undef __cxa_vec_ctor
#undef __cxa_vec_cctor
#undef __cxa_vec_dtor
#undef __cxa_vec_new
#undef __cxa_vec_new2
#undef __cxa_vec_new3
#undef __cxa_vec_delete2
#undef __cxa_vec_delete
#undef __cxa_vec_delete3
#undef __cxa_pure_virtual

// math_functions.hpp expects this host function to be defined on macOS, but it
// ends up not being there because of the games we play here. Just define it
// ourselves; it's simple enough.
#ifdef __APPLE__
inline __host__ double __signbitd(double x) {
  return std::signbit(x);
}
#endif

// CUDA 9.1 no longer provides declarations for libdevice functions, so we need
// to provide our own.
#include <__clang_cuda_libdevice_declares.h>

// Wrappers for many device-side standard library functions, incl. math
// functions, became compiler builtins in CUDA-9 and have been removed from the
// CUDA headers. Clang now provides its own implementation of the wrappers.
#if CUDA_VERSION >= 9000
#include <__clang_cuda_device_functions.h>
#include <__clang_cuda_math.h>
#endif

// __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's
// counterpart does not do it, so we need to make it empty here to keep the
// following CUDA includes happy.
#undef __THROW
#define __THROW

// CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values.
// Previous versions used to check whether they are defined or not.
// The CU_DEVICE_INVALID macro is only defined in 8.0.41, so we use it
// here to detect the switch.

#if defined(CU_DEVICE_INVALID)
#if !defined(__USE_FAST_MATH__)
#define __USE_FAST_MATH__ 0
#endif

#if !defined(__CUDA_PREC_DIV)
#define __CUDA_PREC_DIV 0
#endif
#endif

// Temporarily poison the __host__ macro to ensure it's not used by any of
// the headers we're about to include.
#pragma push_macro("__host__")
#define __host__ UNEXPECTED_HOST_ATTRIBUTE

// device_functions.hpp and math_functions*.hpp use 'static
// __forceinline__' (with no __device__) for definitions of device
// functions. Temporarily redefine __forceinline__ to include
// __device__.
#pragma push_macro("__forceinline__")
#define __forceinline__ __device__ __inline__ __attribute__((always_inline))
#if CUDA_VERSION < 9000
#include "device_functions.hpp"
#endif

// math_functions.hpp uses the __USE_FAST_MATH__ macro to determine whether we
// get the slow-but-accurate or fast-but-inaccurate versions of functions like
// sin and exp. This is controlled in clang by -fgpu-approx-transcendentals.
//
// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.
// slow divides), so we need to scope our define carefully here.
#pragma push_macro("__USE_FAST_MATH__")
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
#define __USE_FAST_MATH__ 1
#endif

#if CUDA_VERSION >= 9000
#include "crt/math_functions.hpp"
#else
#include "math_functions.hpp"
#endif

#pragma pop_macro("__USE_FAST_MATH__")

#if CUDA_VERSION < 9000
#include "math_functions_dbl_ptx3.hpp"
#endif
#pragma pop_macro("__forceinline__")

// Pull in host-only functions that are only available when neither
// __CUDACC__ nor __CUDABE__ are defined.
#undef __MATH_FUNCTIONS_HPP__
#undef __CUDABE__
#if CUDA_VERSION < 9000
#include "math_functions.hpp"
#endif
// Alas, additional overloads for these functions are hard to get to.
// Considering that we only need these overloads for a few functions,
// we can provide them here.
static inline float rsqrt(float __a) { return rsqrtf(__a); }
static inline float rcbrt(float __a) { return rcbrtf(__a); }
static inline float sinpi(float __a) { return sinpif(__a); }
static inline float cospi(float __a) { return cospif(__a); }
static inline void sincospi(float __a, float *__b, float *__c) {
  return sincospif(__a, __b, __c);
}
static inline float erfcinv(float __a) { return erfcinvf(__a); }
static inline float normcdfinv(float __a) { return normcdfinvf(__a); }
static inline float normcdf(float __a) { return normcdff(__a); }
static inline float erfcx(float __a) { return erfcxf(__a); }

#if CUDA_VERSION < 9000
// For some reason the single-argument variant is not always declared by the
// CUDA headers. Alas, device_functions.hpp included below needs it.
static inline __device__ void __brkpt(int __c) { __brkpt(); }
#endif

// Now include *.hpp with definitions of various GPU functions. Alas,
// a lot of things get declared/defined with the __host__ attribute, which
// we don't want, so we have to define it out. We also have to include
// {device,math}_functions.hpp again in order to extract the other
// branch of #if/else inside.
#define __host__
#undef __CUDABE__
#define __CUDACC__
#if CUDA_VERSION >= 9000
// Some atomic functions became compiler builtins in CUDA-9, so we need their
// declarations.
#include "device_atomic_functions.h"
#endif
#undef __DEVICE_FUNCTIONS_HPP__
#include "device_atomic_functions.hpp"
#if CUDA_VERSION >= 9000
#include "crt/device_functions.hpp"
#include "crt/device_double_functions.hpp"
#else
#include "device_functions.hpp"
#define __CUDABE__
#include "device_double_functions.h"
#undef __CUDABE__
#endif
#include "sm_20_atomic_functions.hpp"
// Predicate functions used in `__builtin_assume` need to have no side
// effects. However, sm_20_intrinsics.hpp doesn't define them with either the
// pure or the const attribute. Rename definitions from sm_20_intrinsics.hpp
// and re-define them as pure ones.
#pragma push_macro("__isGlobal")
#pragma push_macro("__isShared")
#pragma push_macro("__isConstant")
#pragma push_macro("__isLocal")
#define __isGlobal __ignored_cuda___isGlobal
#define __isShared __ignored_cuda___isShared
#define __isConstant __ignored_cuda___isConstant
#define __isLocal __ignored_cuda___isLocal
#include "sm_20_intrinsics.hpp"
#pragma pop_macro("__isGlobal")
#pragma pop_macro("__isShared")
#pragma pop_macro("__isConstant")
#pragma pop_macro("__isLocal")
#pragma push_macro("__DEVICE__")
#define __DEVICE__ static __device__ __forceinline__ __attribute__((const))
__DEVICE__ unsigned int __isGlobal(const void *p) {
  return __nvvm_isspacep_global(p);
}
__DEVICE__ unsigned int __isShared(const void *p) {
  return __nvvm_isspacep_shared(p);
}
__DEVICE__ unsigned int __isConstant(const void *p) {
  return __nvvm_isspacep_const(p);
}
__DEVICE__ unsigned int __isLocal(const void *p) {
  return __nvvm_isspacep_local(p);
}
#pragma pop_macro("__DEVICE__")
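// [Editor's sketch -- illustrative only, not part of the original header.]
// Why the `const` attribute matters above: clang discards a __builtin_assume
// whose argument may have side effects, so a predicate passed to it must be
// provably pure/const.
__attribute__((const)) static inline int
__editor_demo_is_aligned16(const void *__p) {
  return ((unsigned long)__p & 15ul) == 0;
}
static inline void __editor_demo_scale(float *__p, int __n) {
  __builtin_assume(__editor_demo_is_aligned16(__p)); // kept: call is const
  for (int __i = 0; __i < __n; ++__i)
    __p[__i] *= 2.0f;
}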
#include "sm_32_atomic_functions.hpp"

// Don't include sm_30_intrinsics.h and sm_32_intrinsics.h. These define the
// __shfl and __ldg intrinsics using inline (volatile) asm, but we want to
// define them using builtins so that the optimizer can reason about and across
// these instructions. In particular, using intrinsics for ldg gets us the
// [addr+imm] addressing mode, which, although it doesn't actually exist in the
// hardware, seems to generate faster machine code because ptxas can more easily
// reason about our code.

#if CUDA_VERSION >= 8000
#pragma push_macro("__CUDA_ARCH__")
#undef __CUDA_ARCH__
#include "sm_60_atomic_functions.hpp"
#include "sm_61_intrinsics.hpp"
#pragma pop_macro("__CUDA_ARCH__")
#endif

#undef __MATH_FUNCTIONS_HPP__

// math_functions.hpp defines ::signbit as a __host__ __device__ function. This
// conflicts with libstdc++'s constexpr ::signbit, so we have to rename
// math_functions.hpp's ::signbit. It's guarded by #undef signbit, but that's
// conditional on __GNUC__. :)
#pragma push_macro("signbit")
#pragma push_macro("__GNUC__")
#undef __GNUC__
#define signbit __ignored_cuda_signbit

// CUDA-9 omits device-side definitions of some math functions if it sees the
// include guard from libstdc++'s math.h wrapper. We have to undo the header
// guard temporarily to get the definitions we need.
#pragma push_macro("_GLIBCXX_MATH_H")
#pragma push_macro("_LIBCPP_VERSION")
#if CUDA_VERSION >= 9000
#undef _GLIBCXX_MATH_H
// We also need to undo another guard that checks for libc++ 3.8+
#ifdef _LIBCPP_VERSION
#define _LIBCPP_VERSION 3700
#endif
#endif

#if CUDA_VERSION >= 9000
#include "crt/math_functions.hpp"
#else
#include "math_functions.hpp"
#endif
#pragma pop_macro("_GLIBCXX_MATH_H")
#pragma pop_macro("_LIBCPP_VERSION")
#pragma pop_macro("__GNUC__")
#pragma pop_macro("signbit")

#pragma pop_macro("__host__")

// __clang_cuda_texture_intrinsics.h must be included first in order to provide
// the implementation for __nv_tex_surf_handler that CUDA's headers depend on.
// The implementation requires C++11 and only works with CUDA-9 or newer.
#if __cplusplus >= 201103L && CUDA_VERSION >= 9000
// clang-format off
#include <__clang_cuda_texture_intrinsics.h>
// clang-format on
#else
#if CUDA_VERSION >= 9000
// Provide a hint that texture support needs C++11.
template <typename T> struct __nv_tex_needs_cxx11 {
  const static bool value = false;
};
template <class T>
__host__ __device__ void __nv_tex_surf_handler(const char *name, T *ptr,
                                               cudaTextureObject_t obj,
                                               float x) {
  _Static_assert(__nv_tex_needs_cxx11<T>::value,
                 "Texture support requires C++11");
}
#else
// Textures in CUDA-8 and older are not supported by clang. There's no
// convenient way to intercept texture use in these versions, so we can't
// produce a meaningful error. The source code that attempts to use textures
// will continue to fail as it does now.
#endif // CUDA_VERSION
#endif // __cplusplus >= 201103L && CUDA_VERSION >= 9000
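// [Editor's sketch -- not part of the original header; names hypothetical.]
// The trick above is the classic dependent-false static_assert: because
// `value` depends on T, the assertion only fires when the template is
// actually instantiated, turning "unsupported feature used" into a clean
// diagnostic at the call site rather than an error on every compile.
template <typename __T> struct __editor_demo_always_false {
  static const bool value = false;
};
template <typename __T> void __editor_demo_use_textures(__T) {
  static_assert(__editor_demo_always_false<__T>::value,
                "Texture support requires C++11");
}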
#include "surface_indirect_functions.h"
#include "texture_fetch_functions.h"
#include "texture_indirect_functions.h"

// Restore the state of __CUDA_ARCH__ and __THROW we had on entry.
#pragma pop_macro("__CUDA_ARCH__")
#pragma pop_macro("__THROW")

// Set up compiler macros expected to be seen during compilation.
#undef __CUDABE__
#define __CUDACC__

extern "C" {
// Device-side CUDA system calls.
// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls
// We need these declarations and wrappers for device-side
// malloc/free/printf calls to work without relying on the
// -fcuda-disable-target-call-checks option.
__device__ int vprintf(const char *, const char *);
__device__ void free(void *) __attribute((nothrow));
__device__ void *malloc(size_t) __attribute((nothrow)) __attribute__((malloc));

// __assertfail() used to have a `noreturn` attribute. Unfortunately that
// contributed to triggering the longstanding bug in ptxas when assert was used
// in sufficiently convoluted code. See
// https://bugs.llvm.org/show_bug.cgi?id=27738 for the details.
__device__ void __assertfail(const char *__message, const char *__file,
                             unsigned __line, const char *__function,
                             size_t __charSize);

// In order for the standard assert() macro on Linux to work, we need to
// provide a device-side __assert_fail().
__device__ static inline void __assert_fail(const char *__message,
                                            const char *__file, unsigned __line,
                                            const char *__function) {
  __assertfail(__message, __file, __line, __function, sizeof(char));
}

// Clang will convert printf into vprintf, but we still need a
// device-side declaration for it.
__device__ int printf(const char *, ...);
} // extern "C"

// We also need device-side std::malloc and std::free.
namespace std {
__device__ static inline void free(void *__ptr) { ::free(__ptr); }
__device__ static inline void *malloc(size_t __size) {
  return ::malloc(__size);
}
} // namespace std

// Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to
// come after we've pulled in the definition of uint3 and dim3.

__device__ inline __cuda_builtin_threadIdx_t::operator dim3() const {
  return dim3(x, y, z);
}

__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
  return {x, y, z};
}

__device__ inline __cuda_builtin_blockIdx_t::operator dim3() const {
  return dim3(x, y, z);
}

__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
  return {x, y, z};
}

__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
  return dim3(x, y, z);
}

__device__ inline __cuda_builtin_blockDim_t::operator uint3() const {
  return {x, y, z};
}

__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
  return dim3(x, y, z);
}

__device__ inline __cuda_builtin_gridDim_t::operator uint3() const {
  return {x, y, z};
}

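// [Editor's note] These conversion operators are what let device code write
//   dim3 __block = blockDim;
//   uint3 __tid = threadIdx;
// even though the builtin variables are special placeholder types rather
// than real dim3/uint3 objects.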
#include <__clang_cuda_cmath.h>
#include <__clang_cuda_intrinsics.h>
#include <__clang_cuda_complex_builtins.h>

// curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host
// mode, giving them their "proper" types of dim3 and uint3. This is
// incompatible with the types we give in __clang_cuda_builtin_vars.h. As a
// hack, force-include the header (nvcc doesn't include it by default) but
// redefine dim3 and uint3 to our builtin types. (Thankfully dim3 and uint3 are
// only used here for the redeclarations of blockDim and threadIdx.)
#pragma push_macro("dim3")
#pragma push_macro("uint3")
#define dim3 __cuda_builtin_blockDim_t
#define uint3 __cuda_builtin_threadIdx_t
#include "curand_mtgp32_kernel.h"
#pragma pop_macro("dim3")
#pragma pop_macro("uint3")
#pragma pop_macro("__USE_FAST_MATH__")
#pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__")

// The CUDA runtime uses this undocumented function to access kernel launch
// configuration. The declaration is in crt/device_functions.h but that file
// includes a lot of other stuff we don't want. Instead, we'll provide our own
// declaration for it here.
#if CUDA_VERSION >= 9020
extern "C" unsigned __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim,
                                                size_t sharedMem = 0,
                                                void *stream = 0);
#endif

#endif // __CUDA__
#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__
File diff suppressed because it is too large
@@ -1,848 +0,0 @@
/*===---- __clang_hip_cmath.h - HIP cmath decls -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __CLANG_HIP_CMATH_H__
#define __CLANG_HIP_CMATH_H__

#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif

#if !defined(__HIPCC_RTC__)
#if defined(__cplusplus)
#include <limits>
#include <type_traits>
#include <utility>
#endif
#include <limits.h>
#include <stdint.h>
#endif // !defined(__HIPCC_RTC__)

#pragma push_macro("__DEVICE__")
#pragma push_macro("__CONSTEXPR__")
#ifdef __OPENMP_AMDGCN__
#define __DEVICE__ static __attribute__((always_inline, nothrow))
#define __CONSTEXPR__ constexpr
#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
#define __CONSTEXPR__
#endif // __OPENMP_AMDGCN__

// Start with functions that cannot be defined by the DEF macros below.
#if defined(__cplusplus)
#if defined __OPENMP_AMDGCN__
__DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); }
__DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); }
__DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); }
#endif
__DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); }
__DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); }
__DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); }
__DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) {
  return ::fmaf(__x, __y, __z);
}
#if !defined(__HIPCC_RTC__)
// The value returned by fpclassify is platform-dependent, so it is not
// supported by hipRTC.
__DEVICE__ __CONSTEXPR__ int fpclassify(float __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ __CONSTEXPR__ int fpclassify(double __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
#endif // !defined(__HIPCC_RTC__)

__DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) {
  return ::frexpf(__arg, __exp);
}

#if defined(__OPENMP_AMDGCN__)
// For OpenMP we work around some old system headers that have non-conforming
// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
// this by providing two versions of these functions, differing only in the
// return type. To avoid conflicting definitions we disable implicit base
// function generation. That means we will end up with two specializations, one
// per type, but only one has a base function defined by the system header.
#pragma omp begin declare variant match(                                       \
    implementation = {extension(disable_implicit_base)})

// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
// add a suffix. This means we would clash with the names of the variants
// (note that we do not create implicit base functions here). To avoid
// this clash we add a new trait to some of them that is always true
// (this is LLVM after all ;)). It will only influence the mangled name
// of the variants inside the inner region and avoid the clash.
#pragma omp begin declare variant match(implementation = {vendor(llvm)})

__DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); }
__DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); }
__DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); }

#pragma omp end declare variant
#endif // defined(__OPENMP_AMDGCN__)

__DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); }
__DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); }
__DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); }

#if defined(__OPENMP_AMDGCN__)
#pragma omp end declare variant
#endif // defined(__OPENMP_AMDGCN__)

__DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ __CONSTEXPR__ bool isnormal(float __x) {
  return __builtin_isnormal(__x);
}
__DEVICE__ __CONSTEXPR__ bool isnormal(double __x) {
  return __builtin_isnormal(__x);
}
__DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) {
  return ::modff(__x, __iptr);
}
__DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) {
  return ::powif(__base, __iexp);
}
__DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) {
  return ::powi(__base, __iexp);
}
__DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) {
  return ::remquof(__x, __y, __quo);
}
__DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) {
  return ::scalblnf(__x, __n);
}
__DEVICE__ __CONSTEXPR__ bool signbit(float __x) { return ::__signbitf(__x); }
__DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); }

// Notably missing above is nexttoward. We omit it because
// ocml doesn't provide an implementation, and we don't want to be in the
// business of implementing tricky libm functions in this header.

// Other functions.
__DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y,
                                      _Float16 __z) {
  return __builtin_fmaf16(__x, __y, __z);
}
__DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) {
  return __ocml_pown_f16(__base, __iexp);
}

#ifndef __OPENMP_AMDGCN__
// BEGIN DEF_FUN and HIP_OVERLOAD

// BEGIN DEF_FUN

#pragma push_macro("__DEF_FUN1")
#pragma push_macro("__DEF_FUN2")
#pragma push_macro("__DEF_FUN2_FI")

// Define cmath functions that take a float argument and return __retty.
#define __DEF_FUN1(__retty, __func)                                            \
  __DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); }

// Define cmath functions that take two float arguments and return __retty.
#define __DEF_FUN2(__retty, __func)                                            \
  __DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) {              \
    return __func##f(__x, __y);                                                \
  }

// Define cmath functions that take a float and an int argument and return
// __retty.
#define __DEF_FUN2_FI(__retty, __func)                                         \
  __DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) {                \
    return __func##f(__x, __y);                                                \
  }

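// [Editor's note] What a __DEF_FUN1 expansion looks like: token pasting
// (##) builds the name of the f-suffixed C function, so __DEF_FUN1(float,
// acos) produces a float overload that simply forwards:
//
//   __DEVICE__ __CONSTEXPR__ float acos(float __x) { return acosf(__x); }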
__DEF_FUN1(float, acos)
__DEF_FUN1(float, acosh)
__DEF_FUN1(float, asin)
__DEF_FUN1(float, asinh)
__DEF_FUN1(float, atan)
__DEF_FUN2(float, atan2)
__DEF_FUN1(float, atanh)
__DEF_FUN1(float, cbrt)
__DEF_FUN1(float, ceil)
__DEF_FUN2(float, copysign)
__DEF_FUN1(float, cos)
__DEF_FUN1(float, cosh)
__DEF_FUN1(float, erf)
__DEF_FUN1(float, erfc)
__DEF_FUN1(float, exp)
__DEF_FUN1(float, exp2)
__DEF_FUN1(float, expm1)
__DEF_FUN1(float, fabs)
__DEF_FUN2(float, fdim)
__DEF_FUN1(float, floor)
__DEF_FUN2(float, fmax)
__DEF_FUN2(float, fmin)
__DEF_FUN2(float, fmod)
__DEF_FUN2(float, hypot)
__DEF_FUN1(int, ilogb)
__DEF_FUN2_FI(float, ldexp)
__DEF_FUN1(float, lgamma)
__DEF_FUN1(float, log)
__DEF_FUN1(float, log10)
__DEF_FUN1(float, log1p)
__DEF_FUN1(float, log2)
__DEF_FUN1(float, logb)
__DEF_FUN1(long long, llrint)
__DEF_FUN1(long long, llround)
__DEF_FUN1(long, lrint)
__DEF_FUN1(long, lround)
__DEF_FUN1(float, nearbyint)
__DEF_FUN2(float, nextafter)
__DEF_FUN2(float, pow)
__DEF_FUN2(float, remainder)
__DEF_FUN1(float, rint)
__DEF_FUN1(float, round)
__DEF_FUN2_FI(float, scalbn)
__DEF_FUN1(float, sin)
__DEF_FUN1(float, sinh)
__DEF_FUN1(float, sqrt)
__DEF_FUN1(float, tan)
__DEF_FUN1(float, tanh)
__DEF_FUN1(float, tgamma)
__DEF_FUN1(float, trunc)

#pragma pop_macro("__DEF_FUN1")
#pragma pop_macro("__DEF_FUN2")
#pragma pop_macro("__DEF_FUN2_FI")

// END DEF_FUN

// BEGIN HIP_OVERLOAD

#pragma push_macro("__HIP_OVERLOAD1")
#pragma push_macro("__HIP_OVERLOAD2")

// __hip_enable_if::type is a type function which returns __T if __B is true.
template <bool __B, class __T = void> struct __hip_enable_if {};

template <class __T> struct __hip_enable_if<true, __T> { typedef __T type; };

namespace __hip {
template <class _Tp> struct is_integral {
  enum { value = 0 };
};
template <> struct is_integral<bool> {
  enum { value = 1 };
};
template <> struct is_integral<char> {
  enum { value = 1 };
};
template <> struct is_integral<signed char> {
  enum { value = 1 };
};
template <> struct is_integral<unsigned char> {
  enum { value = 1 };
};
template <> struct is_integral<wchar_t> {
  enum { value = 1 };
};
template <> struct is_integral<short> {
  enum { value = 1 };
};
template <> struct is_integral<unsigned short> {
  enum { value = 1 };
};
template <> struct is_integral<int> {
  enum { value = 1 };
};
template <> struct is_integral<unsigned int> {
  enum { value = 1 };
};
template <> struct is_integral<long> {
  enum { value = 1 };
};
template <> struct is_integral<unsigned long> {
  enum { value = 1 };
};
template <> struct is_integral<long long> {
  enum { value = 1 };
};
template <> struct is_integral<unsigned long long> {
  enum { value = 1 };
};

// TODO: specialize is_arithmetic<_Float16>.
template <class _Tp> struct is_arithmetic {
  enum { value = 0 };
};
template <> struct is_arithmetic<bool> {
  enum { value = 1 };
};
template <> struct is_arithmetic<char> {
  enum { value = 1 };
};
template <> struct is_arithmetic<signed char> {
  enum { value = 1 };
};
template <> struct is_arithmetic<unsigned char> {
  enum { value = 1 };
};
template <> struct is_arithmetic<wchar_t> {
  enum { value = 1 };
};
template <> struct is_arithmetic<short> {
  enum { value = 1 };
};
template <> struct is_arithmetic<unsigned short> {
  enum { value = 1 };
};
template <> struct is_arithmetic<int> {
  enum { value = 1 };
};
template <> struct is_arithmetic<unsigned int> {
  enum { value = 1 };
};
template <> struct is_arithmetic<long> {
  enum { value = 1 };
};
template <> struct is_arithmetic<unsigned long> {
  enum { value = 1 };
};
template <> struct is_arithmetic<long long> {
  enum { value = 1 };
};
template <> struct is_arithmetic<unsigned long long> {
  enum { value = 1 };
};
template <> struct is_arithmetic<float> {
  enum { value = 1 };
};
template <> struct is_arithmetic<double> {
  enum { value = 1 };
};

struct true_type {
  static const __constant__ bool value = true;
};
struct false_type {
  static const __constant__ bool value = false;
};

template <typename __T, typename __U> struct is_same : public false_type {};
template <typename __T> struct is_same<__T, __T> : public true_type {};

template <typename __T> struct add_rvalue_reference { typedef __T &&type; };

template <typename __T> typename add_rvalue_reference<__T>::type declval();

// decltype is only available in C++11 and above.
#if __cplusplus >= 201103L
// __hip_promote
template <class _Tp> struct __numeric_type {
  static void __test(...);
  static _Float16 __test(_Float16);
  static float __test(float);
  static double __test(char);
  static double __test(int);
  static double __test(unsigned);
  static double __test(long);
  static double __test(unsigned long);
  static double __test(long long);
  static double __test(unsigned long long);
  static double __test(double);
  // No support for long double, use double instead.
  static double __test(long double);

  template <typename _U>
  static auto __test_impl(int) -> decltype(__test(declval<_U>()));

  template <typename _U> static void __test_impl(...);

  typedef decltype(__test_impl<_Tp>(0)) type;
  static const bool value = !is_same<type, void>::value;
};

template <> struct __numeric_type<void> { static const bool value = true; };

template <class _A1, class _A2 = void, class _A3 = void,
          bool = __numeric_type<_A1>::value &&__numeric_type<_A2>::value
                     &&__numeric_type<_A3>::value>
class __promote_imp {
public:
  static const bool value = false;
};

template <class _A1, class _A2, class _A3>
class __promote_imp<_A1, _A2, _A3, true> {
private:
  typedef typename __promote_imp<_A1>::type __type1;
  typedef typename __promote_imp<_A2>::type __type2;
  typedef typename __promote_imp<_A3>::type __type3;

public:
  typedef decltype(__type1() + __type2() + __type3()) type;
  static const bool value = true;
};

template <class _A1, class _A2> class __promote_imp<_A1, _A2, void, true> {
private:
  typedef typename __promote_imp<_A1>::type __type1;
  typedef typename __promote_imp<_A2>::type __type2;

public:
  typedef decltype(__type1() + __type2()) type;
  static const bool value = true;
};

template <class _A1> class __promote_imp<_A1, void, void, true> {
public:
  typedef typename __numeric_type<_A1>::type type;
  static const bool value = true;
};

template <class _A1, class _A2 = void, class _A3 = void>
class __promote : public __promote_imp<_A1, _A2, _A3> {};
#endif //__cplusplus >= 201103L
} // namespace __hip

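#if __cplusplus >= 201103L
// [Editor's sketch -- illustrative only, not part of the original header.]
// __promote computes the common type by adding value-initialized operands,
// after __numeric_type has mapped every integral type to double. Unlike the
// usual arithmetic conversions, a mixed int/float call therefore promotes
// to double:
static_assert(
    __hip::is_same<__hip::__promote<float, double>::type, double>::value,
    "float + double promotes to double");
static_assert(
    __hip::is_same<__hip::__promote<int, float>::type, double>::value,
    "integral operands are mapped to double before promotion");
#endif // [Editor's sketch]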
// __HIP_OVERLOAD1 is used to resolve function calls with an integer argument
// to avoid a compilation error due to ambiguity, e.g. floor(5) is resolved to
// floor(double).
#define __HIP_OVERLOAD1(__retty, __fn)                                         \
  template <typename __T>                                                      \
  __DEVICE__ __CONSTEXPR__                                                     \
      typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type  \
      __fn(__T __x) {                                                          \
    return ::__fn((double)__x);                                                \
  }

// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
// or integer arguments to avoid a compilation error due to ambiguity, e.g.
// max(5.0f, 6.0) is resolved to max(double, double).
#if __cplusplus >= 201103L
#define __HIP_OVERLOAD2(__retty, __fn)                                         \
  template <typename __T1, typename __T2>                                      \
  __DEVICE__ __CONSTEXPR__                                                     \
      typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&            \
                                   __hip::is_arithmetic<__T2>::value,          \
                               __retty>::type                                  \
      __fn(__T1 __x, __T2 __y) {                                               \
    typedef typename __hip::__promote<__T1, __T2>::type __arg_type;            \
    return __fn((__arg_type)__x, (__arg_type)__y);                             \
  }
#else
#define __HIP_OVERLOAD2(__retty, __fn)                                         \
  template <typename __T1, typename __T2>                                      \
  __DEVICE__ __CONSTEXPR__                                                     \
      typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&            \
                                   __hip::is_arithmetic<__T2>::value,          \
                               __retty>::type                                  \
      __fn(__T1 __x, __T2 __y) {                                               \
    return __fn((double)__x, (double)__y);                                     \
  }
#endif

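// [Editor's note] What __HIP_OVERLOAD1(double, floor) buys: without it,
// floor(5) is ambiguous between floor(float) and floor(double). The
// generated template accepts any integral argument (SFINAE via
// __hip_enable_if) and forwards to the double version, so:
//
//   floor(5);    // -> template -> ::floor((double)5)
//   floor(5.0f); // -> floor(float); the template drops out (not integral)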
__HIP_OVERLOAD1(double, acos)
__HIP_OVERLOAD1(double, acosh)
__HIP_OVERLOAD1(double, asin)
__HIP_OVERLOAD1(double, asinh)
__HIP_OVERLOAD1(double, atan)
__HIP_OVERLOAD2(double, atan2)
__HIP_OVERLOAD1(double, atanh)
__HIP_OVERLOAD1(double, cbrt)
__HIP_OVERLOAD1(double, ceil)
__HIP_OVERLOAD2(double, copysign)
__HIP_OVERLOAD1(double, cos)
__HIP_OVERLOAD1(double, cosh)
__HIP_OVERLOAD1(double, erf)
__HIP_OVERLOAD1(double, erfc)
__HIP_OVERLOAD1(double, exp)
__HIP_OVERLOAD1(double, exp2)
__HIP_OVERLOAD1(double, expm1)
__HIP_OVERLOAD1(double, fabs)
__HIP_OVERLOAD2(double, fdim)
__HIP_OVERLOAD1(double, floor)
__HIP_OVERLOAD2(double, fmax)
__HIP_OVERLOAD2(double, fmin)
__HIP_OVERLOAD2(double, fmod)
#if !defined(__HIPCC_RTC__)
__HIP_OVERLOAD1(int, fpclassify)
#endif // !defined(__HIPCC_RTC__)
__HIP_OVERLOAD2(double, hypot)
__HIP_OVERLOAD1(int, ilogb)
__HIP_OVERLOAD1(bool, isfinite)
__HIP_OVERLOAD2(bool, isgreater)
__HIP_OVERLOAD2(bool, isgreaterequal)
__HIP_OVERLOAD1(bool, isinf)
__HIP_OVERLOAD2(bool, isless)
__HIP_OVERLOAD2(bool, islessequal)
__HIP_OVERLOAD2(bool, islessgreater)
__HIP_OVERLOAD1(bool, isnan)
__HIP_OVERLOAD1(bool, isnormal)
__HIP_OVERLOAD2(bool, isunordered)
__HIP_OVERLOAD1(double, lgamma)
__HIP_OVERLOAD1(double, log)
__HIP_OVERLOAD1(double, log10)
__HIP_OVERLOAD1(double, log1p)
__HIP_OVERLOAD1(double, log2)
__HIP_OVERLOAD1(double, logb)
__HIP_OVERLOAD1(long long, llrint)
__HIP_OVERLOAD1(long long, llround)
__HIP_OVERLOAD1(long, lrint)
__HIP_OVERLOAD1(long, lround)
__HIP_OVERLOAD1(double, nearbyint)
__HIP_OVERLOAD2(double, nextafter)
__HIP_OVERLOAD2(double, pow)
__HIP_OVERLOAD2(double, remainder)
__HIP_OVERLOAD1(double, rint)
__HIP_OVERLOAD1(double, round)
__HIP_OVERLOAD1(bool, signbit)
__HIP_OVERLOAD1(double, sin)
__HIP_OVERLOAD1(double, sinh)
__HIP_OVERLOAD1(double, sqrt)
__HIP_OVERLOAD1(double, tan)
__HIP_OVERLOAD1(double, tanh)
__HIP_OVERLOAD1(double, tgamma)
__HIP_OVERLOAD1(double, trunc)

// Overload these but don't add them to std, they are not part of cmath.
__HIP_OVERLOAD2(double, max)
__HIP_OVERLOAD2(double, min)

// Additional overloads that don't quite match HIP_OVERLOAD.
#if __cplusplus >= 201103L
template <typename __T1, typename __T2, typename __T3>
__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<
    __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value &&
        __hip::is_arithmetic<__T3>::value,
    typename __hip::__promote<__T1, __T2, __T3>::type>::type
fma(__T1 __x, __T2 __y, __T3 __z) {
  typedef typename __hip::__promote<__T1, __T2, __T3>::type __result_type;
  return ::fma((__result_type)__x, (__result_type)__y, (__result_type)__z);
}
#else
template <typename __T1, typename __T2, typename __T3>
__DEVICE__ __CONSTEXPR__
    typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
                                 __hip::is_arithmetic<__T2>::value &&
                                 __hip::is_arithmetic<__T3>::value,
                             double>::type
    fma(__T1 __x, __T2 __y, __T3 __z) {
  return ::fma((double)__x, (double)__y, (double)__z);
}
#endif

template <typename __T>
__DEVICE__ __CONSTEXPR__
    typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
    frexp(__T __x, int *__exp) {
  return ::frexp((double)__x, __exp);
}

template <typename __T>
__DEVICE__ __CONSTEXPR__
    typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
    ldexp(__T __x, int __exp) {
  return ::ldexp((double)__x, __exp);
}

template <typename __T>
__DEVICE__ __CONSTEXPR__
    typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
    modf(__T __x, double *__exp) {
  return ::modf((double)__x, __exp);
}

#if __cplusplus >= 201103L
template <typename __T1, typename __T2>
__DEVICE__ __CONSTEXPR__
    typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
                                 __hip::is_arithmetic<__T2>::value,
                             typename __hip::__promote<__T1, __T2>::type>::type
    remquo(__T1 __x, __T2 __y, int *__quo) {
  typedef typename __hip::__promote<__T1, __T2>::type __result_type;
  return ::remquo((__result_type)__x, (__result_type)__y, __quo);
}
#else
template <typename __T1, typename __T2>
__DEVICE__ __CONSTEXPR__
    typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
                                 __hip::is_arithmetic<__T2>::value,
                             double>::type
    remquo(__T1 __x, __T2 __y, int *__quo) {
  return ::remquo((double)__x, (double)__y, __quo);
}
#endif

template <typename __T>
__DEVICE__ __CONSTEXPR__
    typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
    scalbln(__T __x, long int __exp) {
  return ::scalbln((double)__x, __exp);
}

template <typename __T>
__DEVICE__ __CONSTEXPR__
    typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
    scalbn(__T __x, int __exp) {
  return ::scalbn((double)__x, __exp);
}

#pragma pop_macro("__HIP_OVERLOAD1")
#pragma pop_macro("__HIP_OVERLOAD2")

// END HIP_OVERLOAD

// END DEF_FUN and HIP_OVERLOAD

#endif // ifndef __OPENMP_AMDGCN__
#endif // defined(__cplusplus)

#ifndef __OPENMP_AMDGCN__
// Define these overloads inside the namespace our standard library uses.
#if !defined(__HIPCC_RTC__)
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_NAMESPACE_STD
#else
namespace std {
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif // _LIBCPP_BEGIN_NAMESPACE_STD

// Pull the new overloads we defined above into namespace std.
// using ::abs; - This may be considered for C++.
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnormal;
using ::isunordered;
using ::ldexp;
using ::lgamma;
using ::llrint;
using ::llround;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::modf;
// using ::nan; - This may be considered for C++.
// using ::nanf; - This may be considered for C++.
// using ::nanl; - This is not yet defined.
using ::nearbyint;
using ::nextafter;
// using ::nexttoward; - Omit this since we do not have a definition.
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;

// Well this is fun: we need to pull these symbols in for libc++, but we can't
// pull them in with libstdc++, because its ::isinf and ::isnan are the
// int-returning C functions, which differ from its bool-returning std::isinf
// and std::isnan.
#ifndef __GLIBCXX__
using ::isinf;
using ::isnan;
#endif

// Finally, pull the "foobarf" functions that HIP defines into std.
using ::acosf;
using ::acoshf;
using ::asinf;
using ::asinhf;
using ::atan2f;
using ::atanf;
using ::atanhf;
using ::cbrtf;
using ::ceilf;
using ::copysignf;
using ::cosf;
using ::coshf;
using ::erfcf;
using ::erff;
using ::exp2f;
using ::expf;
using ::expm1f;
using ::fabsf;
using ::fdimf;
using ::floorf;
using ::fmaf;
using ::fmaxf;
using ::fminf;
using ::fmodf;
using ::frexpf;
using ::hypotf;
using ::ilogbf;
using ::ldexpf;
using ::lgammaf;
using ::llrintf;
using ::llroundf;
using ::log10f;
using ::log1pf;
using ::log2f;
using ::logbf;
using ::logf;
using ::lrintf;
using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
// using ::nexttowardf; - Omit this since we do not have a definition.
using ::powf;
using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
using ::scalblnf;
using ::scalbnf;
using ::sinf;
using ::sinhf;
using ::sqrtf;
using ::tanf;
using ::tanhf;
using ::tgammaf;
using ::truncf;

#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
#endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION
} // namespace std
#endif // _LIBCPP_END_NAMESPACE_STD
#endif // !defined(__HIPCC_RTC__)

// Define device-side math functions from <ymath.h> on MSVC.
#if !defined(__HIPCC_RTC__)
#if defined(_MSC_VER)

// Before VS2019, `<ymath.h>` is also included in `<limits>` and other headers.
// But, from VS2019, it's only included in `<complex>`. Need to include
// `<ymath.h>` here to ensure the C functions declared there won't be marked as
// `__host__` and `__device__` through the `<complex>` wrapper.
#include <ymath.h>

#if defined(__cplusplus)
extern "C" {
#endif // defined(__cplusplus)
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x,
                                                                    double y) {
  return cosh(x) * y;
}
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x,
                                                                    float y) {
  return coshf(x) * y;
}
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) {
  return fpclassify(*p);
}
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) {
  return fpclassify(*p);
}
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x,
                                                                    double y) {
  return sinh(x) * y;
}
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x,
                                                                    float y) {
  return sinhf(x) * y;
}
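// Editor's sketch (assumption, not part of the original header): MSVC's
// <complex> calls these internal helpers, and the second parameter acts as a
// scale factor, i.e. _Cosh(x, y) computes cosh(x) * y so the library can form
// scaled results. A hypothetical device-side illustration:
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double
__ymath_cosh_demo(double x) {
  return _Cosh(x, 0.5); // == cosh(x) * 0.5
}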
#if defined(__cplusplus)
}
#endif // defined(__cplusplus)
#endif // defined(_MSC_VER)
#endif // !defined(__HIPCC_RTC__)
#endif // ifndef __OPENMP_AMDGCN__

#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__CONSTEXPR__")

#endif // __CLANG_HIP_CMATH_H__
@@ -1,345 +0,0 @@
/*===---- __clang_hip_libdevice_declares.h - HIP device library decls -------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __CLANG_HIP_LIBDEVICE_DECLARES_H__
#define __CLANG_HIP_LIBDEVICE_DECLARES_H__

#if !defined(__HIPCC_RTC__) && __has_include("hip/hip_version.h")
#include "hip/hip_version.h"
#endif // __has_include("hip/hip_version.h")

#define __PRIVATE_AS __attribute__((opencl_private))

#ifdef __cplusplus
extern "C" {
#endif

// BEGIN FLOAT
__device__ __attribute__((const)) float __ocml_acos_f32(float);
__device__ __attribute__((pure)) float __ocml_acosh_f32(float);
__device__ __attribute__((const)) float __ocml_asin_f32(float);
__device__ __attribute__((pure)) float __ocml_asinh_f32(float);
__device__ __attribute__((const)) float __ocml_atan2_f32(float, float);
__device__ __attribute__((const)) float __ocml_atan_f32(float);
__device__ __attribute__((pure)) float __ocml_atanh_f32(float);
__device__ __attribute__((pure)) float __ocml_cbrt_f32(float);
__device__ __attribute__((const)) float __ocml_ceil_f32(float);
__device__ __attribute__((const)) float __ocml_copysign_f32(float, float);
__device__ float __ocml_cos_f32(float);
__device__ float __ocml_native_cos_f32(float);
__device__ __attribute__((pure)) float __ocml_cosh_f32(float);
__device__ float __ocml_cospi_f32(float);
__device__ float __ocml_i0_f32(float);
__device__ float __ocml_i1_f32(float);
__device__ __attribute__((pure)) float __ocml_erfc_f32(float);
__device__ __attribute__((pure)) float __ocml_erfcinv_f32(float);
__device__ __attribute__((pure)) float __ocml_erfcx_f32(float);
__device__ __attribute__((pure)) float __ocml_erf_f32(float);
__device__ __attribute__((pure)) float __ocml_erfinv_f32(float);
__device__ __attribute__((pure)) float __ocml_exp10_f32(float);
__device__ __attribute__((pure)) float __ocml_native_exp10_f32(float);
__device__ __attribute__((pure)) float __ocml_exp2_f32(float);
__device__ __attribute__((pure)) float __ocml_exp_f32(float);
__device__ __attribute__((pure)) float __ocml_native_exp_f32(float);
__device__ __attribute__((pure)) float __ocml_expm1_f32(float);
__device__ __attribute__((const)) float __ocml_fabs_f32(float);
__device__ __attribute__((const)) float __ocml_fdim_f32(float, float);
__device__ __attribute__((const)) float __ocml_floor_f32(float);
__device__ __attribute__((const)) float __ocml_fma_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_fmax_f32(float, float);
__device__ __attribute__((const)) float __ocml_fmin_f32(float, float);
__device__ __attribute__((const)) float __ocml_fmod_f32(float, float);
__device__ float __ocml_frexp_f32(float, __PRIVATE_AS int *);
__device__ __attribute__((const)) float __ocml_hypot_f32(float, float);
__device__ __attribute__((const)) int __ocml_ilogb_f32(float);
__device__ __attribute__((const)) int __ocml_isfinite_f32(float);
__device__ __attribute__((const)) int __ocml_isinf_f32(float);
__device__ __attribute__((const)) int __ocml_isnan_f32(float);
__device__ float __ocml_j0_f32(float);
__device__ float __ocml_j1_f32(float);
__device__ __attribute__((const)) float __ocml_ldexp_f32(float, int);
__device__ float __ocml_lgamma_f32(float);
__device__ __attribute__((pure)) float __ocml_log10_f32(float);
__device__ __attribute__((pure)) float __ocml_native_log10_f32(float);
__device__ __attribute__((pure)) float __ocml_log1p_f32(float);
__device__ __attribute__((pure)) float __ocml_log2_f32(float);
__device__ __attribute__((pure)) float __ocml_native_log2_f32(float);
__device__ __attribute__((const)) float __ocml_logb_f32(float);
__device__ __attribute__((pure)) float __ocml_log_f32(float);
__device__ __attribute__((pure)) float __ocml_native_log_f32(float);
__device__ float __ocml_modf_f32(float, __PRIVATE_AS float *);
__device__ __attribute__((const)) float __ocml_nearbyint_f32(float);
__device__ __attribute__((const)) float __ocml_nextafter_f32(float, float);
__device__ __attribute__((const)) float __ocml_len3_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_len4_f32(float, float, float,
                                                        float);
__device__ __attribute__((pure)) float __ocml_ncdf_f32(float);
__device__ __attribute__((pure)) float __ocml_ncdfinv_f32(float);
__device__ __attribute__((pure)) float __ocml_pow_f32(float, float);
__device__ __attribute__((pure)) float __ocml_pown_f32(float, int);
__device__ __attribute__((pure)) float __ocml_rcbrt_f32(float);
__device__ __attribute__((const)) float __ocml_remainder_f32(float, float);
__device__ float __ocml_remquo_f32(float, float, __PRIVATE_AS int *);
__device__ __attribute__((const)) float __ocml_rhypot_f32(float, float);
__device__ __attribute__((const)) float __ocml_rint_f32(float);
__device__ __attribute__((const)) float __ocml_rlen3_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_rlen4_f32(float, float, float,
                                                         float);
__device__ __attribute__((const)) float __ocml_round_f32(float);
__device__ __attribute__((pure)) float __ocml_rsqrt_f32(float);
__device__ __attribute__((const)) float __ocml_scalb_f32(float, float);
__device__ __attribute__((const)) float __ocml_scalbn_f32(float, int);
__device__ __attribute__((const)) int __ocml_signbit_f32(float);
__device__ float __ocml_sincos_f32(float, __PRIVATE_AS float *);
__device__ float __ocml_sincospi_f32(float, __PRIVATE_AS float *);
__device__ float __ocml_sin_f32(float);
__device__ float __ocml_native_sin_f32(float);
__device__ __attribute__((pure)) float __ocml_sinh_f32(float);
__device__ float __ocml_sinpi_f32(float);
__device__ __attribute__((const)) float __ocml_sqrt_f32(float);
__device__ __attribute__((const)) float __ocml_native_sqrt_f32(float);
__device__ float __ocml_tan_f32(float);
__device__ __attribute__((pure)) float __ocml_tanh_f32(float);
__device__ float __ocml_tgamma_f32(float);
__device__ __attribute__((const)) float __ocml_trunc_f32(float);
__device__ float __ocml_y0_f32(float);
__device__ float __ocml_y1_f32(float);

// BEGIN INTRINSICS
__device__ __attribute__((const)) float __ocml_add_rte_f32(float, float);
__device__ __attribute__((const)) float __ocml_add_rtn_f32(float, float);
__device__ __attribute__((const)) float __ocml_add_rtp_f32(float, float);
__device__ __attribute__((const)) float __ocml_add_rtz_f32(float, float);
__device__ __attribute__((const)) float __ocml_sub_rte_f32(float, float);
__device__ __attribute__((const)) float __ocml_sub_rtn_f32(float, float);
__device__ __attribute__((const)) float __ocml_sub_rtp_f32(float, float);
__device__ __attribute__((const)) float __ocml_sub_rtz_f32(float, float);
__device__ __attribute__((const)) float __ocml_mul_rte_f32(float, float);
__device__ __attribute__((const)) float __ocml_mul_rtn_f32(float, float);
__device__ __attribute__((const)) float __ocml_mul_rtp_f32(float, float);
__device__ __attribute__((const)) float __ocml_mul_rtz_f32(float, float);
__device__ __attribute__((const)) float __ocml_div_rte_f32(float, float);
__device__ __attribute__((const)) float __ocml_div_rtn_f32(float, float);
__device__ __attribute__((const)) float __ocml_div_rtp_f32(float, float);
__device__ __attribute__((const)) float __ocml_div_rtz_f32(float, float);
__device__ __attribute__((const)) float __ocml_sqrt_rte_f32(float);
__device__ __attribute__((const)) float __ocml_sqrt_rtn_f32(float);
__device__ __attribute__((const)) float __ocml_sqrt_rtp_f32(float);
__device__ __attribute__((const)) float __ocml_sqrt_rtz_f32(float);
__device__ __attribute__((const)) float __ocml_fma_rte_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_fma_rtn_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_fma_rtp_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_fma_rtz_f32(float, float, float);
// END INTRINSICS
// END FLOAT

// BEGIN DOUBLE
__device__ __attribute__((const)) double __ocml_acos_f64(double);
__device__ __attribute__((pure)) double __ocml_acosh_f64(double);
__device__ __attribute__((const)) double __ocml_asin_f64(double);
__device__ __attribute__((pure)) double __ocml_asinh_f64(double);
__device__ __attribute__((const)) double __ocml_atan2_f64(double, double);
__device__ __attribute__((const)) double __ocml_atan_f64(double);
__device__ __attribute__((pure)) double __ocml_atanh_f64(double);
__device__ __attribute__((pure)) double __ocml_cbrt_f64(double);
__device__ __attribute__((const)) double __ocml_ceil_f64(double);
__device__ __attribute__((const)) double __ocml_copysign_f64(double, double);
__device__ double __ocml_cos_f64(double);
__device__ __attribute__((pure)) double __ocml_cosh_f64(double);
__device__ double __ocml_cospi_f64(double);
__device__ double __ocml_i0_f64(double);
__device__ double __ocml_i1_f64(double);
__device__ __attribute__((pure)) double __ocml_erfc_f64(double);
__device__ __attribute__((pure)) double __ocml_erfcinv_f64(double);
__device__ __attribute__((pure)) double __ocml_erfcx_f64(double);
__device__ __attribute__((pure)) double __ocml_erf_f64(double);
__device__ __attribute__((pure)) double __ocml_erfinv_f64(double);
__device__ __attribute__((pure)) double __ocml_exp10_f64(double);
__device__ __attribute__((pure)) double __ocml_exp2_f64(double);
__device__ __attribute__((pure)) double __ocml_exp_f64(double);
__device__ __attribute__((pure)) double __ocml_expm1_f64(double);
__device__ __attribute__((const)) double __ocml_fabs_f64(double);
__device__ __attribute__((const)) double __ocml_fdim_f64(double, double);
__device__ __attribute__((const)) double __ocml_floor_f64(double);
__device__ __attribute__((const)) double __ocml_fma_f64(double, double, double);
__device__ __attribute__((const)) double __ocml_fmax_f64(double, double);
__device__ __attribute__((const)) double __ocml_fmin_f64(double, double);
__device__ __attribute__((const)) double __ocml_fmod_f64(double, double);
__device__ double __ocml_frexp_f64(double, __PRIVATE_AS int *);
__device__ __attribute__((const)) double __ocml_hypot_f64(double, double);
__device__ __attribute__((const)) int __ocml_ilogb_f64(double);
__device__ __attribute__((const)) int __ocml_isfinite_f64(double);
__device__ __attribute__((const)) int __ocml_isinf_f64(double);
__device__ __attribute__((const)) int __ocml_isnan_f64(double);
__device__ double __ocml_j0_f64(double);
__device__ double __ocml_j1_f64(double);
__device__ __attribute__((const)) double __ocml_ldexp_f64(double, int);
__device__ double __ocml_lgamma_f64(double);
__device__ __attribute__((pure)) double __ocml_log10_f64(double);
__device__ __attribute__((pure)) double __ocml_log1p_f64(double);
__device__ __attribute__((pure)) double __ocml_log2_f64(double);
__device__ __attribute__((const)) double __ocml_logb_f64(double);
__device__ __attribute__((pure)) double __ocml_log_f64(double);
__device__ double __ocml_modf_f64(double, __PRIVATE_AS double *);
__device__ __attribute__((const)) double __ocml_nearbyint_f64(double);
__device__ __attribute__((const)) double __ocml_nextafter_f64(double, double);
__device__ __attribute__((const)) double __ocml_len3_f64(double, double,
                                                         double);
__device__ __attribute__((const)) double __ocml_len4_f64(double, double, double,
                                                         double);
__device__ __attribute__((pure)) double __ocml_ncdf_f64(double);
__device__ __attribute__((pure)) double __ocml_ncdfinv_f64(double);
__device__ __attribute__((pure)) double __ocml_pow_f64(double, double);
__device__ __attribute__((pure)) double __ocml_pown_f64(double, int);
__device__ __attribute__((pure)) double __ocml_rcbrt_f64(double);
__device__ __attribute__((const)) double __ocml_remainder_f64(double, double);
__device__ double __ocml_remquo_f64(double, double, __PRIVATE_AS int *);
__device__ __attribute__((const)) double __ocml_rhypot_f64(double, double);
__device__ __attribute__((const)) double __ocml_rint_f64(double);
__device__ __attribute__((const)) double __ocml_rlen3_f64(double, double,
                                                          double);
__device__ __attribute__((const)) double __ocml_rlen4_f64(double, double,
                                                          double, double);
__device__ __attribute__((const)) double __ocml_round_f64(double);
__device__ __attribute__((pure)) double __ocml_rsqrt_f64(double);
__device__ __attribute__((const)) double __ocml_scalb_f64(double, double);
__device__ __attribute__((const)) double __ocml_scalbn_f64(double, int);
__device__ __attribute__((const)) int __ocml_signbit_f64(double);
__device__ double __ocml_sincos_f64(double, __PRIVATE_AS double *);
__device__ double __ocml_sincospi_f64(double, __PRIVATE_AS double *);
__device__ double __ocml_sin_f64(double);
__device__ __attribute__((pure)) double __ocml_sinh_f64(double);
__device__ double __ocml_sinpi_f64(double);
__device__ __attribute__((const)) double __ocml_sqrt_f64(double);
__device__ double __ocml_tan_f64(double);
__device__ __attribute__((pure)) double __ocml_tanh_f64(double);
__device__ double __ocml_tgamma_f64(double);
__device__ __attribute__((const)) double __ocml_trunc_f64(double);
__device__ double __ocml_y0_f64(double);
__device__ double __ocml_y1_f64(double);

// BEGIN INTRINSICS
__device__ __attribute__((const)) double __ocml_add_rte_f64(double, double);
__device__ __attribute__((const)) double __ocml_add_rtn_f64(double, double);
__device__ __attribute__((const)) double __ocml_add_rtp_f64(double, double);
__device__ __attribute__((const)) double __ocml_add_rtz_f64(double, double);
__device__ __attribute__((const)) double __ocml_sub_rte_f64(double, double);
__device__ __attribute__((const)) double __ocml_sub_rtn_f64(double, double);
__device__ __attribute__((const)) double __ocml_sub_rtp_f64(double, double);
__device__ __attribute__((const)) double __ocml_sub_rtz_f64(double, double);
__device__ __attribute__((const)) double __ocml_mul_rte_f64(double, double);
__device__ __attribute__((const)) double __ocml_mul_rtn_f64(double, double);
__device__ __attribute__((const)) double __ocml_mul_rtp_f64(double, double);
__device__ __attribute__((const)) double __ocml_mul_rtz_f64(double, double);
__device__ __attribute__((const)) double __ocml_div_rte_f64(double, double);
__device__ __attribute__((const)) double __ocml_div_rtn_f64(double, double);
__device__ __attribute__((const)) double __ocml_div_rtp_f64(double, double);
__device__ __attribute__((const)) double __ocml_div_rtz_f64(double, double);
__device__ __attribute__((const)) double __ocml_sqrt_rte_f64(double);
__device__ __attribute__((const)) double __ocml_sqrt_rtn_f64(double);
__device__ __attribute__((const)) double __ocml_sqrt_rtp_f64(double);
__device__ __attribute__((const)) double __ocml_sqrt_rtz_f64(double);
__device__ __attribute__((const)) double __ocml_fma_rte_f64(double, double,
                                                            double);
__device__ __attribute__((const)) double __ocml_fma_rtn_f64(double, double,
                                                            double);
__device__ __attribute__((const)) double __ocml_fma_rtp_f64(double, double,
                                                            double);
__device__ __attribute__((const)) double __ocml_fma_rtz_f64(double, double,
                                                            double);

__device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16);
__device__ _Float16 __ocml_cos_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float);
__device__ __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float);
__device__ __attribute__((const)) _Float16 __ocml_cvtrtz_f16_f32(float);
__device__ __attribute__((pure)) _Float16 __ocml_exp_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_fma_f16(_Float16, _Float16,
                                                          _Float16);
__device__ __attribute__((const)) _Float16 __ocml_fmax_f16(_Float16, _Float16);
__device__ __attribute__((const)) _Float16 __ocml_fmin_f16(_Float16, _Float16);
__device__ __attribute__((const)) _Float16 __ocml_fabs_f16(_Float16);
__device__ __attribute__((const)) int __ocml_isinf_f16(_Float16);
__device__ __attribute__((const)) int __ocml_isnan_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_log10_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_rint_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16);
__device__ _Float16 __ocml_sin_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int);

typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
typedef short __2i16 __attribute__((ext_vector_type(2)));

// We need to match C99's bool and get an i1 in the IR.
#ifdef __cplusplus
typedef bool __ockl_bool;
#else
typedef _Bool __ockl_bool;
#endif

__device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b,
                                                     float c, __ockl_bool s);
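// Editor's sketch (assumption, not part of the original header): __ockl_fdot2
// appears to evaluate a packed-half dot product plus accumulator,
// dot(a, b) + c, with the trailing __ockl_bool requesting result saturation.
// A hypothetical wrapper:
static __device__ inline float __fdot2_demo(__2f16 a, __2f16 b, float c) {
  return __ockl_fdot2(a, b, c, (__ockl_bool)0); // 0: no saturation
}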
__device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16);
__device__ __2f16 __ocml_cos_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_floor_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16,
                                                         __2f16);
__device__ __attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16);
__device__ __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16);

#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 560
#define __DEPRECATED_SINCE_HIP_560(X) __attribute__((deprecated(X)))
#else
#define __DEPRECATED_SINCE_HIP_560(X)
#endif

// Deprecated; should be removed once the ROCm releases using it are no longer
// relevant.
__DEPRECATED_SINCE_HIP_560("use ((_Float16)1.0) / ")
__device__ inline _Float16 __llvm_amdgcn_rcp_f16(_Float16 x) {
  return ((_Float16)1.0f) / x;
}

__DEPRECATED_SINCE_HIP_560("use ((__2f16)1.0) / ")
__device__ inline __2f16 __llvm_amdgcn_rcp_2f16(__2f16 __x) {
  return ((__2f16)1.0f) / __x;
}

#undef __DEPRECATED_SINCE_HIP_560
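// Editor's note (not part of the original header): per the deprecation
// messages above, callers should use a plain reciprocal expression instead:
//
//   _Float16 __r  = ((_Float16)1.0f) / __x; // replaces __llvm_amdgcn_rcp_f16(__x)
//   __2f16   __r2 = ((__2f16)1.0f) / __v;   // replaces __llvm_amdgcn_rcp_2f16(__v)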

__device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16);
__device__ __2f16 __ocml_sin_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_pown_2f16(__2f16, __2i16);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // __CLANG_HIP_LIBDEVICE_DECLARES_H__
File diff suppressed because it is too large
@@ -1,162 +0,0 @@
/*===---- __clang_hip_runtime_wrapper.h - HIP runtime support ---------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/*
 * WARNING: This header is intended to be directly -include'd by
 * the compiler and is not supposed to be included by users.
 *
 */

#ifndef __CLANG_HIP_RUNTIME_WRAPPER_H__
#define __CLANG_HIP_RUNTIME_WRAPPER_H__

#if __HIP__

#define __host__ __attribute__((host))
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
#define __shared__ __attribute__((shared))
#define __constant__ __attribute__((constant))
#define __managed__ __attribute__((managed))

#if !defined(__cplusplus) || __cplusplus < 201103L
#define nullptr NULL
#endif

#ifdef __cplusplus
extern "C" {
__attribute__((__visibility__("default")))
__attribute__((weak))
__attribute__((noreturn))
__device__ void __cxa_pure_virtual(void) {
  __builtin_trap();
}
__attribute__((__visibility__("default")))
__attribute__((weak))
__attribute__((noreturn))
__device__ void __cxa_deleted_virtual(void) {
  __builtin_trap();
}
}
#endif //__cplusplus

#if !defined(__HIPCC_RTC__)
#if __has_include("hip/hip_version.h")
#include "hip/hip_version.h"
#endif // __has_include("hip/hip_version.h")
#endif // __HIPCC_RTC__

typedef __SIZE_TYPE__ __hip_size_t;

#ifdef __cplusplus
extern "C" {
#endif //__cplusplus

#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
__device__ void __ockl_dm_dealloc(unsigned long long __addr);
#if __has_feature(address_sanitizer)
__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
                                                 unsigned long long __pc);
__device__ void __asan_free_impl(unsigned long long __addr,
                                 unsigned long long __pc);
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
  unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
  return (void *)__asan_malloc_impl(__size, __pc);
}
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
  unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
  __asan_free_impl((unsigned long long)__ptr, __pc);
}
#else // __has_feature(address_sanitizer)
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
  return (void *)__ockl_dm_alloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
  __ockl_dm_dealloc((unsigned long long)__ptr);
}
#endif // __has_feature(address_sanitizer)
#else // HIP version check
#if __HIP_ENABLE_DEVICE_MALLOC__
__device__ void *__hip_malloc(__hip_size_t __size);
__device__ void *__hip_free(void *__ptr);
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
  return __hip_malloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
  __hip_free(__ptr);
}
#else // __HIP_ENABLE_DEVICE_MALLOC__
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
  __builtin_trap();
  return (void *)0;
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
  __builtin_trap();
}
#endif // __HIP_ENABLE_DEVICE_MALLOC__
#endif // HIP version check
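// Editor's sketch (not part of the original header): whichever branch above is
// selected, device code allocates through the usual C interface. A
// hypothetical kernel:
//
//   __global__ void __alloc_demo(int **__out) {
//     int *__p = (int *)malloc(sizeof(int)); // dispatches to the impl above
//     *__p = 42;
//     *__out = __p; // released later with free(__p) from device code
//   }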

#ifdef __cplusplus
} // extern "C"
#endif //__cplusplus

#if !defined(__HIPCC_RTC__)
#include <cmath>
#include <cstdlib>
#include <stdlib.h>
#if __has_include("hip/hip_version.h")
#include "hip/hip_version.h"
#endif // __has_include("hip/hip_version.h")
#else
typedef __SIZE_TYPE__ size_t;
// Define macros needed to declare the HIP device APIs without the standard
// C/C++ headers. This is for readability, so that these APIs can be written
// the same way as in the non-hipRTC case. The macros are popped afterwards so
// that they do not pollute the user's namespace.
#pragma push_macro("NULL")
#pragma push_macro("uint32_t")
#pragma push_macro("uint64_t")
#pragma push_macro("CHAR_BIT")
#pragma push_macro("INT_MAX")
#pragma push_macro("INT_MIN")
#define NULL (void *)0
#define uint32_t __UINT32_TYPE__
#define uint64_t __UINT64_TYPE__
#define CHAR_BIT __CHAR_BIT__
#define INT_MAX __INT_MAX__
#define INT_MIN (-__INT_MAX__ - 1)
#endif // __HIPCC_RTC__

#include <__clang_hip_libdevice_declares.h>
#include <__clang_hip_math.h>
#include <__clang_hip_stdlib.h>

#if defined(__HIPCC_RTC__)
#include <__clang_hip_cmath.h>
#else
#include <__clang_cuda_math_forward_declares.h>
#include <__clang_hip_cmath.h>
#include <__clang_cuda_complex_builtins.h>
#include <algorithm>
#include <complex>
#include <new>
#endif // __HIPCC_RTC__

#define __CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ 1
#if defined(__HIPCC_RTC__)
#pragma pop_macro("NULL")
#pragma pop_macro("uint32_t")
#pragma pop_macro("uint64_t")
#pragma pop_macro("CHAR_BIT")
#pragma pop_macro("INT_MAX")
#pragma pop_macro("INT_MIN")
#endif // __HIPCC_RTC__
#endif // __HIP__
#endif // __CLANG_HIP_RUNTIME_WRAPPER_H__
@@ -1,43 +0,0 @@
/*===---- __clang_hip_stdlib.h - Device-side HIP math support --------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_HIP_STDLIB_H__
#define __CLANG_HIP_STDLIB_H__

#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif

#if !defined(__cplusplus)

#include <limits.h>

#ifdef __OPENMP_AMDGCN__
#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
#endif

__DEVICE__
int abs(int __x) {
  int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long labs(long __x) {
  long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long long llabs(long long __x) {
  long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
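/* Editor's note (not part of the original header): this is the classic
 * branchless absolute value. The arithmetic right shift replicates the sign
 * bit, so for negative __x, __sgn is all ones (-1) and
 *   (__x ^ -1) - (-1) == ~__x + 1 == -__x,
 * while for non-negative __x, __sgn is 0 and the value passes through.
 * Worked example, 32-bit __x = -5: __sgn = 0xFFFFFFFF, __x ^ __sgn = 4,
 * 4 - (-1) = 5. As with any int-returning abs, INT_MIN still overflows. */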

#endif // !defined(__cplusplus)

#endif // __CLANG_HIP_STDLIB_H__
@@ -1,217 +0,0 @@
/*===---- spirv_builtin_vars.h - SPIR-V built-in ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __SPIRV_BUILTIN_VARS_H
#define __SPIRV_BUILTIN_VARS_H

#if __cplusplus >= 201103L
#define __SPIRV_NOEXCEPT noexcept
#else
#define __SPIRV_NOEXCEPT
#endif

#pragma push_macro("__size_t")
#pragma push_macro("__uint32_t")
#pragma push_macro("__uint64_t")
#define __size_t __SIZE_TYPE__
#define __uint32_t __UINT32_TYPE__

#define __SPIRV_overloadable __attribute__((overloadable))
#define __SPIRV_convergent __attribute__((convergent))
#define __SPIRV_inline __attribute__((always_inline))

#define __global __attribute__((opencl_global))
#define __local __attribute__((opencl_local))
#define __private __attribute__((opencl_private))
#define __constant __attribute__((opencl_constant))
#ifdef __SYCL_DEVICE_ONLY__
#define __generic
#else
#define __generic __attribute__((opencl_generic))
#endif

// Check if SPIR-V builtins are supported.
// As the translator doesn't use the LLVM intrinsics (which would be emitted if
// we used the SPIR-V builtins), we can't rely on the SPIRV32/SPIRV64 etc.
// macros to establish whether the builtin alias is usable. We disable the
// builtins altogether if we do not intend to use the backend. So instead of
// using target macros, rely on a __has_builtin test.
#if (__has_builtin(__builtin_spirv_num_workgroups))
#define __SPIRV_BUILTIN_ALIAS(builtin)                                         \
  __attribute__((clang_builtin_alias(builtin)))
#else
#define __SPIRV_BUILTIN_ALIAS(builtin)
#endif

// Builtin IDs and sizes

extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_num_workgroups) __size_t
    __spirv_NumWorkgroups(int);
extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_workgroup_size) __size_t
    __spirv_WorkgroupSize(int);
extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_workgroup_id) __size_t
    __spirv_WorkgroupId(int);
extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_local_invocation_id) __size_t
    __spirv_LocalInvocationId(int);
extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_invocation_id) __size_t
    __spirv_GlobalInvocationId(int);

extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_size) __size_t
    __spirv_GlobalSize(int);
extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_offset) __size_t
    __spirv_GlobalOffset(int);
extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_size) __uint32_t
    __spirv_SubgroupSize();
extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_max_size) __uint32_t
    __spirv_SubgroupMaxSize();
extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_num_subgroups) __uint32_t
    __spirv_NumSubgroups();
extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_id) __uint32_t
    __spirv_SubgroupId();
extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_local_invocation_id)
    __uint32_t __spirv_SubgroupLocalInvocationId();
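// Editor's sketch (assumption, not part of the original header): each builtin
// takes the dimension index as its argument, so a global linear id along x can
// be composed in the style of OpenCL's get_global_id(). The helper is
// hypothetical:
//
//   static inline __size_t __spirv_demo_global_id_x(void) {
//     return __spirv_GlobalInvocationId(0); // dimension 0 == x
//   }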

// OpGenericCastToPtrExplicit

extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__global void *__spirv_GenericCastToPtrExplicit_ToGlobal(__generic void *,
                                                         int) __SPIRV_NOEXCEPT;
extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__global const void *
__spirv_GenericCastToPtrExplicit_ToGlobal(__generic const void *,
                                          int) __SPIRV_NOEXCEPT;
extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__global volatile void *
__spirv_GenericCastToPtrExplicit_ToGlobal(__generic volatile void *,
                                          int) __SPIRV_NOEXCEPT;
extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__global const volatile void *
__spirv_GenericCastToPtrExplicit_ToGlobal(__generic const volatile void *,
                                          int) __SPIRV_NOEXCEPT;
extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__local void *__spirv_GenericCastToPtrExplicit_ToLocal(__generic void *,
                                                       int) __SPIRV_NOEXCEPT;
extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__local const void *
__spirv_GenericCastToPtrExplicit_ToLocal(__generic const void *,
                                         int) __SPIRV_NOEXCEPT;
extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__local volatile void *
__spirv_GenericCastToPtrExplicit_ToLocal(__generic volatile void *,
                                         int) __SPIRV_NOEXCEPT;
extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__local const volatile void *
__spirv_GenericCastToPtrExplicit_ToLocal(__generic const volatile void *,
                                         int) __SPIRV_NOEXCEPT;
extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__private void *
__spirv_GenericCastToPtrExplicit_ToPrivate(__generic void *,
                                           int) __SPIRV_NOEXCEPT;
extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__private const void *
__spirv_GenericCastToPtrExplicit_ToPrivate(__generic const void *,
                                           int) __SPIRV_NOEXCEPT;
extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__private volatile void *
__spirv_GenericCastToPtrExplicit_ToPrivate(__generic volatile void *,
                                           int) __SPIRV_NOEXCEPT;
extern __SPIRV_overloadable
__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit)
__private const volatile void *
__spirv_GenericCastToPtrExplicit_ToPrivate(__generic const volatile void *,
                                           int) __SPIRV_NOEXCEPT;

// OpGenericCastToPtr

static __SPIRV_overloadable __SPIRV_inline __global void *
__spirv_GenericCastToPtr_ToGlobal(__generic void *p, int) __SPIRV_NOEXCEPT {
  return (__global void *)p;
}
static __SPIRV_overloadable __SPIRV_inline __global const void *
__spirv_GenericCastToPtr_ToGlobal(__generic const void *p,
                                  int) __SPIRV_NOEXCEPT {
  return (__global const void *)p;
}
static __SPIRV_overloadable __SPIRV_inline __global volatile void *
__spirv_GenericCastToPtr_ToGlobal(__generic volatile void *p,
                                  int) __SPIRV_NOEXCEPT {
  return (__global volatile void *)p;
}
static __SPIRV_overloadable __SPIRV_inline __global const volatile void *
__spirv_GenericCastToPtr_ToGlobal(__generic const volatile void *p,
                                  int) __SPIRV_NOEXCEPT {
  return (__global const volatile void *)p;
}
static __SPIRV_overloadable __SPIRV_inline __local void *
__spirv_GenericCastToPtr_ToLocal(__generic void *p, int) __SPIRV_NOEXCEPT {
  return (__local void *)p;
}
static __SPIRV_overloadable __SPIRV_inline __local const void *
__spirv_GenericCastToPtr_ToLocal(__generic const void *p,
                                 int) __SPIRV_NOEXCEPT {
  return (__local const void *)p;
}
static __SPIRV_overloadable __SPIRV_inline __local volatile void *
__spirv_GenericCastToPtr_ToLocal(__generic volatile void *p,
                                 int) __SPIRV_NOEXCEPT {
  return (__local volatile void *)p;
}
static __SPIRV_overloadable __SPIRV_inline __local const volatile void *
__spirv_GenericCastToPtr_ToLocal(__generic const volatile void *p,
                                 int) __SPIRV_NOEXCEPT {
  return (__local const volatile void *)p;
}
static __SPIRV_overloadable __SPIRV_inline __private void *
__spirv_GenericCastToPtr_ToPrivate(__generic void *p, int) __SPIRV_NOEXCEPT {
  return (__private void *)p;
}
static __SPIRV_overloadable __SPIRV_inline __private const void *
__spirv_GenericCastToPtr_ToPrivate(__generic const void *p,
                                   int) __SPIRV_NOEXCEPT {
  return (__private const void *)p;
}
static __SPIRV_overloadable __SPIRV_inline __private volatile void *
__spirv_GenericCastToPtr_ToPrivate(__generic volatile void *p,
                                   int) __SPIRV_NOEXCEPT {
  return (__private volatile void *)p;
}
static __SPIRV_overloadable __SPIRV_inline __private const volatile void *
__spirv_GenericCastToPtr_ToPrivate(__generic const volatile void *p,
                                   int) __SPIRV_NOEXCEPT {
  return (__private const volatile void *)p;
}
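// Editor's sketch (assumption, not part of the original header): these are
// plain address-space casts, and the int operand mirrors the SPIR-V storage
// class operand (e.g. Workgroup == 4 in the SPIR-V spec). Hypothetical usage:
//
//   __local int *__lp = (__local int *)
//       __spirv_GenericCastToPtr_ToLocal(__gp, 4); // __gp: __generic int *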

#pragma pop_macro("__size_t")
#pragma pop_macro("__uint32_t")
#pragma pop_macro("__uint64_t")

#undef __SPIRV_overloadable
#undef __SPIRV_convergent
#undef __SPIRV_inline

#undef __global
#undef __local
#undef __private
#undef __constant
#undef __generic

#undef __SPIRV_BUILTIN_ALIAS
#undef __SPIRV_NOEXCEPT

#endif /* __SPIRV_BUILTIN_VARS_H */
@@ -1,13 +0,0 @@
/*===---- __stdarg___gnuc_va_list.h - Definition of __gnuc_va_list ---------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __GNUC_VA_LIST
#define __GNUC_VA_LIST
typedef __builtin_va_list __gnuc_va_list;
#endif
@@ -1,12 +0,0 @@
/*===---- __stdarg___va_copy.h - Definition of __va_copy -------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __va_copy
#define __va_copy(d, s) __builtin_va_copy(d, s)
#endif
@@ -1,12 +0,0 @@
/*===---- __stdarg_header_macro.h ------------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __STDARG_H
#define __STDARG_H
#endif
@@ -1,22 +0,0 @@
/*===---- __stdarg_va_arg.h - Definitions of va_start, va_arg, va_end ------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef va_arg

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
/* C23 uses a special builtin. */
#define va_start(...) __builtin_c23_va_start(__VA_ARGS__)
#else
/* Versions before C23 do require the second parameter. */
#define va_start(ap, param) __builtin_va_start(ap, param)
#endif
#define va_end(ap) __builtin_va_end(ap)
#define va_arg(ap, type) __builtin_va_arg(ap, type)

#endif
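/* Editor's sketch (not part of the original header): typical use of the
 * macros defined above; under C23, va_start may be called with only the
 * va_list argument.
 *
 *   int sum_ints(int count, ...) {
 *     va_list ap;
 *     va_start(ap, count); // C23 also accepts va_start(ap)
 *     int total = 0;
 *     for (int i = 0; i < count; ++i)
 *       total += va_arg(ap, int);
 *     va_end(ap);
 *     return total;
 *   }
 */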
@@ -1,12 +0,0 @@
/*===---- __stdarg_va_copy.h - Definition of va_copy -----------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef va_copy
#define va_copy(dest, src) __builtin_va_copy(dest, src)
#endif
@@ -1,13 +0,0 @@
/*===---- __stdarg_va_list.h - Definition of va_list -----------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef _VA_LIST
#define _VA_LIST
typedef __builtin_va_list va_list;
#endif
@@ -1,12 +0,0 @@
/*===---- __stddef_header_macro.h ------------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __STDDEF_H
#define __STDDEF_H
#endif
@@ -1,27 +0,0 @@
/*===---- __stddef_max_align_t.h - Definition of max_align_t ---------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __CLANG_MAX_ALIGN_T_DEFINED
#define __CLANG_MAX_ALIGN_T_DEFINED

#if defined(_MSC_VER)
typedef double max_align_t;
#elif defined(__APPLE__)
typedef long double max_align_t;
#else
// Define 'max_align_t' to match the GCC definition.
typedef struct {
  long long __clang_max_align_nonce1
      __attribute__((__aligned__(__alignof__(long long))));
  long double __clang_max_align_nonce2
      __attribute__((__aligned__(__alignof__(long double))));
} max_align_t;
#endif

#endif
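/* Editor's note (not part of the original header): the two-member struct
 * gives max_align_t the strictest of the two alignments, which the following
 * hypothetical checks would confirm:
 *
 *   _Static_assert(_Alignof(max_align_t) >= _Alignof(long long), "");
 *   _Static_assert(_Alignof(max_align_t) >= _Alignof(long double), "");
 */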
@@ -1,29 +0,0 @@
/*===---- __stddef_null.h - Definition of NULL -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#if !defined(NULL) || !__building_module(_Builtin_stddef)

/* linux/stddef.h will define NULL to 0. glibc (and other) headers then define
 * __need_NULL and rely on stddef.h to redefine NULL to the correct value again.
 * Modules don't support redefining macros like that, but support that pattern
 * in the non-modules case.
 */
#undef NULL

#ifdef __cplusplus
#if !defined(__MINGW32__) && !defined(_MSC_VER)
#define NULL __null
#else
#define NULL 0
#endif
#else
#define NULL ((void*)0)
#endif

#endif
@@ -1,29 +0,0 @@
/*===---- __stddef_nullptr_t.h - Definition of nullptr_t -------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/*
 * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
 * and needs to behave as if it was textual.
 */
#if !defined(_NULLPTR_T) ||                                                    \
    (__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _NULLPTR_T

#ifdef __cplusplus
#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
namespace std {
typedef decltype(nullptr) nullptr_t;
}
using ::std::nullptr_t;
#endif
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
typedef typeof(nullptr) nullptr_t;
#endif

#endif
@@ -1,17 +0,0 @@
/*===---- __stddef_offsetof.h - Definition of offsetof ---------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/*
 * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
 * and needs to behave as if it was textual.
 */
#if !defined(offsetof) ||                                                      \
    (__has_feature(modules) && !__building_module(_Builtin_stddef))
#define offsetof(t, d) __builtin_offsetof(t, d)
#endif
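/* Editor's sketch (not part of the original header): __builtin_offsetof
 * yields a constant byte offset, e.g.:
 *
 *   struct __demo { char c; double d; };
 *   enum { __d_off = offsetof(struct __demo, d) }; // typically 8
 */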
@@ -1,20 +0,0 @@
|
|||||||
/*===---- __stddef_ptrdiff_t.h - Definition of ptrdiff_t -------------------===
|
|
||||||
*
|
|
||||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
||||||
* See https://llvm.org/LICENSE.txt for license information.
|
|
||||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
||||||
*
|
|
||||||
*===-----------------------------------------------------------------------===
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
|
||||||
* and needs to behave as if it was textual.
|
|
||||||
*/
|
|
||||||
#if !defined(_PTRDIFF_T) || \
|
|
||||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
|
||||||
#define _PTRDIFF_T
|
|
||||||
|
|
||||||
typedef __PTRDIFF_TYPE__ ptrdiff_t;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
/*===---- __stddef_rsize_t.h - Definition of rsize_t -----------------------===
|
|
||||||
*
|
|
||||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
||||||
* See https://llvm.org/LICENSE.txt for license information.
|
|
||||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
||||||
*
|
|
||||||
*===-----------------------------------------------------------------------===
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
|
||||||
* and needs to behave as if it was textual.
|
|
||||||
*/
|
|
||||||
#if !defined(_RSIZE_T) || \
|
|
||||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
|
||||||
#define _RSIZE_T
|
|
||||||
|
|
||||||
typedef __SIZE_TYPE__ rsize_t;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,20 +0,0 @@
/*===---- __stddef_size_t.h - Definition of size_t -------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/*
 * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
 * and needs to behave as if it was textual.
 */
#if !defined(_SIZE_T) || \
    (__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _SIZE_T

typedef __SIZE_TYPE__ size_t;

#endif
@@ -1,21 +0,0 @@
/*===---- __stddef_unreachable.h - Definition of unreachable ---------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __cplusplus

/*
 * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
 * and needs to behave as if it was textual.
 */
#if !defined(unreachable) || \
    (__has_feature(modules) && !__building_module(_Builtin_stddef))
#define unreachable() __builtin_unreachable()
#endif

#endif
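unreachable() tells the optimizer that control cannot reach that point; actually reaching it is undefined behavior. A hedged sketch (C23; the enum and function are illustrative):

#include <stddef.h>

enum dir { NORTH, SOUTH };

static int step(enum dir d) {
  switch (d) {
  case NORTH: return 1;
  case SOUTH: return -1;
  }
  unreachable(); /* every enumerator is handled above */
}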
@@ -1,28 +0,0 @@
/*===---- __stddef_wchar.h - Definition of wchar_t -------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED)

/*
 * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
 * and needs to behave as if it was textual.
 */
#if !defined(_WCHAR_T) || \
    (__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _WCHAR_T

#ifdef _MSC_EXTENSIONS
#define _WCHAR_T_DEFINED
#endif

typedef __WCHAR_TYPE__ wchar_t;

#endif

#endif
@@ -1,15 +0,0 @@
/*===---- __stddef_wint.h - Definition of wint_t ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef _WINT_T
#define _WINT_T

typedef __WINT_TYPE__ wint_t;

#endif
@@ -1,140 +0,0 @@
/*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __WMMINTRIN_H
#error "Never use <__wmmintrin_aes.h> directly; include <wmmintrin.h> instead."
#endif

#ifndef __WMMINTRIN_AES_H
#define __WMMINTRIN_AES_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"), __min_vector_width__(128)))

/// Performs a single round of AES encryption using the Equivalent
/// Inverse Cipher, transforming the state value from the first source
/// operand using a 128-bit round key value contained in the second source
/// operand, and writes the result to the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VAESENC </c> instruction.
///
/// \param __V
///    A 128-bit integer vector containing the state value.
/// \param __R
///    A 128-bit integer vector containing the round key value.
/// \returns A 128-bit integer vector containing the encrypted value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_aesenc_si128(__m128i __V, __m128i __R)
{
  return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R);
}

/// Performs the final round of AES encryption using the Equivalent
/// Inverse Cipher, transforming the state value from the first source
/// operand using a 128-bit round key value contained in the second source
/// operand, and writes the result to the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VAESENCLAST </c> instruction.
///
/// \param __V
///    A 128-bit integer vector containing the state value.
/// \param __R
///    A 128-bit integer vector containing the round key value.
/// \returns A 128-bit integer vector containing the encrypted value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_aesenclast_si128(__m128i __V, __m128i __R)
{
  return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R);
}

/// Performs a single round of AES decryption using the Equivalent
/// Inverse Cipher, transforming the state value from the first source
/// operand using a 128-bit round key value contained in the second source
/// operand, and writes the result to the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VAESDEC </c> instruction.
///
/// \param __V
///    A 128-bit integer vector containing the state value.
/// \param __R
///    A 128-bit integer vector containing the round key value.
/// \returns A 128-bit integer vector containing the decrypted value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_aesdec_si128(__m128i __V, __m128i __R)
{
  return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R);
}

/// Performs the final round of AES decryption using the Equivalent
/// Inverse Cipher, transforming the state value from the first source
/// operand using a 128-bit round key value contained in the second source
/// operand, and writes the result to the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VAESDECLAST </c> instruction.
///
/// \param __V
///    A 128-bit integer vector containing the state value.
/// \param __R
///    A 128-bit integer vector containing the round key value.
/// \returns A 128-bit integer vector containing the decrypted value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_aesdeclast_si128(__m128i __V, __m128i __R)
{
  return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R);
}

/// Applies the AES InvMixColumns() transformation to an expanded key
/// contained in the source operand, and writes the result to the
/// destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VAESIMC </c> instruction.
///
/// \param __V
///    A 128-bit integer vector containing the expanded key.
/// \returns A 128-bit integer vector containing the transformed value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_aesimc_si128(__m128i __V)
{
  return (__m128i)__builtin_ia32_aesimc128((__v2di)__V);
}

/// Generates a round key for AES encryption, operating on 128-bit data
/// specified in the first source operand and using an 8-bit round constant
/// specified by the second source operand, and writes the result to the
/// destination.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_aeskeygenassist_si128(__m128i C, const int R);
/// \endcode
///
/// This intrinsic corresponds to the <c> AESKEYGENASSIST </c> instruction.
///
/// \param C
///    A 128-bit integer vector that is used to generate the AES encryption key.
/// \param R
///    An 8-bit round constant used to generate the AES encryption key.
/// \returns A 128-bit round key for AES encryption.
#define _mm_aeskeygenassist_si128(C, R) \
  ((__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R)))

#undef __DEFAULT_FN_ATTRS

#endif /* __WMMINTRIN_AES_H */
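To show how the round intrinsics above compose, here is a minimal AES-128 block-encryption sketch (not from the diff; it assumes the 11 round keys in rk[] were expanded elsewhere, e.g. with _mm_aeskeygenassist_si128, and a -maes build):

#include <wmmintrin.h>

/* Encrypt one 16-byte block with pre-expanded AES-128 round keys. */
static __m128i aes128_encrypt_block(__m128i block, const __m128i rk[11]) {
  block = _mm_xor_si128(block, rk[0]);          /* initial AddRoundKey */
  for (int i = 1; i < 10; ++i)
    block = _mm_aesenc_si128(block, rk[i]);     /* rounds 1..9 */
  return _mm_aesenclast_si128(block, rk[10]);   /* final round */
}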
@@ -1,48 +0,0 @@
/*===---- __wmmintrin_pclmul.h - PCLMUL intrinsics --------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __WMMINTRIN_H
#error "Never use <__wmmintrin_pclmul.h> directly; include <wmmintrin.h> instead."
#endif

#ifndef __WMMINTRIN_PCLMUL_H
#define __WMMINTRIN_PCLMUL_H

/// Multiplies two 64-bit integer values, which are selected from source
/// operands using the immediate-value operand. The multiplication is a
/// carry-less multiplication, and the 128-bit integer product is stored in
/// the destination.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_clmulepi64_si128(__m128i X, __m128i Y, const int I);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCLMULQDQ </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x i64] containing one of the source operands.
/// \param Y
///    A 128-bit vector of [2 x i64] containing one of the source operands.
/// \param I
///    An immediate value specifying which 64-bit values to select from the
///    operands. Bit 0 is used to select a value from operand \a X, and bit
///    4 is used to select a value from operand \a Y: \n
///    Bit[0]=0 indicates that bits[63:0] of operand \a X are used. \n
///    Bit[0]=1 indicates that bits[127:64] of operand \a X are used. \n
///    Bit[4]=0 indicates that bits[63:0] of operand \a Y are used. \n
///    Bit[4]=1 indicates that bits[127:64] of operand \a Y are used.
/// \returns The 128-bit integer vector containing the result of the carry-less
///    multiplication of the selected 64-bit values.
#define _mm_clmulepi64_si128(X, Y, I) \
  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \
                                        (__v2di)(__m128i)(Y), (char)(I)))

#endif /* __WMMINTRIN_PCLMUL_H */
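A short sketch of the immediate encoding (illustrative, not from the diff): bit 0 of the immediate selects the low or high half of X, bit 4 selects the half of Y, so 0x00, 0x01, 0x10 and 0x11 cover the four 64x64 carry-less products:

#include <wmmintrin.h>

/* Low and high 64x64 carry-less partial products of a and b. */
static void clmul_parts(__m128i a, __m128i b, __m128i *lo, __m128i *hi) {
  *lo = _mm_clmulepi64_si128(a, b, 0x00); /* a[63:0]   * b[63:0]   */
  *hi = _mm_clmulepi64_si128(a, b, 0x11); /* a[127:64] * b[127:64] */
}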
@@ -1,165 +0,0 @@
/*===---- adcintrin.h - ADC intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __ADCINTRIN_H
#define __ADCINTRIN_H

#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif

/* Define the default attributes for the functions in this file. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__)) constexpr
#else
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#endif

/* Use C++ inline semantics in C++, GNU inline for C mode. */
#if defined(__cplusplus)
#define __INLINE __inline
#else
#define __INLINE static __inline
#endif

#if defined(__cplusplus)
extern "C" {
#endif

/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
///    A 32-bit unsigned addend.
/// \param __y
///    A 32-bit unsigned addend.
/// \param __p
///    Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
                                                        unsigned int __x,
                                                        unsigned int __y,
                                                        unsigned int *__p) {
  return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
}

/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c SBB instruction.
///
/// \param __cf
///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
///    The 32-bit unsigned minuend.
/// \param __y
///    The 32-bit unsigned subtrahend.
/// \param __p
///    Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
                                                         unsigned int __x,
                                                         unsigned int __y,
                                                         unsigned int *__p) {
  return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
}

#ifdef __x86_64__
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
///    A 64-bit unsigned addend.
/// \param __y
///    A 64-bit unsigned addend.
/// \param __p
///    Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_addcarry_u64(unsigned char __cf, unsigned long long __x,
              unsigned long long __y, unsigned long long *__p) {
  return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}

/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c SBB instruction.
///
/// \param __cf
///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
///    The 64-bit unsigned minuend.
/// \param __y
///    The 64-bit unsigned subtrahend.
/// \param __p
///    Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_subborrow_u64(unsigned char __cf, unsigned long long __x,
               unsigned long long __y, unsigned long long *__p) {
  return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
}
#endif

#if defined(__cplusplus)
}
#endif

#undef __INLINE
#undef __DEFAULT_FN_ATTRS

#endif /* __ADCINTRIN_H */
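For context on how these carry-chain intrinsics compose, a minimal sketch (not part of the diff; the 4-limb little-endian layout and names are illustrative) that adds two 256-bit integers by threading the carry flag through _addcarry_u64:

#include <immintrin.h>

/* Adds two 256-bit numbers stored little-endian as 4 x 64-bit limbs.
   Returns the final carry-out. */
static unsigned char add256(const unsigned long long a[4],
                            const unsigned long long b[4],
                            unsigned long long out[4]) {
  unsigned char c = 0;
  for (int i = 0; i < 4; ++i)
    c = _addcarry_u64(c, a[i], b[i], &out[i]);
  return c;
}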
@@ -1,107 +0,0 @@
/*===---- adxintrin.h - ADX intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <adxintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __ADXINTRIN_H
#define __ADXINTRIN_H

/* Define the default attributes for the functions in this file. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("adx"))) constexpr
#else
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("adx")))
#endif

/* Use C++ inline semantics in C++, GNU inline for C mode. */
#if defined(__cplusplus)
#define __INLINE __inline
#else
#define __INLINE static __inline
#endif

#if defined(__cplusplus)
extern "C" {
#endif

/* Intrinsics that are available only if __ADX__ is defined. */

/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADCX instruction.
///
/// \param __cf
///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
///    A 32-bit unsigned addend.
/// \param __y
///    A 32-bit unsigned addend.
/// \param __p
///    Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarryx_u32(unsigned char __cf,
                                                         unsigned int __x,
                                                         unsigned int __y,
                                                         unsigned int *__p) {
  return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
}

#ifdef __x86_64__
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADCX instruction.
///
/// \param __cf
///    The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
///    A 64-bit unsigned addend.
/// \param __y
///    A 64-bit unsigned addend.
/// \param __p
///    Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_addcarryx_u64(unsigned char __cf, unsigned long long __x,
               unsigned long long __y, unsigned long long *__p) {
  return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
#endif

#if defined(__cplusplus)
}
#endif

#undef __INLINE
#undef __DEFAULT_FN_ATTRS

#endif /* __ADXINTRIN_H */
File diff suppressed because it is too large
@@ -1,191 +0,0 @@
//===-- amdgpuintrin.h - AMDGPU intrinsic functions -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __AMDGPUINTRIN_H
#define __AMDGPUINTRIN_H

#ifndef __AMDGPU__
#error "This file is intended for AMDGPU targets or offloading to AMDGPU"
#endif

#ifndef __GPUINTRIN_H
#error "Never use <amdgpuintrin.h> directly; include <gpuintrin.h> instead"
#endif

_Pragma("omp begin declare target device_type(nohost)");
_Pragma("omp begin declare variant match(device = {arch(amdgcn)})");

// Type aliases to the address spaces used by the AMDGPU backend.
#define __gpu_private __attribute__((address_space(5)))
#define __gpu_constant __attribute__((address_space(4)))
#define __gpu_local __attribute__((address_space(3)))
#define __gpu_global __attribute__((address_space(1)))
#define __gpu_generic __attribute__((address_space(0)))

// Attribute to declare a function as a kernel.
#define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected")))

// Returns the number of workgroups in the 'x' dimension of the grid.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
  return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();
}

// Returns the number of workgroups in the 'y' dimension of the grid.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_y(void) {
  return __builtin_amdgcn_grid_size_y() / __builtin_amdgcn_workgroup_size_y();
}

// Returns the number of workgroups in the 'z' dimension of the grid.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_z(void) {
  return __builtin_amdgcn_grid_size_z() / __builtin_amdgcn_workgroup_size_z();
}

// Returns the 'x' dimension of the current AMD workgroup's id.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_x(void) {
  return __builtin_amdgcn_workgroup_id_x();
}

// Returns the 'y' dimension of the current AMD workgroup's id.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_y(void) {
  return __builtin_amdgcn_workgroup_id_y();
}

// Returns the 'z' dimension of the current AMD workgroup's id.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_z(void) {
  return __builtin_amdgcn_workgroup_id_z();
}

// Returns the number of workitems in the 'x' dimension.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_x(void) {
  return __builtin_amdgcn_workgroup_size_x();
}

// Returns the number of workitems in the 'y' dimension.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_y(void) {
  return __builtin_amdgcn_workgroup_size_y();
}

// Returns the number of workitems in the 'z' dimension.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_z(void) {
  return __builtin_amdgcn_workgroup_size_z();
}

// Returns the 'x' dimension id of the workitem in the current AMD workgroup.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_x(void) {
  return __builtin_amdgcn_workitem_id_x();
}

// Returns the 'y' dimension id of the workitem in the current AMD workgroup.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_y(void) {
  return __builtin_amdgcn_workitem_id_y();
}

// Returns the 'z' dimension id of the workitem in the current AMD workgroup.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_z(void) {
  return __builtin_amdgcn_workitem_id_z();
}

// Returns the size of an AMD wavefront, either 32 or 64 depending on hardware
// and compilation options.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_lanes(void) {
  return __builtin_amdgcn_wavefrontsize();
}

// Returns the id of the thread inside of an AMD wavefront executing together.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_id(void) {
  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
}

// Returns the bit-mask of active threads in the current wavefront.
_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_lane_mask(void) {
  return __builtin_amdgcn_read_exec();
}

// Copies the value from the first active thread in the wavefront to the rest.
_DEFAULT_FN_ATTRS static __inline__ uint32_t
__gpu_read_first_lane_u32(uint64_t __lane_mask, uint32_t __x) {
  return __builtin_amdgcn_readfirstlane(__x);
}

// Returns a bitmask of threads in the current lane for which \p x is true.
_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_ballot(uint64_t __lane_mask,
                                                          bool __x) {
  // ANDing with __lane_mask gives the NVPTX semantics when __lane_mask is a
  // subset of the active threads.
  return __lane_mask & __builtin_amdgcn_ballot_w64(__x);
}

// Waits for all the threads in the block to converge and issues a fence.
_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_threads(void) {
  __builtin_amdgcn_s_barrier();
  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
}

// Waits for all threads in the wavefront to converge; this is a no-op on
// AMDGPU.
_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) {
  __builtin_amdgcn_wave_barrier();
}

// Shuffles the lanes inside the wavefront according to the given index.
_DEFAULT_FN_ATTRS static __inline__ uint32_t
__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x,
                      uint32_t __width) {
  uint32_t __lane = __idx + (__gpu_lane_id() & ~(__width - 1));
  return __builtin_amdgcn_ds_bpermute(__lane << 2, __x);
}

// Returns a bitmask marking all lanes that have the same value of __x.
_DEFAULT_FN_ATTRS static __inline__ uint64_t
__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
  return __gpu_match_any_u32_impl(__lane_mask, __x);
}

// Returns a bitmask marking all lanes that have the same value of __x.
_DEFAULT_FN_ATTRS static __inline__ uint64_t
__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
  return __gpu_match_any_u64_impl(__lane_mask, __x);
}

// Returns the current lane mask if every lane contains __x.
_DEFAULT_FN_ATTRS static __inline__ uint64_t
__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
  return __gpu_match_all_u32_impl(__lane_mask, __x);
}

// Returns the current lane mask if every lane contains __x.
_DEFAULT_FN_ATTRS static __inline__ uint64_t
__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
  return __gpu_match_all_u64_impl(__lane_mask, __x);
}

// Returns true if the flat pointer points to AMDGPU 'shared' memory.
_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_local(void *ptr) {
  return __builtin_amdgcn_is_shared((void [[clang::address_space(0)]] *)((
      void [[clang::opencl_generic]] *)ptr));
}

// Returns true if the flat pointer points to AMDGPU 'private' memory.
_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_private(void *ptr) {
  return __builtin_amdgcn_is_private((void [[clang::address_space(0)]] *)((
      void [[clang::opencl_generic]] *)ptr));
}

// Terminates execution of the associated wavefront.
_DEFAULT_FN_ATTRS [[noreturn]] static __inline__ void __gpu_exit(void) {
  __builtin_amdgcn_endpgm();
}

// Suspend the thread briefly to assist the scheduler during busy loops.
_DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
  __builtin_amdgcn_s_sleep(2);
}

_Pragma("omp end declare variant");
_Pragma("omp end declare target");

#endif // __AMDGPUINTRIN_H
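To see how the grid queries above compose, a hedged sketch of a flat global thread id (the kernel and its arguments are illustrative; assumes the <gpuintrin.h> wrappers shown in this header):

#include <gpuintrin.h>

/* Flat global id in the x dimension: workgroup offset plus workitem id. */
__gpu_kernel void copy_kernel(const int *in, int *out, int n) {
  uint32_t gid = __gpu_block_id_x() * __gpu_num_threads_x() +
                 __gpu_thread_id_x();
  if (gid < (uint32_t)n)
    out[gid] = in[gid];
}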
@@ -1,183 +0,0 @@
/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H

#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif

#include <pmmintrin.h>

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128)))

/// Extracts the specified bits from the lower 64 bits of the 128-bit
/// integer vector operand at the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a x are extracted. If the length is zero but the
///    index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx) \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
                                  (char)(len), (char)(idx)))

/// Extracts the specified bits from the lower 64 bits of the 128-bit
/// integer vector operand at the index and of the length specified by
/// \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param __x
///    The value from which bits are extracted.
/// \param __y
///    Specifies the index of the least significant bit at [13:8] and the
///    length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
///    length is interpreted as 64. If the sum of the index and length is
///    greater than 64, the result is undefined. If the length and index are
///    both zero, bits [63:0] of parameter \a __x are extracted. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
///    from the source operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
}

/// Inserts bits of a specified length from the source integer vector
/// \a y into the lower 64 bits of the destination integer vector \a x at
/// the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
///                          const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length \a len and by the index \a idx specifying the
///    least significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a y of length \a len.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a y are inserted into parameter \a x. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return value
///    are undefined.
#define _mm_inserti_si64(x, y, len, idx) \
  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
                                    (__v2di)(__m128i)(y), \
                                    (char)(len), (char)(idx)))

/// Inserts bits of a specified length from the source integer vector
/// \a __y into the lower 64 bits of the destination integer vector \a __x
/// at the index and of the length specified by \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param __x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length and by the index of the least significant bit
///    specified by operand \a __y.
/// \param __y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a __y with length
///    specified by bits [69:64]. These are inserted into the destination at the
///    index specified by bits [77:72]; all other bits are ignored. If bits
///    [69:64] are zero, the length is interpreted as 64. If the sum of the
///    index and length is greater than 64, the result is undefined. If the
///    length and index are both zero, bits [63:0] of parameter \a __y are
///    inserted into parameter \a __x. If the length is zero but the index is
///    non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a __x with the specified bitfields replaced by the
///    lower bits of source operand \a __y. The upper 64 bits of the return
///    value are undefined.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
}

/// Stores a 64-bit double-precision value in a 64-bit memory location.
///    To minimize caching, the data is flagged as non-temporal (unlikely to be
///    used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
///
/// \param __p
///    The 64-bit memory location used to store the register value.
/// \param __a
///    The 64-bit double-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(void *__p, __m128d __a)
{
  __builtin_ia32_movntsd((double *)__p, (__v2df)__a);
}

/// Stores a 32-bit single-precision floating-point value in a 32-bit
///    memory location. To minimize caching, the data is flagged as
///    non-temporal (unlikely to be used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
///
/// \param __p
///    The 32-bit memory location used to store the register value.
/// \param __a
///    The 32-bit single-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(void *__p, __m128 __a)
{
  __builtin_ia32_movntss((float *)__p, (__v4sf)__a);
}

#undef __DEFAULT_FN_ATTRS

#endif /* __AMMINTRIN_H */
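A small sketch of the SSE4a immediate forms (values illustrative, not from the diff): extract an 8-bit field starting at bit 8 of the low qword, then insert it back at bit offset 32:

#include <ammintrin.h>

static __m128i move_byte_field(__m128i v) {
  /* Pull bits [15:8] of the low qword into bits [7:0]. */
  __m128i f = _mm_extracti_si64(v, 8, 8);
  /* Write those 8 bits back into v at bit offset 32. */
  return _mm_inserti_si64(v, f, 8, 32);
}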
@@ -1,382 +0,0 @@
|
|||||||
/*===--------------------- amxavx512intrin.h - AMXAVX512 --------------------===
|
|
||||||
*
|
|
||||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
||||||
* See https://llvm.org/LICENSE.txt for license information.
|
|
||||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
||||||
*
|
|
||||||
*===------------------------------------------------------------------------===
|
|
||||||
*/
|
|
||||||
#ifndef __IMMINTRIN_H
|
|
||||||
#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead."
|
|
||||||
#endif // __IMMINTRIN_H
|
|
||||||
|
|
||||||
#ifndef __AMX_AVX512INTRIN_H
|
|
||||||
#define __AMX_AVX512INTRIN_H
|
|
||||||
#if defined(__x86_64__) && defined(__SSE2__)
|
|
||||||
|
|
||||||
#define __DEFAULT_FN_ATTRS_AVX512 \
|
|
||||||
__attribute__((__always_inline__, __nodebug__, \
|
|
||||||
__target__("amx-avx512,avx10.2-512")))
|
|
||||||
|
|
||||||
/// Moves a row from a tile register to a zmm destination register, converting
|
|
||||||
/// the int32 source elements to fp32. The row of the tile is selected by a
|
|
||||||
/// 32b GPR.
|
|
||||||
///
|
|
||||||
/// \headerfile <x86intrin.h>
|
|
||||||
///
|
|
||||||
/// \code
|
|
||||||
/// __m512i _tile_cvtrowd2ps(__tile tsrc, unsigned int row);
|
|
||||||
/// \endcode
|
|
||||||
///
|
|
||||||
/// \code{.operation}
|
|
||||||
/// VL := 512
|
|
||||||
/// VL_bytes := VL >> 3
|
|
||||||
/// row_index := row & 0xffff
|
|
||||||
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
|
|
||||||
/// FOR i := 0 TO (VL_bytes / 4) - 1
|
|
||||||
/// IF i + row_chunk / 4 >= tsrc.colsb / 4
|
|
||||||
/// dst.dword[i] := 0
|
|
||||||
/// ELSE
|
|
||||||
/// dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
|
|
||||||
/// FI
|
|
||||||
/// ENDFOR
|
|
||||||
/// dst[MAX_VL-1:VL] := 0
|
|
||||||
/// zero_tileconfig_start()
|
|
||||||
/// \endcode
|
|
||||||
///
|
|
||||||
/// This intrinsic corresponds to the \c TCVTROWD2PS instruction.
|
|
||||||
///
|
|
||||||
/// \param tsrc
|
|
||||||
/// The source tile. Max size is 1024 Bytes.
|
|
||||||
/// \param row
|
|
||||||
/// The row of the source tile
|
|
||||||
#define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row)
|
|
||||||
|
|
||||||
/// Moves a row from a tile register to a zmm destination register, converting
|
|
||||||
/// the fp32 source elements to bf16. It places the resulting bf16 elements
|
|
||||||
/// in the high 16 bits within each dword. The row of the tile is selected
|
|
||||||
/// by a 32b GPR.
|
|
||||||
///
|
|
||||||
/// \headerfile <x86intrin.h>
|
|
||||||
///
|
|
||||||
/// \code
|
|
||||||
/// __m512i _tile_cvtrowps2bf16h(__tile tsrc, unsigned int row);
|
|
||||||
/// \endcode
|
|
||||||
///
|
|
||||||
/// \code{.operation}
|
|
||||||
/// VL := 512
|
|
||||||
/// VL_bytes := VL >> 3
|
|
||||||
/// row_index := row & 0xffff
|
|
||||||
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
|
|
||||||
/// FOR i := 0 TO (VL_bytes / 4) - 1
|
|
||||||
/// IF i + row_chunk / 4 >= tsrc.colsb / 4
|
|
||||||
/// dst.dword[i] := 0
|
|
||||||
/// ELSE
|
|
||||||
/// dst.word[2*i+0] := 0
|
|
||||||
/// dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
|
|
||||||
/// FI
|
|
||||||
/// ENDFOR
|
|
||||||
/// dst[MAX_VL-1:VL] := 0
|
|
||||||
/// zero_tileconfig_start()
|
|
||||||
/// \endcode
|
|
||||||
///
|
|
||||||
/// This intrinsic corresponds to the \c TCVTROWPS2BF16H instruction.
|
|
||||||
///
|
|
||||||
/// \param tsrc
|
|
||||||
/// The source tile. Max size is 1024 Bytes.
|
|
||||||
/// \param row
|
|
||||||
/// The the row of the source tile.
|
|
||||||
#define _tile_cvtrowps2bf16h(tsrc, row) \
|
|
||||||
__builtin_ia32_tcvtrowps2bf16h(tsrc, row)
|
|
||||||
|
|
||||||
/// Moves a row from a tile register to a zmm destination register, converting
|
|
||||||
/// the fp32 source elements to bf16. It places the resulting bf16 elements
|
|
||||||
/// in the low 16 bits within each dword. The row of the tile is selected
|
|
||||||
/// by a 32b GPR.
|
|
||||||
///
|
|
||||||
/// \headerfile <x86intrin.h>
|
|
||||||
///
|
|
||||||
/// \code
|
|
||||||
/// __m512i _tile_cvtrowps2bf16l(__tile tsrc, unsigned int row);
|
|
||||||
/// \endcode
|
|
||||||
///
|
|
||||||
/// \code{.operation}
|
|
||||||
/// VL := 512
|
|
||||||
/// VL_bytes := VL >> 3
|
|
||||||
/// row_index := row & 0xffff
|
|
||||||
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
|
|
||||||
/// FOR i := 0 TO (VL_bytes / 4) - 1
|
|
||||||
/// IF i + row_chunk / 4 >= tsrc.colsb / 4
|
|
||||||
/// dst.dword[i] := 0
|
|
||||||
/// ELSE
|
|
||||||
/// dst.word[2*i+1] := 0
|
|
||||||
/// dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
|
|
||||||
/// FI
|
|
||||||
/// ENDFOR
|
|
||||||
/// dst[MAX_VL-1:VL] := 0
|
|
||||||
/// zero_tileconfig_start()
|
|
||||||
/// \endcode
|
|
||||||
///
|
|
||||||
/// This intrinsic corresponds to the \c TCVTROWPS2BF16L instruction.
|
|
||||||
///
|
|
||||||
/// \param tsrc
|
|
||||||
/// The source tile. Max size is 1024 Bytes.
|
|
||||||
/// \param row
|
|
||||||
/// The the row of the source tile.
|
|
||||||
#define _tile_cvtrowps2bf16l(tsrc, row) \
|
|
||||||
__builtin_ia32_tcvtrowps2bf16l(tsrc, row)
|
|
||||||
|
|
||||||
/// Moves a row from a tile register to a zmm destination register, converting
|
|
||||||
/// the fp32 source elements to fp16. It places the resulting fp16 elements
|
|
||||||
/// in the high 16 bits within each dword. The row of the tile is selected
|
|
||||||
/// by a 32b GPR.
|
|
||||||
///
|
|
||||||
/// \headerfile <x86intrin.h>
|
|
||||||
///
|
|
||||||
/// \code
|
|
||||||
/// __m512i _tile_cvtrowps2phh(__tile tsrc, unsigned int row);
|
|
||||||
/// \endcode
|
|
||||||
///
|
|
||||||
/// \code{.operation}
|
|
||||||
/// VL := 512
|
|
||||||
/// VL_bytes := VL >> 3
|
|
||||||
/// row_index := row & 0xffff
|
|
||||||
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
|
|
||||||
/// FOR i := 0 TO (VL_bytes / 4) - 1
|
|
||||||
/// IF i + row_chunk / 4 >= tsrc.colsb / 4
|
|
||||||
/// dst.dword[i] := 0
|
|
||||||
/// ELSE
|
|
||||||
/// dst.word[2*i+0] := 0
|
|
||||||
/// dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
|
|
||||||
/// FI
|
|
||||||
/// ENDFOR
|
|
||||||
/// dst[MAX_VL-1:VL] := 0
|
|
||||||
/// zero_tileconfig_start()
|
|
||||||
/// \endcode
|
|
||||||
///
|
|
||||||
/// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction.
|
|
||||||
///
|
|
||||||
/// \param tsrc
|
|
||||||
/// The source tile. Max size is 1024 Bytes.
|
|
||||||
/// \param row
|
|
||||||
/// The the row of the source tile.
|
|
||||||
#define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row)
|
|
||||||
|
|
||||||
/// Moves a row from a tile register to a zmm destination register, converting
|
|
||||||
/// the fp32 source elements to fp16. It places the resulting fp16 elements
|
|
||||||
/// in the low 16 bits within each dword. The row of the tile is selected
|
|
||||||
/// by a 32b GPR.
|
|
||||||
///
|
|
||||||
/// \headerfile <x86intrin.h>
|
|
||||||
///
|
|
||||||
/// \code
|
|
||||||
/// __m512i _tile_cvtrowps2phl(__tile tsrc, unsigned int row);
|
|
||||||
/// \endcode
|
|
||||||
///
|
|
||||||
/// \code{.operation}
|
|
||||||
/// VL := 512
|
|
||||||
/// VL_bytes := VL >> 3
|
|
||||||
/// row_index := row & 0xffff
|
|
||||||
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
|
|
||||||
/// FOR i := 0 TO (VL_bytes / 4) - 1
|
|
||||||
/// IF i + row_chunk / 4 >= tsrc.colsb / 4
|
|
||||||
/// dst.dword[i] := 0
|
|
||||||
/// ELSE
|
|
||||||
/// dst.word[2*i+1] := 0
|
|
||||||
/// dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
|
|
||||||
/// FI
|
|
||||||
/// ENDFOR
|
|
||||||
/// dst[MAX_VL-1:VL] := 0
|
|
||||||
/// zero_tileconfig_start()
|
|
||||||
/// \endcode
|
|
||||||
///
|
|
||||||
/// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction.
|
|
||||||
///
|
|
||||||
/// \param tsrc
|
|
||||||
/// The source tile. Max size is 1024 Bytes.
|
|
||||||
/// \param row
|
|
||||||
/// The the row of the source tile.
|
|
||||||
#define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row)
|
|
||||||
|
|
||||||
/// Move one row of a tile data to a v16f32 data.
|
|
||||||
/// The row of the tile is selected by a 32b GPR.
|
|
||||||
///
|
|
||||||
/// \headerfile <immintrin.h>
|
|
||||||
///
|
|
||||||
/// \code
|
|
||||||
/// __m512 _tile_movrow(__tile a, unsigned b);
|
|
||||||
/// \endcode
|
|
||||||
///
|
|
||||||
/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
|
|
||||||
///
|
|
||||||
/// \param a
|
|
||||||
/// The 1st source tile. Max size is 1024 Bytes.
|
|
||||||
/// \param b
|
|
||||||
/// The 2nd source r32. Size is 4 Bytes.
|
|
||||||
/// \returns
|
|
||||||
/// The destination v16f32 data. Size is 64 Bytes.
|
|
||||||
///
|
|
||||||
/// \code{.operation}
|
|
||||||
/// VL := 512
|
|
||||||
/// VL_bytes := VL>>3
|
|
||||||
/// row_index := b&0xffff
|
|
||||||
/// row_chunk := ((b>>16)&0xffff) * VL_bytes
|
|
||||||
/// FOR i := 0 TO (VL_bytes-1)
|
|
||||||
/// IF (row_chunk + i >= a.colsb)
|
|
||||||
/// dst.byte[i] := 0
|
|
||||||
/// ELSE
|
|
||||||
/// dst.byte[i] := a.row[row_index].byte[row_chunk+i]
|
|
||||||
/// ENDFOR
|
|
||||||
/// \endcode
|
|
||||||
#define _tile_movrow(a, b) ((__m512i)__builtin_ia32_tilemovrow(a, b))
|
|
||||||
|
|
||||||
/// This is internal intrinsic. C/C++ user should avoid calling it directly.
|
|
||||||
|
|
||||||
static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
|
|
||||||
unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
|
|
||||||
return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
|
|
||||||
_tile_cvtrowps2bf16h_internal(unsigned short m, unsigned short n,
|
|
||||||
_tile1024i src, unsigned u) {
|
|
||||||
return __builtin_ia32_tcvtrowps2bf16h_internal(m, n, src, u);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
|
|
||||||
_tile_cvtrowps2bf16l_internal(unsigned short m, unsigned short n,
|
|
||||||
_tile1024i src, unsigned u) {
|
|
||||||
return __builtin_ia32_tcvtrowps2bf16l_internal(m, n, src, u);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
|
|
||||||
unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
|
|
||||||
return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal(
|
|
    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
  return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_AVX512 _tile_movrow_internal(
    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
  return (__m512i)__builtin_ia32_tilemovrow_internal(m, n, src, u);
}

/// Move a row from a tile (src0) to a v16f32 dst, converting the int32 source
/// elements to fp32. No SIMD exceptions are generated. Rounding is done as if
/// MXCSR.RC=RNE. Embedded rounding is not supported.
/// The row and chunk elements of the tile are fetched from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWD2PS </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v16f32 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
  return _tile_cvtrowd2ps_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
/// elements to bf16 at the high 16 bits of each dword.
/// The row and chunk elements of the tile are fetched from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2BF16H </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0, unsigned src1) {
  return _tile_cvtrowps2bf16h_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
/// elements to bf16 at the low 16 bits of each dword.
/// The row and chunk elements of the tile are fetched from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2BF16L </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0, unsigned src1) {
  return _tile_cvtrowps2bf16l_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
/// elements to fp16 at the high 16 bits of each dword.
/// The row and chunk elements of the tile are fetched from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PHH </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32fp16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
  return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
/// elements to fp16 at the low 16 bits of each dword.
/// The row and chunk elements of the tile are fetched from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PHL </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32fp16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) {
  return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1);
}
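
/* Example (not part of the original header): a minimal sketch of how the H/L
 * conversion wrappers above pair up. Each call converts one tile row of fp32
 * elements to fp16; one variant places results in the high 16 bits of each
 * dword lane, the other in the low 16 bits. The helper name and the idea of
 * fetching both halves of the same row are illustrative assumptions. */
static void __tile_cvtrow_pair_example(__m512h *hi, __m512h *lo,
                                       __tile1024i src0, unsigned row) {
  *hi = __tile_cvtrowps2phh(src0, row); /* fp16 in high 16 bits of each dword */
  *lo = __tile_cvtrowps2phl(src0, row); /* fp16 in low 16 bits of each dword */
}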

/// Move one row of tile data to a v16i32 data.
/// The row of the tile is selected by a 32-bit GPR.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v16i32 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512i __tile_movrow(__tile1024i src0, unsigned src1) {
  return (__m512i)_tile_movrow_internal(src0.row, src0.col, src0.tile, src1);
}
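
/* Example (not part of the original header): a hedged sketch showing
 * __tile_movrow pulling one row of an int32 accumulator tile into a plain
 * __m512i vector so it can be post-processed with ordinary AVX-512
 * intrinsics. The reduction helper below is an illustrative assumption and
 * requires AVX-512F support in the enclosing translation unit. */
static int __tile_row_sum_example(__tile1024i acc, unsigned row) {
  __m512i v = __tile_movrow(acc, row); /* TILEMOVROW: one 64-byte row */
  return _mm512_reduce_add_epi32(v);   /* sum of the 16 int32 elements */
}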

#endif // __x86_64__ && __SSE2__
#endif // __AMX_AVX512INTRIN_H
@@ -1,94 +0,0 @@
/*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===------------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error \
    "Never use <amxbf16transposeintrin.h> directly; use <immintrin.h> instead."
#endif /* __IMMINTRIN_H */

#ifndef __AMX_BF16TRANSPOSEINTRIN_H
#define __AMX_BF16TRANSPOSEINTRIN_H
#ifdef __x86_64__

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("amx-bf16,amx-transpose")))

/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
/// tiles \a a and \a b, accumulating the intermediate single-precision
/// (32-bit) floating-point elements with elements in \a dst, and store the
/// 32-bit result back to tile \a dst.
///
/// \headerfile <immintrin.h>
///
/// \code
/// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b)
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   tmp := dst.row[m]
///   FOR k := 0 TO (a.colsb / 4) - 1
///     FOR n := 0 TO (dst.colsb / 4) - 1
///       tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) *
///                      FP32(b.row[k].bf16[2*n+0])
///       tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) *
///                      FP32(b.row[k].bf16[2*n+1])
///     ENDFOR
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TTDPBF16PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b))

/// This is an internal intrinsic. C/C++ users should avoid calling it
/// directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS
_tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k,
                         _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2);
}

/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
/// tiles src0 and src1, accumulating the intermediate single-precision
/// (32-bit) floating-point elements with elements in "dst", and store the
/// 32-bit result back to tile "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TTDPBF16PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS
static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0,
                                        __tile1024i src1) {
  dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
                                       src0.tile, src1.tile);
}
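
/* Example (not part of the original header): a hedged end-to-end sketch of
 * the transposed BF16 matmul wrapper. It assumes the __tile1024i helper API
 * from <amxintrin.h> (__tile_loadd, __tile_zero, __tile_stored), a tile
 * configuration that was already loaded, and illustrative 16x64-byte tile
 * shapes; none of those specifics come from this file. */
static void __tile_tdpbf16ps_example(float *c, const __bf16 *a,
                                     const __bf16 *b, __SIZE_TYPE__ stride_c,
                                     __SIZE_TYPE__ stride_a,
                                     __SIZE_TYPE__ stride_b) {
  __tile1024i ta = {16, 64}, tb = {16, 64}, tc = {16, 64};
  __tile_loadd(&ta, a, stride_a); /* bf16 pairs; transposed by the matmul */
  __tile_loadd(&tb, b, stride_b);
  __tile_zero(&tc);
  __tile_tdpbf16ps(&tc, ta, tb); /* tc += transpose(ta) * tb, fp32 accum */
  __tile_stored(c, stride_c, tc);
}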

#undef __DEFAULT_FN_ATTRS

#endif /* __x86_64__ */
#endif /* __AMX_BF16TRANSPOSEINTRIN_H */
@@ -1,167 +0,0 @@
/*===--------- amxcomplexintrin.h - AMXCOMPLEX intrinsics -*- C++ -*---------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===------------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <amxcomplexintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AMX_COMPLEXINTRIN_H
#define __AMX_COMPLEXINTRIN_H
#ifdef __x86_64__

#define __DEFAULT_FN_ATTRS_COMPLEX                                             \
  __attribute__((__always_inline__, __nodebug__, __target__("amx-complex")))

/// Perform matrix multiplication of two tiles containing complex elements and
/// accumulate the results into a packed single precision tile. Each dword
/// element in input tiles \a a and \a b is interpreted as a complex number
/// with FP16 real part and FP16 imaginary part.
/// Calculates the imaginary part of the result. For each possible combination
/// of (row of \a a, column of \a b), it performs a set of multiplication
/// and accumulations on all corresponding complex numbers (one from \a a
/// and one from \a b). The imaginary part of the \a a element is multiplied
/// with the real part of the corresponding \a b element, and the real part
/// of the \a a element is multiplied with the imaginary part of the
/// corresponding \a b elements. The two accumulated results are added, and
/// then accumulated into the corresponding row and column of \a dst.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// void _tile_cmmimfp16ps(__tile dst, __tile a, __tile b);
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   tmp := dst.row[m]
///   FOR k := 0 TO (a.colsb / 4) - 1
///     FOR n := 0 TO (dst.colsb / 4) - 1
///       tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
///       tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
///     ENDFOR
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCMMIMFP16PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_cmmimfp16ps(dst, a, b) __builtin_ia32_tcmmimfp16ps(dst, a, b)

/// Perform matrix multiplication of two tiles containing complex elements and
/// accumulate the results into a packed single precision tile. Each dword
/// element in input tiles \a a and \a b is interpreted as a complex number
/// with FP16 real part and FP16 imaginary part.
/// Calculates the real part of the result. For each possible combination
/// of (row of \a a, column of \a b), it performs a set of multiplication
/// and accumulations on all corresponding complex numbers (one from \a a
/// and one from \a b). The real part of the \a a element is multiplied
/// with the real part of the corresponding \a b element, and the negated
/// imaginary part of the \a a element is multiplied with the imaginary
/// part of the corresponding \a b elements. The two accumulated results
/// are added, and then accumulated into the corresponding row and column
/// of \a dst.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// void _tile_cmmrlfp16ps(__tile dst, __tile a, __tile b);
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   tmp := dst.row[m]
///   FOR k := 0 TO (a.colsb / 4) - 1
///     FOR n := 0 TO (dst.colsb / 4) - 1
///       tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0])
///       tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1])
///     ENDFOR
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCMMRLFP16PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_cmmrlfp16ps(dst, a, b) __builtin_ia32_tcmmrlfp16ps(dst, a, b)

static __inline__ _tile1024i __DEFAULT_FN_ATTRS_COMPLEX
_tile_cmmimfp16ps_internal(unsigned short m, unsigned short n, unsigned short k,
                           _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tcmmimfp16ps_internal(m, n, k, dst, src1, src2);
}

static __inline__ _tile1024i __DEFAULT_FN_ATTRS_COMPLEX
_tile_cmmrlfp16ps_internal(unsigned short m, unsigned short n, unsigned short k,
                           _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tcmmrlfp16ps_internal(m, n, k, dst, src1, src2);
}

/// Perform matrix multiplication of two tiles containing complex elements and
/// accumulate the results into a packed single precision tile. Each dword
/// element in input tiles src0 and src1 is interpreted as a complex number with
/// FP16 real part and FP16 imaginary part.
/// This function calculates the imaginary part of the result.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCMMIMFP16PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
static __inline__ void __DEFAULT_FN_ATTRS_COMPLEX
__tile_cmmimfp16ps(__tile1024i *dst, __tile1024i src0, __tile1024i src1) {
  dst->tile = _tile_cmmimfp16ps_internal(src0.row, src1.col, src0.col,
                                         dst->tile, src0.tile, src1.tile);
}

/// Perform matrix multiplication of two tiles containing complex elements and
/// accumulate the results into a packed single precision tile. Each dword
/// element in input tiles src0 and src1 is interpreted as a complex number with
/// FP16 real part and FP16 imaginary part.
/// This function calculates the real part of the result.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCMMRLFP16PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
static __inline__ void __DEFAULT_FN_ATTRS_COMPLEX
__tile_cmmrlfp16ps(__tile1024i *dst, __tile1024i src0, __tile1024i src1) {
  dst->tile = _tile_cmmrlfp16ps_internal(src0.row, src1.col, src0.col,
                                         dst->tile, src0.tile, src1.tile);
}
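
/* Example (not part of the original header): a sketch combining the two
 * wrappers above into one complex matmul step. One call accumulates the real
 * parts and the other the imaginary parts into separate fp32 tiles; the
 * helper name and the two-accumulator layout are illustrative assumptions. */
static __inline__ void __DEFAULT_FN_ATTRS_COMPLEX
__tile_cmm_example(__tile1024i *re, __tile1024i *im, __tile1024i a,
                   __tile1024i b) {
  __tile_cmmrlfp16ps(re, a, b); /* re += Re(a x b), fp32 accumulation */
  __tile_cmmimfp16ps(im, a, b); /* im += Im(a x b), fp32 accumulation */
}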

#endif // __x86_64__
#endif // __AMX_COMPLEXINTRIN_H
@@ -1,303 +0,0 @@
/*===----- amxcomplextransposeintrin.h - AMX-COMPLEX and AMX-TRANSPOSE ------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===------------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error \
    "Never use <amxcomplextransposeintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AMX_COMPLEXTRANSPOSEINTRIN_H
#define __AMX_COMPLEXTRANSPOSEINTRIN_H
#ifdef __x86_64__

#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("amx-complex,amx-transpose")))

/// Perform matrix multiplication of two tiles containing complex elements and
/// accumulate the results into a packed single precision tile. Each dword
/// element in input tiles \a a and \a b is interpreted as a complex number
/// with FP16 real part and FP16 imaginary part.
/// Calculates the imaginary part of the result. For each possible combination
/// of (transposed column of \a a, column of \a b), it performs a set of
/// multiplication and accumulations on all corresponding complex numbers
/// (one from \a a and one from \a b). The imaginary part of the \a a element
/// is multiplied with the real part of the corresponding \a b element, and
/// the real part of the \a a element is multiplied with the imaginary part
/// of the corresponding \a b elements. The two accumulated results are
/// added, and then accumulated into the corresponding row and column of
/// \a dst.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// void _tile_tcmmimfp16ps(__tile dst, __tile a, __tile b);
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   tmp := dst.row[m]
///   FOR k := 0 TO a.rows - 1
///     FOR n := 0 TO (dst.colsb / 4) - 1
///       tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
///       tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
///     ENDFOR
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_tcmmimfp16ps(dst, a, b)                                          \
  __builtin_ia32_ttcmmimfp16ps((dst), (a), (b))

/// Perform matrix multiplication of two tiles containing complex elements and
/// accumulate the results into a packed single precision tile. Each dword
/// element in input tiles \a a and \a b is interpreted as a complex number
/// with FP16 real part and FP16 imaginary part.
/// Calculates the real part of the result. For each possible combination
/// of (transposed column of \a a, column of \a b), it performs a set of
/// multiplication and accumulations on all corresponding complex numbers
/// (one from \a a and one from \a b). The real part of the \a a element is
/// multiplied with the real part of the corresponding \a b element, and the
/// negated imaginary part of the \a a element is multiplied with the
/// imaginary part of the corresponding \a b elements. The two accumulated
/// results are added, and then accumulated into the corresponding row and
/// column of \a dst.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// void _tile_tcmmrlfp16ps(__tile dst, __tile a, __tile b);
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   tmp := dst.row[m]
///   FOR k := 0 TO a.rows - 1
///     FOR n := 0 TO (dst.colsb / 4) - 1
///       tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0])
///       tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1])
///     ENDFOR
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TTCMMRLFP16PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_tcmmrlfp16ps(dst, a, b)                                          \
  __builtin_ia32_ttcmmrlfp16ps((dst), (a), (b))

/// Perform matrix conjugate transpose and multiplication of two tiles
/// containing complex elements and accumulate the results into a packed
/// single precision tile. Each dword element in input tiles \a a and \a b
/// is interpreted as a complex number with FP16 real part and FP16 imaginary
/// part.
/// Calculates the imaginary part of the result. For each possible combination
/// of (transposed column of \a a, column of \a b), it performs a set of
/// multiplication and accumulations on all corresponding complex numbers
/// (one from \a a and one from \a b). The negated imaginary part of the \a a
/// element is multiplied with the real part of the corresponding \a b
/// element, and the real part of the \a a element is multiplied with the
/// imaginary part of the corresponding \a b elements. The two accumulated
/// results are added, and then accumulated into the corresponding row and
/// column of \a dst.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// void _tile_conjtcmmimfp16ps(__tile dst, __tile a, __tile b);
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   tmp := dst.row[m]
///   FOR k := 0 TO a.rows - 1
///     FOR n := 0 TO (dst.colsb / 4) - 1
///       tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
///       tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
///     ENDFOR
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCONJTCMMIMFP16PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_conjtcmmimfp16ps(dst, a, b)                                      \
  __builtin_ia32_tconjtcmmimfp16ps((dst), (a), (b))

/// Perform conjugate transpose of an FP16-pair of complex elements from \a a
/// and write the result to \a dst.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// void _tile_conjtfp16(__tile dst, __tile a);
/// \endcode
///
/// \code{.operation}
/// FOR i := 0 TO dst.rows - 1
///   FOR j := 0 TO (dst.colsb / 4) - 1
///     tmp.fp16[2*j+0] := a.row[j].fp16[2*i+0]
///     tmp.fp16[2*j+1] := -a.row[j].fp16[2*i+1]
///   ENDFOR
///   write_row_and_zero(dst, i, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCONJTFP16 instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param a
///    The source tile. Max size is 1024 Bytes.
#define _tile_conjtfp16(dst, a) __builtin_ia32_tconjtfp16((dst), (a))

static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmimfp16ps_internal(
    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
    _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_ttcmmimfp16ps_internal(m, n, k, dst, src1, src2);
}

static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmrlfp16ps_internal(
    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
    _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_ttcmmrlfp16ps_internal(m, n, k, dst, src1, src2);
}

static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_conjtcmmimfp16ps_internal(
    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
    _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tconjtcmmimfp16ps_internal(m, n, k, dst, src1, src2);
}

static __inline__ _tile1024i __DEFAULT_FN_ATTRS
_tile_conjtfp16_internal(unsigned short m, unsigned short n, _tile1024i src) {
  return __builtin_ia32_tconjtfp16_internal(m, n, src);
}

/// Perform matrix multiplication of two tiles containing complex elements and
/// accumulate the results into a packed single precision tile. Each dword
/// element in input tiles src0 and src1 is interpreted as a complex number
/// with FP16 real part and FP16 imaginary part.
/// This function calculates the imaginary part of the result.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TTCMMIMFP16PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS
static void __tile_tcmmimfp16ps(__tile1024i *dst, __tile1024i src0,
                                __tile1024i src1) {
  dst->tile = _tile_tcmmimfp16ps_internal(src0.row, src1.col, src0.col,
                                          dst->tile, src0.tile, src1.tile);
}

/// Perform matrix multiplication of two tiles containing complex elements and
/// accumulate the results into a packed single precision tile. Each dword
/// element in input tiles src0 and src1 is interpreted as a complex number
/// with FP16 real part and FP16 imaginary part.
/// This function calculates the real part of the result.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TTCMMRLFP16PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS
static void __tile_tcmmrlfp16ps(__tile1024i *dst, __tile1024i src0,
                                __tile1024i src1) {
  dst->tile = _tile_tcmmrlfp16ps_internal(src0.row, src1.col, src0.col,
                                          dst->tile, src0.tile, src1.tile);
}

/// Perform matrix conjugate transpose and multiplication of two tiles
/// containing complex elements and accumulate the results into a packed
/// single precision tile. Each dword element in input tiles src0 and src1
/// is interpreted as a complex number with FP16 real part and FP16 imaginary
/// part.
/// This function calculates the imaginary part of the result.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCONJTCMMIMFP16PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS
static void __tile_conjtcmmimfp16ps(__tile1024i *dst, __tile1024i src0,
                                    __tile1024i src1) {
  dst->tile = _tile_conjtcmmimfp16ps_internal(src0.row, src1.col, src0.col,
                                              dst->tile, src0.tile, src1.tile);
}

/// Perform conjugate transpose of an FP16-pair of complex elements from src
/// and write the result to dst.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCONJTFP16 </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src
///    The source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS
static void __tile_conjtfp16(__tile1024i *dst, __tile1024i src) {
  dst->tile = _tile_conjtfp16_internal(src.row, src.col, src.tile);
}
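
/* Example (not part of the original header): a sketch pairing the transposed
 * complex wrappers above, accumulating real and imaginary parts of
 * transpose(src0) * src1 into two separate fp32 tiles. The helper name and
 * two-accumulator layout are illustrative assumptions. */
__DEFAULT_FN_ATTRS
static void __tile_tcmm_example(__tile1024i *re, __tile1024i *im,
                                __tile1024i src0, __tile1024i src1) {
  __tile_tcmmrlfp16ps(re, src0, src1); /* re += Re(transpose(src0) x src1) */
  __tile_tcmmimfp16ps(im, src0, src1); /* im += Im(transpose(src0) x src1) */
}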

#undef __DEFAULT_FN_ATTRS

#endif // __x86_64__
#endif // __AMX_COMPLEXTRANSPOSEINTRIN_H
@@ -1,93 +0,0 @@
/*===------------- amxfp16intrin.h - AMX_FP16 intrinsics -*- C++ -*---------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===------------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <amxfp16intrin.h> directly; use <immintrin.h> instead."
#endif /* __IMMINTRIN_H */

#ifndef __AMX_FP16INTRIN_H
#define __AMX_FP16INTRIN_H
#ifdef __x86_64__

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("amx-fp16")))

/// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles \a a
/// and \a b, accumulating the intermediate single-precision (32-bit)
/// floating-point elements with elements in \a dst, and store the 32-bit
/// result back to tile \a dst.
///
/// \headerfile <immintrin.h>
///
/// \code
/// void _tile_dpfp16ps (__tile dst, __tile a, __tile b)
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   tmp := dst.row[m]
///   FOR k := 0 TO (a.colsb / 4) - 1
///     FOR n := 0 TO (dst.colsb / 4) - 1
///       tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) *
///                      FP32(b.row[k].fp16[2*n+0])
///       tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) *
///                      FP32(b.row[k].fp16[2*n+1])
///     ENDFOR
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TDPFP16PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_dpfp16ps(dst, a, b)                                              \
  __builtin_ia32_tdpfp16ps(dst, a, b)

/// This is an internal intrinsic. C/C++ users should avoid calling it
/// directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS
_tile_dpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k,
                        _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdpfp16ps_internal(m, n, k, dst, src1, src2);
}

/// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles src0 and
/// src1, accumulating the intermediate single-precision (32-bit) floating-point
/// elements with elements in "dst", and store the 32-bit result back to tile
/// "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TDPFP16PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS
static __inline__ void __tile_dpfp16ps(__tile1024i *dst, __tile1024i src0,
                                       __tile1024i src1) {
  dst->tile = _tile_dpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile,
                                      src0.tile, src1.tile);
}
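
/* Example (not part of the original header): a hedged sketch of the FP16
 * dot-product wrapper in use. It assumes the __tile1024i helpers from
 * <amxintrin.h> (__tile_loadd, __tile_zero, __tile_stored) and an
 * already-loaded tile configuration; shapes and strides are illustrative. */
static void __tile_dpfp16ps_example(float *c, const _Float16 *a,
                                    const _Float16 *b) {
  __tile1024i ta = {16, 64}, tb = {16, 64}, tc = {16, 64};
  __tile_loadd(&ta, a, 64);
  __tile_loadd(&tb, b, 64);
  __tile_zero(&tc);
  __tile_dpfp16ps(&tc, ta, tb); /* tc += ta * tb with fp32 accumulation */
  __tile_stored(c, 64, tc);
}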

#undef __DEFAULT_FN_ATTRS

#endif /* __x86_64__ */
#endif /* __AMX_FP16INTRIN_H */
@@ -1,94 +0,0 @@
/*===----- amxfp16transposeintrin.h - AMX-FP16 and AMX-TRANSPOSE ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===------------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error \
    "Never use <amxfp16transposeintrin.h> directly; use <immintrin.h> instead."
#endif /* __IMMINTRIN_H */

#ifndef __AMX_FP16TRANSPOSEINTRIN_H
#define __AMX_FP16TRANSPOSEINTRIN_H
#ifdef __x86_64__

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("amx-fp16,amx-transpose")))

/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in
/// tiles \a a and \a b, accumulating the intermediate single-precision
/// (32-bit) floating-point elements with elements in \a dst, and store the
/// 32-bit result back to tile \a dst.
///
/// \headerfile <immintrin.h>
///
/// \code
/// void _tile_tdpfp16ps (__tile dst, __tile a, __tile b)
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   tmp := dst.row[m]
///   FOR k := 0 TO (a.colsb / 4) - 1
///     FOR n := 0 TO (dst.colsb / 4) - 1
///       tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) *
///                      FP32(b.row[k].fp16[2*n+0])
///       tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) *
///                      FP32(b.row[k].fp16[2*n+1])
///     ENDFOR
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TTDPFP16PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_tdpfp16ps(dst, a, b) __builtin_ia32_ttdpfp16ps((dst), (a), (b))

/// This is an internal intrinsic. C/C++ users should avoid calling it
/// directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS
_tile_tdpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k,
                         _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_ttdpfp16ps_internal(m, n, k, dst, src1, src2);
}

/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in
/// tiles src0 and src1, accumulating the intermediate single-precision
/// (32-bit) floating-point elements with elements in "dst", and store the
/// 32-bit result back to tile "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TTDPFP16PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS
static __inline__ void __tile_tdpfp16ps(__tile1024i *dst, __tile1024i src0,
                                        __tile1024i src1) {
  dst->tile = _tile_tdpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile,
                                       src0.tile, src1.tile);
}

#undef __DEFAULT_FN_ATTRS

#endif /* __x86_64__ */
#endif /* __AMX_FP16TRANSPOSEINTRIN_H */
@@ -1,230 +0,0 @@
/*===------------- amxfp8intrin.h - AMX intrinsics -*- C++ -*----------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===------------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <amxfp8intrin.h> directly; include <immintrin.h> instead."
#endif /* __IMMINTRIN_H */

#ifndef __AMXFP8INTRIN_H
#define __AMXFP8INTRIN_H
#ifdef __x86_64__

#define __DEFAULT_FN_ATTRS_FP8                                                 \
  __attribute__((__always_inline__, __nodebug__, __target__("amx-fp8")))

static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP8
_tile_dpbf8ps_internal(unsigned short m, unsigned short n, unsigned short k,
                       _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdpbf8ps_internal(m, n, k, dst, src1, src2);
}

/// Perform the dot product of a BF8 value \a src1 by a BF8 value \a src2
/// accumulating into a Single Precision (FP32) source/dest \a dst.
///
/// \headerfile <immintrin.h>
///
/// \code
/// void __tile_dpbf8ps (__tile1024i *dst, __tile1024i src1, __tile1024i src2)
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   temp1[(dst.colsb / 4 - 1) : 0] = 0
///   FOR k := 0 TO src1.colsb / 4 - 1
///     FOR n := 0 TO dst.colsb / 4 - 1
///       temp1[n] +=
///         INT64(src1.row[m].float8[4*k+0]) * INT64(src2.row[k].float8[4*n+0])
///         + INT64(src1.row[m].float8[4*k+1]) * INT64(src2.row[k].float8[4*n+1])
///         + INT64(src1.row[m].float8[4*k+2]) * INT64(src2.row[k].float8[4*n+2])
///         + INT64(src1.row[m].float8[4*k+3]) * INT64(src2.row[k].float8[4*n+3])
///     ENDFOR
///   ENDFOR
///   FOR n := 0 TO dst.colsb / 4 - 1
///     tmp.row[m].fp32[n] = dst.row[m].fp32[n] + FP32(temp1[n])
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TDPBF8PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src1
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src2
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_FP8 static void
__tile_dpbf8ps(__tile1024i *dst, __tile1024i src1, __tile1024i src2) {
  dst->tile = _tile_dpbf8ps_internal(src1.row, src2.col, src1.col, dst->tile,
                                     src1.tile, src2.tile);
}

static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP8
_tile_dpbhf8ps_internal(unsigned short m, unsigned short n, unsigned short k,
                        _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdpbhf8ps_internal(m, n, k, dst, src1, src2);
}

/// Perform the dot product of a BF8 value \a src1 by an HF8 value \a src2
/// accumulating into a Single Precision (FP32) source/dest \a dst.
///
/// \headerfile <immintrin.h>
///
/// \code
/// void __tile_dpbhf8ps (__tile1024i *dst, __tile1024i src1, __tile1024i src2)
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   temp1[(dst.colsb / 4 - 1) : 0] = 0
///   FOR k := 0 TO src1.colsb / 4 - 1
///     FOR n := 0 TO dst.colsb / 4 - 1
///       temp1[n] +=
///         INT64(src1.row[m].float8[4*k+0]) * INT64(src2.row[k].float8[4*n+0])
///         + INT64(src1.row[m].float8[4*k+1]) * INT64(src2.row[k].float8[4*n+1])
///         + INT64(src1.row[m].float8[4*k+2]) * INT64(src2.row[k].float8[4*n+2])
///         + INT64(src1.row[m].float8[4*k+3]) * INT64(src2.row[k].float8[4*n+3])
///     ENDFOR
///   ENDFOR
///   FOR n := 0 TO dst.colsb / 4 - 1
///     tmp.row[m].fp32[n] = dst.row[m].fp32[n] + FP32(temp1[n])
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TDPBHF8PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src1
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src2
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_FP8 static void
__tile_dpbhf8ps(__tile1024i *dst, __tile1024i src1, __tile1024i src2) {
  dst->tile = _tile_dpbhf8ps_internal(src1.row, src2.col, src1.col, dst->tile,
                                      src1.tile, src2.tile);
}

static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP8
_tile_dphbf8ps_internal(unsigned short m, unsigned short n, unsigned short k,
                        _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdphbf8ps_internal(m, n, k, dst, src1, src2);
}

/// Perform the dot product of an HF8 value \a src1 by a BF8 value \a src2
/// accumulating into a Single Precision (FP32) source/dest \a dst.
///
/// \headerfile <immintrin.h>
///
/// \code
/// void __tile_dphbf8ps (__tile1024i *dst, __tile1024i src1, __tile1024i src2)
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   temp1[(dst.colsb / 4 - 1) : 0] = 0
///   FOR k := 0 TO src1.colsb / 4 - 1
///     FOR n := 0 TO dst.colsb / 4 - 1
///       temp1[n] +=
///         INT64(src1.row[m].float8[4*k+0]) * INT64(src2.row[k].float8[4*n+0])
///         + INT64(src1.row[m].float8[4*k+1]) * INT64(src2.row[k].float8[4*n+1])
///         + INT64(src1.row[m].float8[4*k+2]) * INT64(src2.row[k].float8[4*n+2])
///         + INT64(src1.row[m].float8[4*k+3]) * INT64(src2.row[k].float8[4*n+3])
///     ENDFOR
///   ENDFOR
///   FOR n := 0 TO dst.colsb / 4 - 1
///     tmp.row[m].fp32[n] = dst.row[m].fp32[n] + FP32(temp1[n])
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TDPHBF8PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src1
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src2
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_FP8 static void
__tile_dphbf8ps(__tile1024i *dst, __tile1024i src1, __tile1024i src2) {
  dst->tile = _tile_dphbf8ps_internal(src1.row, src2.col, src1.col, dst->tile,
                                      src1.tile, src2.tile);
}

static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP8
_tile_dphf8ps_internal(unsigned short m, unsigned short n, unsigned short k,
                       _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdphf8ps_internal(m, n, k, dst, src1, src2);
}

/// Perform the dot product of an HF8 value \a src1 by an HF8 value \a src2
/// accumulating into a Single Precision (FP32) source/dest \a dst.
///
/// \headerfile <immintrin.h>
///
/// \code
/// void __tile_dphf8ps (__tile1024i *dst, __tile1024i src1, __tile1024i src2)
/// \endcode
///
/// \code{.operation}
/// FOR m := 0 TO dst.rows - 1
///   temp1[(dst.colsb / 4 - 1) : 0] = 0
///   FOR k := 0 TO src1.colsb / 4 - 1
///     FOR n := 0 TO dst.colsb / 4 - 1
///       temp1[n] +=
///         INT64(src1.row[m].float8[4*k+0]) * INT64(src2.row[k].float8[4*n+0])
///         + INT64(src1.row[m].float8[4*k+1]) * INT64(src2.row[k].float8[4*n+1])
///         + INT64(src1.row[m].float8[4*k+2]) * INT64(src2.row[k].float8[4*n+2])
///         + INT64(src1.row[m].float8[4*k+3]) * INT64(src2.row[k].float8[4*n+3])
///     ENDFOR
///   ENDFOR
///   FOR n := 0 TO dst.colsb / 4 - 1
///     tmp.row[m].fp32[n] = dst.row[m].fp32[n] + FP32(temp1[n])
///   ENDFOR
///   write_row_and_zero(dst, m, tmp, dst.colsb)
/// ENDFOR
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TDPHF8PS instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src1
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src2
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_FP8 static void
__tile_dphf8ps(__tile1024i *dst, __tile1024i src1, __tile1024i src2) {
  dst->tile = _tile_dphf8ps_internal(src1.row, src2.col, src1.col, dst->tile,
                                     src1.tile, src2.tile);
}
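
/* Example (not part of the original header): the four FP8 wrappers above
 * differ only in whether each operand holds BF8 (E5M2) or HF8 (E4M3) data;
 * this is a sketch dispatching on two illustrative flags, not a library
 * helper. */
__DEFAULT_FN_ATTRS_FP8 static void
__tile_dp_fp8_example(__tile1024i *dst, __tile1024i src1, __tile1024i src2,
                      int src1_is_hf8, int src2_is_hf8) {
  if (src1_is_hf8 && src2_is_hf8)
    __tile_dphf8ps(dst, src1, src2); /* HF8 x HF8 */
  else if (src1_is_hf8)
    __tile_dphbf8ps(dst, src1, src2); /* HF8 x BF8 */
  else if (src2_is_hf8)
    __tile_dpbhf8ps(dst, src1, src2); /* BF8 x HF8 */
  else
    __tile_dpbf8ps(dst, src1, src2); /* BF8 x BF8 */
}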

#define _tile_dpbf8ps(dst, src1, src2)                                         \
  __builtin_ia32_tdpbf8ps((dst), (src1), (src2))
#define _tile_dpbhf8ps(dst, src1, src2)                                        \
  __builtin_ia32_tdpbhf8ps((dst), (src1), (src2))
#define _tile_dphbf8ps(dst, src1, src2)                                        \
  __builtin_ia32_tdphbf8ps((dst), (src1), (src2))
#define _tile_dphf8ps(dst, src1, src2)                                         \
  __builtin_ia32_tdphf8ps((dst), (src1), (src2))

#undef __DEFAULT_FN_ATTRS_FP8

#endif /* __x86_64__ */
#endif /* __AMXFP8INTRIN_H */
@@ -1,494 +0,0 @@
/*===--------------- amxintrin.h - AMX intrinsics -*- C/C++ -*---------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===------------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <amxintrin.h> directly; include <immintrin.h> instead."
#endif /* __IMMINTRIN_H */

#ifndef __AMXINTRIN_H
#define __AMXINTRIN_H
#ifdef __x86_64__

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS_TILE                                                \
  __attribute__((__always_inline__, __nodebug__, __target__("amx-tile")))
#define __DEFAULT_FN_ATTRS_INT8                                                \
  __attribute__((__always_inline__, __nodebug__, __target__("amx-int8")))
#define __DEFAULT_FN_ATTRS_BF16                                                \
  __attribute__((__always_inline__, __nodebug__, __target__("amx-bf16")))

/// Load tile configuration from a 64-byte memory location specified by
/// "mem_addr". The tile configuration includes the tile type palette, the
/// number of bytes per row, and the number of rows. If the specified
/// palette_id is zero, that signifies the init state for both the tile
/// config and the tile data, and the tiles are zeroed. Any invalid
/// configurations will result in a #GP fault.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> LDTILECFG </c> instruction.
///
/// \param __config
///    A pointer to the 512-bit configuration data.
static __inline__ void __DEFAULT_FN_ATTRS_TILE
_tile_loadconfig(const void *__config) {
  __builtin_ia32_tile_loadconfig(__config);
}

/// Stores the current tile configuration to a 64-byte memory location
/// specified by "mem_addr". The tile configuration includes the tile type
/// palette, the number of bytes per row, and the number of rows. If tiles
/// are not configured, all zeroes will be stored to memory.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> STTILECFG </c> instruction.
///
/// \param __config
///    A pointer to the 512-bit configuration data.
static __inline__ void __DEFAULT_FN_ATTRS_TILE
_tile_storeconfig(void *__config) {
  __builtin_ia32_tile_storeconfig(__config);
}
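
/* Example (not part of the original header): a hedged sketch of building the
 * 64-byte configuration that _tile_loadconfig consumes. The struct layout
 * mirrors the LDTILECFG memory format (palette byte 0, start_row byte 1,
 * reserved bytes, 16-bit colsb entries at byte 16, row counts at byte 48);
 * the struct itself is an illustrative assumption, not a library type. */
typedef struct __example_tilecfg {
  unsigned char palette_id;   /* byte 0: palette selector                */
  unsigned char start_row;    /* byte 1: restart row for interrupted ops */
  unsigned char reserved[14]; /* bytes 2..15: must be zero               */
  unsigned short colsb[16];   /* bytes 16..47: bytes per row, per tile   */
  unsigned char rows[16];     /* bytes 48..63: rows, per tile            */
} __example_tilecfg;

static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_configure_example(void) {
  __example_tilecfg cfg = {0};
  cfg.palette_id = 1; /* palette 1: up to eight 16x64-byte tiles */
  cfg.colsb[0] = 64;  /* tmm0: 16 rows x 64 bytes                */
  cfg.rows[0] = 16;
  _tile_loadconfig(&cfg);
}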

/// Release the tile configuration to return to the init state, which
/// releases all storage it currently holds.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILERELEASE </c> instruction.
static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) {
  __builtin_ia32_tilerelease();
}

/// Load tile rows from memory specified by "base" address and "stride" into
/// destination tile "dst" using the tile configuration previously configured
/// via "_tile_loadconfig".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILELOADD </c> instruction.
///
/// \param dst
///    A destination tile. Max size is 1024 Bytes.
/// \param base
///    A pointer to the base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
#define _tile_loadd(dst, base, stride)                                         \
  __builtin_ia32_tileloadd64((dst), ((const void *)(base)),                    \
                             (__SIZE_TYPE__)(stride))

/// Load tile rows from memory specified by "base" address and "stride" into
/// destination tile "dst" using the tile configuration previously configured
/// via "_tile_loadconfig". This intrinsic provides a hint to the implementation
/// that the data will likely not be reused in the near future and the data
/// caching can be optimized accordingly.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILELOADDT1 </c> instruction.
///
/// \param dst
///    A destination tile. Max size is 1024 Bytes.
/// \param base
///    A pointer to the base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
#define _tile_stream_loadd(dst, base, stride)                                  \
  __builtin_ia32_tileloaddt164((dst), ((const void *)(base)),                  \
                               (__SIZE_TYPE__)(stride))

/// Store the tile specified by "src" to memory specified by "base" address and
/// "stride" using the tile configuration previously configured via
/// "_tile_loadconfig".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILESTORED </c> instruction.
///
/// \param dst
///    A destination tile. Max size is 1024 Bytes.
/// \param base
///    A pointer to the base address.
/// \param stride
///    The stride between the rows' data to be stored in memory.
#define _tile_stored(dst, base, stride)                                        \
  __builtin_ia32_tilestored64((dst), ((void *)(base)), (__SIZE_TYPE__)(stride))

/// Zero the tile specified by "tdest".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILEZERO </c> instruction.
///
/// \param tile
///    The destination tile to be zeroed. Max size is 1024 Bytes.
#define _tile_zero(tile) __builtin_ia32_tilezero((tile))

/// Compute dot-product of bytes in tiles with a source/destination accumulator.
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with
/// corresponding signed 8-bit integers in src1, producing 4 intermediate 32-bit
/// results. Sum these 4 results with the corresponding 32-bit integer in "dst",
/// and store the 32-bit result back to tile "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TDPBSSD </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_dpbssd(dst, src0, src1)                                          \
  __builtin_ia32_tdpbssd((dst), (src0), (src1))
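
/* Example (not part of the original header): a hedged end-to-end sketch of
 * the register-number macro API: zero an accumulator, load two int8 operand
 * tiles, multiply-accumulate, store, release. Assumes tiles 0..2 were
 * configured (e.g., as in the configuration sketch above) and that the OS
 * has enabled AMX state for this thread. */
static __inline__ void __DEFAULT_FN_ATTRS_INT8
_tile_dpbssd_example(int *c, const signed char *a, const signed char *b) {
  _tile_zero(0);         /* tmm0: int32 accumulator       */
  _tile_loadd(1, a, 64); /* tmm1: signed 8-bit operands   */
  _tile_loadd(2, b, 64); /* tmm2: signed 8-bit operands   */
  _tile_dpbssd(0, 1, 2); /* tmm0 += tmm1 . tmm2 (TDPBSSD) */
  _tile_stored(0, c, 64);
  _tile_release();
}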

/// Compute dot-product of bytes in tiles with a source/destination accumulator.
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with
/// corresponding unsigned 8-bit integers in src1, producing 4 intermediate
/// 32-bit results. Sum these 4 results with the corresponding 32-bit integer
/// in "dst", and store the 32-bit result back to tile "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TDPBSUD </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_dpbsud(dst, src0, src1) \
  __builtin_ia32_tdpbsud((dst), (src0), (src1))

/// Compute dot-product of bytes in tiles with a source/destination accumulator.
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with
/// corresponding signed 8-bit integers in src1, producing 4 intermediate 32-bit
/// results. Sum these 4 results with the corresponding 32-bit integer in "dst",
/// and store the 32-bit result back to tile "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TDPBUSD </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_dpbusd(dst, src0, src1) \
  __builtin_ia32_tdpbusd((dst), (src0), (src1))

/// Compute dot-product of bytes in tiles with a source/destination accumulator.
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with
/// corresponding unsigned 8-bit integers in src1, producing 4 intermediate
/// 32-bit results. Sum these 4 results with the corresponding 32-bit integer in
/// "dst", and store the 32-bit result back to tile "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TDPBUUD </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_dpbuud(dst, src0, src1) \
  __builtin_ia32_tdpbuud((dst), (src0), (src1))

/// Compute dot-product of BF16 (16-bit) floating-point pairs in tiles src0 and
/// src1, accumulating the intermediate single-precision (32-bit) floating-point
/// elements with elements in "dst", and store the 32-bit result back to tile
/// "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TDPBF16PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
#define _tile_dpbf16ps(dst, src0, src1) \
  __builtin_ia32_tdpbf16ps((dst), (src0), (src1))
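/* Illustrative sketch (not part of this header): one way to model the BF16
 * pair product that TDPBF16PS accumulates. A bf16 value is the upper 16 bits
 * of an IEEE-754 float, so widening is a 16-bit left shift; the helper names
 * below are hypothetical and this is a scalar model, not the hardware
 * algorithm.
 *
 *   #include <string.h>
 *   static float __bf16_to_fp32(unsigned short v) {
 *     unsigned int bits = (unsigned int)v << 16; // bf16 == high half of fp32
 *     float f;
 *     memcpy(&f, &bits, sizeof(f));
 *     return f;
 *   }
 *   static float __dpbf16_elem(float acc, const unsigned short a[2],
 *                              const unsigned short b[2]) {
 *     for (int i = 0; i < 2; ++i)
 *       acc += __bf16_to_fp32(a[i]) * __bf16_to_fp32(b[i]);
 *     return acc;
 *   }
 */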

/// AMX tile register size can be configured; the maximum size is 16x64=1024
/// bytes. Since there is no 2D type in LLVM IR, we use a vector type to
/// represent the 2D tile, and its fixed size is the maximum AMX tile register
/// size.
typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64)));
typedef int _tile1024i_1024a
    __attribute__((__vector_size__(1024), __aligned__(1024)));

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
_tile_loadd_internal(unsigned short m, unsigned short n, const void *base,
                     __SIZE_TYPE__ stride) {
  return __builtin_ia32_tileloadd64_internal(m, n, base,
                                             (__SIZE_TYPE__)(stride));
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
_tile_loaddt1_internal(unsigned short m, unsigned short n, const void *base,
                       __SIZE_TYPE__ stride) {
  return __builtin_ia32_tileloaddt164_internal(m, n, base,
                                               (__SIZE_TYPE__)(stride));
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8
_tile_dpbssd_internal(unsigned short m, unsigned short n, unsigned short k,
                      _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdpbssd_internal(m, n, k, dst, src1, src2);
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8
_tile_dpbsud_internal(unsigned short m, unsigned short n, unsigned short k,
                      _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdpbsud_internal(m, n, k, dst, src1, src2);
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8
_tile_dpbusd_internal(unsigned short m, unsigned short n, unsigned short k,
                      _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdpbusd_internal(m, n, k, dst, src1, src2);
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8
_tile_dpbuud_internal(unsigned short m, unsigned short n, unsigned short k,
                      _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdpbuud_internal(m, n, k, dst, src1, src2);
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
static __inline__ void __DEFAULT_FN_ATTRS_TILE
_tile_stored_internal(unsigned short m, unsigned short n, void *base,
                      __SIZE_TYPE__ stride, _tile1024i tile) {
  return __builtin_ia32_tilestored64_internal(m, n, base,
                                              (__SIZE_TYPE__)(stride), tile);
}

/// This is an internal intrinsic. C/C++ users should avoid calling it directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_BF16
_tile_dpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k,
                        _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tdpbf16ps_internal(m, n, k, dst, src1, src2);
}

/// This struct packs the shape and tile data together for the user. We suggest
/// initializing the struct as early as possible, because the compiler depends
/// on the shape information to do the configuration. Constant shape values are
/// preferred, as they allow the compiler to optimize.
typedef struct __tile1024i_str {
  const unsigned short row;
  const unsigned short col;
  _tile1024i tile;
} __tile1024i;
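/* Usage sketch (illustrative): the shape is fixed at initialization and, per
 * the note above, should be a compile-time constant so the compiler can
 * configure the tiles. From the _tile_loadd_internal usage below, row appears
 * to be the number of rows and col the row width in bytes; both readings are
 * assumptions here.
 *
 *   __tile1024i a = {16, 64}; // 16 rows x 64 bytes, the maximum tile shape
 *   __tile1024i b = {16, 64};
 *   __tile1024i c = {16, 64};
 */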

/// Load tile rows from memory specified by "base" address and "stride" into
/// destination tile "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILELOADD </c> instruction.
///
/// \param dst
///    A destination tile. Max size is 1024 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS_TILE
static __inline__ void __tile_loadd(__tile1024i *dst, const void *base,
                                    __SIZE_TYPE__ stride) {
  dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride);
}

/// Load tile rows from memory specified by "base" address and "stride" into
/// destination tile "dst". This intrinsic provides a hint to the implementation
/// that the data will likely not be reused in the near future and the data
/// caching can be optimized accordingly.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILELOADDT1 </c> instruction.
///
/// \param dst
///    A destination tile. Max size is 1024 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS_TILE
static __inline__ void __tile_stream_loadd(__tile1024i *dst, const void *base,
                                           __SIZE_TYPE__ stride) {
  dst->tile = _tile_loaddt1_internal(dst->row, dst->col, base, stride);
}

/// Compute dot-product of bytes in tiles with a source/destination accumulator.
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with
/// corresponding signed 8-bit integers in src1, producing 4 intermediate 32-bit
/// results. Sum these 4 results with the corresponding 32-bit integer in "dst",
/// and store the 32-bit result back to tile "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TDPBSSD </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_INT8
static __inline__ void __tile_dpbssd(__tile1024i *dst, __tile1024i src0,
                                     __tile1024i src1) {
  dst->tile = _tile_dpbssd_internal(src0.row, src1.col, src0.col, dst->tile,
                                    src0.tile, src1.tile);
}

/// Compute dot-product of bytes in tiles with a source/destination accumulator.
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with
/// corresponding unsigned 8-bit integers in src1, producing 4 intermediate
/// 32-bit results. Sum these 4 results with the corresponding 32-bit integer
/// in "dst", and store the 32-bit result back to tile "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TDPBSUD </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_INT8
static __inline__ void __tile_dpbsud(__tile1024i *dst, __tile1024i src0,
                                     __tile1024i src1) {
  dst->tile = _tile_dpbsud_internal(src0.row, src1.col, src0.col, dst->tile,
                                    src0.tile, src1.tile);
}

/// Compute dot-product of bytes in tiles with a source/destination accumulator.
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with
/// corresponding signed 8-bit integers in src1, producing 4 intermediate 32-bit
/// results. Sum these 4 results with the corresponding 32-bit integer in "dst",
/// and store the 32-bit result back to tile "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TDPBUSD </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_INT8
static __inline__ void __tile_dpbusd(__tile1024i *dst, __tile1024i src0,
                                     __tile1024i src1) {
  dst->tile = _tile_dpbusd_internal(src0.row, src1.col, src0.col, dst->tile,
                                    src0.tile, src1.tile);
}

/// Compute dot-product of bytes in tiles with a source/destination accumulator.
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with
/// corresponding unsigned 8-bit integers in src1, producing 4 intermediate
/// 32-bit results. Sum these 4 results with the corresponding 32-bit integer in
/// "dst", and store the 32-bit result back to tile "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TDPBUUD </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_INT8
static __inline__ void __tile_dpbuud(__tile1024i *dst, __tile1024i src0,
                                     __tile1024i src1) {
  dst->tile = _tile_dpbuud_internal(src0.row, src1.col, src0.col, dst->tile,
                                    src0.tile, src1.tile);
}

/// Store the tile specified by "src" to memory specified by "base" address and
/// "stride".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILESTORED </c> instruction.
///
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be stored in memory.
__DEFAULT_FN_ATTRS_TILE
static __inline__ void __tile_stored(void *base, __SIZE_TYPE__ stride,
                                     __tile1024i src) {
  _tile_stored_internal(src.row, src.col, base, stride, src.tile);
}
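/* End-to-end sketch (illustrative, assumes AMX int8 support, a configured
 * tile palette, and pointers A, B, C to suitably sized buffers): load two
 * int8 tiles, accumulate their dot-product into a zeroed tile, and store the
 * int32 result. The function name is hypothetical.
 *
 *   static void __mm_tile_sketch(const void *A, const void *B, void *C,
 *                                __SIZE_TYPE__ stride) {
 *     __tile1024i a = {16, 64};
 *     __tile1024i b = {16, 64};
 *     __tile1024i c = {16, 64};
 *     __tile_loadd(&a, A, stride);
 *     __tile_loadd(&b, B, stride);
 *     __tile_zero(&c);          // defined just below
 *     __tile_dpbssd(&c, a, b);  // c += a . b (int8 -> int32)
 *     __tile_stored(C, stride, c);
 *   }
 */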

/// Zero the tile specified by "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILEZERO </c> instruction.
///
/// \param dst
///    The destination tile to be zeroed. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_TILE
static __inline__ void __tile_zero(__tile1024i *dst) {
  dst->tile = __builtin_ia32_tilezero_internal(dst->row, dst->col);
}

/// Compute dot-product of BF16 (16-bit) floating-point pairs in tiles src0 and
/// src1, accumulating the intermediate single-precision (32-bit) floating-point
/// elements with elements in "dst", and store the 32-bit result back to tile
/// "dst".
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TDPBF16PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_BF16
static __inline__ void __tile_dpbf16ps(__tile1024i *dst, __tile1024i src0,
                                       __tile1024i src1) {
  dst->tile = _tile_dpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
                                      src0.tile, src1.tile);
}

#undef __DEFAULT_FN_ATTRS_TILE
#undef __DEFAULT_FN_ATTRS_INT8
#undef __DEFAULT_FN_ATTRS_BF16

#endif /* __x86_64__ */
#endif /* __AMXINTRIN_H */
@@ -1,48 +0,0 @@
/*===-------- amxmovrsintrin.h - AMX MOVRS intrinsics -*- C++ -*---------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 * ===-------------------------------------------------------------------=== */

#ifndef __IMMINTRIN_H
#error "Never use <amxmovrsintrin.h> directly; include <immintrin.h> instead."
#endif /* __IMMINTRIN_H */

#ifndef __AMXMOVRSINTRIN_H
#define __AMXMOVRSINTRIN_H
#ifdef __x86_64__

#define __DEFAULT_FN_ATTRS_MOVRS \
  __attribute__((__always_inline__, __nodebug__, __target__("amx-movrs")))

#define _tile_loaddrs(dst, base, stride) \
  __builtin_ia32_tileloaddrs64((dst), ((const void *)(base)), \
                               (__SIZE_TYPE__)(stride))
#define _tile_stream_loaddrs(dst, base, stride) \
  __builtin_ia32_tileloaddrst164((dst), ((const void *)(base)), \
                                 (__SIZE_TYPE__)(stride))
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_MOVRS
_tile_loaddrs_internal(unsigned short m, unsigned short n, const void *base,
                       __SIZE_TYPE__ stride) {
  return __builtin_ia32_tileloaddrs64_internal(m, n, base,
                                               (__SIZE_TYPE__)(stride));
}
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_MOVRS
_tile_loaddrst1_internal(unsigned short m, unsigned short n, const void *base,
                         __SIZE_TYPE__ stride) {
  return __builtin_ia32_tileloaddrst164_internal(m, n, base,
                                                 (__SIZE_TYPE__)(stride));
}
static __inline__ void __DEFAULT_FN_ATTRS_MOVRS
__tile_loaddrs(__tile1024i *dst, const void *base, __SIZE_TYPE__ stride) {
  dst->tile = _tile_loaddrs_internal(dst->row, dst->col, base, stride);
}
static __inline__ void __DEFAULT_FN_ATTRS_MOVRS __tile_stream_loaddrs(
    __tile1024i *dst, const void *base, __SIZE_TYPE__ stride) {
  dst->tile = _tile_loaddrst1_internal(dst->row, dst->col, base, stride);
}
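/* Usage sketch (illustrative): the MOVRS variants appear to follow the same
 * shape/stride contract as __tile_loadd/__tile_stream_loadd, adding a
 * "read shared" hint for data that several readers will touch; treat that
 * characterization as an assumption.
 *
 *   __tile1024i a = {16, 64};
 *   __tile_loaddrs(&a, base, stride); // base/stride as for __tile_loadd
 */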
#undef __DEFAULT_FN_ATTRS_MOVRS
#endif /* __x86_64__ */
#endif /* __AMXMOVRSINTRIN_H */
@@ -1,200 +0,0 @@
/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics --------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 * ===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error \
    "Never use <amxmovrstransposeintrin.h> directly; use <immintrin.h> instead."
#endif /* __IMMINTRIN_H */

#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H
#define __AMX_MOVRS_TRANSPOSEINTRIN_H
#ifdef __x86_64__

#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("amx-transpose,amx-movrs")))

#define _tile_2rpntlvwz0rs(tdst, base, stride) \
  __builtin_ia32_t2rpntlvwz0rs(tdst, base, stride)
#define _tile_2rpntlvwz0rst1(tdst, base, stride) \
  __builtin_ia32_t2rpntlvwz0rst1(tdst, base, stride)
#define _tile_2rpntlvwz1rs(tdst, base, stride) \
  __builtin_ia32_t2rpntlvwz1rs(tdst, base, stride)
#define _tile_2rpntlvwz1rst1(tdst, base, stride) \
  __builtin_ia32_t2rpntlvwz1rst1(tdst, base, stride)

static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rs_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  // Use __tile1024i_1024a* to escape the alignment check in
  // clang/test/Headers/x86-intrinsics-headers-clean.cpp
  __builtin_ia32_t2rpntlvwz0rs_internal(
      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
      (__SIZE_TYPE__)(stride));
}

static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rst1_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  __builtin_ia32_t2rpntlvwz0rst1_internal(
      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
      (__SIZE_TYPE__)(stride));
}

static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rs_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  __builtin_ia32_t2rpntlvwz1rs_internal(
      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
      (__SIZE_TYPE__)(stride));
}

static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rst1_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  __builtin_ia32_t2rpntlvwz1rst1_internal(
      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
      (__SIZE_TYPE__)(stride));
}

/// Converts a pair of tiles from memory into VNNI format, and places the
/// results in a pair of destinations specified by dst. The pair of tiles
/// in memory is specified via a tsib; the second tile is after the first
/// one, separated by the same stride that separates each row.
/// The tile configuration for the destination tiles indicates the amount
/// of data to read from memory. The instruction will load a number of rows
/// that is equal to twice the number of rows in tmm1. The size of each row
/// is equal to the average width of the destination tiles. If the second
/// tile is configured with zero rows and columns, only the first tile will
/// be written.
/// Provides a hint to the implementation that the data will likely become
/// read shared in the near future and the data caching can be optimized.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> T2RPNTLVWZ0RS </c> instruction.
///
/// \param dst0
///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param dst1
///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS
static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1,
                                const void *base, __SIZE_TYPE__ stride) {
  _tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
                              &dst1->tile, base, stride);
}
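/* Usage sketch (illustrative): the paired load fills two destination tiles
 * from one tsib-described region. From the wrapper's arguments, dst0 appears
 * to supply the row count and first width while dst1 supplies only its own
 * width; that reading is an assumption.
 *
 *   __tile1024i lo = {16, 64}, hi = {16, 64};
 *   __tile_2rpntlvwz0rs(&lo, &hi, base, stride); // loads 2*lo.row rows total
 */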

/// Converts a pair of tiles from memory into VNNI format, and places the
/// results in a pair of destinations specified by dst. The pair of tiles
/// in memory is specified via a tsib; the second tile is after the first
/// one, separated by the same stride that separates each row.
/// The tile configuration for the destination tiles indicates the amount
/// of data to read from memory. The instruction will load a number of rows
/// that is equal to twice the number of rows in tmm1. The size of each row
/// is equal to the average width of the destination tiles. If the second
/// tile is configured with zero rows and columns, only the first tile will
/// be written.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1RS </c> instruction.
///
/// \param dst0
///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param dst1
///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS
static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1,
                                  const void *base, __SIZE_TYPE__ stride) {
  _tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
                                &dst1->tile, base, stride);
}

/// Converts a pair of tiles from memory into VNNI format, and places the
/// results in a pair of destinations specified by dst. The pair of tiles
/// in memory is specified via a tsib; the second tile is after the first
/// one, separated by the same stride that separates each row.
/// The tile configuration for the destination tiles indicates the amount
/// of data to read from memory. The instruction will load a number of rows
/// that is equal to twice the number of rows in tmm1. The size of each row
/// is equal to the average width of the destination tiles. If the second
/// tile is configured with zero rows and columns, only the first tile will
/// be written. The last row will not be read from memory but instead
/// filled with zeros.
/// Provides a hint to the implementation that the data will likely become
/// read shared in the near future and the data caching can be optimized.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> T2RPNTLVWZ1RS </c> instruction.
///
/// \param dst0
///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param dst1
///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS
static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1,
                                const void *base, __SIZE_TYPE__ stride) {
  _tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
                              &dst1->tile, base, stride);
}

/// Converts a pair of tiles from memory into VNNI format, and places the
/// results in a pair of destinations specified by dst. The pair of tiles
/// in memory is specified via a tsib; the second tile is after the first
/// one, separated by the same stride that separates each row.
/// The tile configuration for the destination tiles indicates the amount
/// of data to read from memory. The instruction will load a number of rows
/// that is equal to twice the number of rows in tmm1. The size of each row
/// is equal to the average width of the destination tiles. If the second
/// tile is configured with zero rows and columns, only the first tile will
/// be written. The last row will not be read from memory but instead
/// filled with zeros.
/// Provides a hint to the implementation that the data will likely become
/// read shared in the near future and the data caching can be optimized.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1RS </c> instruction.
///
/// \param dst0
///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param dst1
///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS
static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1,
                                  const void *base, __SIZE_TYPE__ stride) {
  _tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
                                &dst1->tile, base, stride);
}

#undef __DEFAULT_FN_ATTRS
#endif /* __x86_64__ */
#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */
@@ -1,108 +0,0 @@
/*===------------- amxtf32intrin.h - AMX_TF32 intrinsics -*- C++ -*---------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===------------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <amxtf32intrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AMX_TF32INTRIN_H
#define __AMX_TF32INTRIN_H
#ifdef __x86_64__

#define __DEFAULT_FN_ATTRS_TF32 \
  __attribute__((__always_inline__, __nodebug__, __target__("amx-tf32")))

/// Do matrix multiplication of \a a and \a b, and then do matrix addition
/// with \a srcdst.
/// All the calculations are based on float32, but with the lower 13 bits set
/// to 0.
///
/// \headerfile <immintrin.h>
///
/// \code
/// void _tile_mmultf32ps(constexpr int srcdst, constexpr int a, \
///                       constexpr int b);
/// \endcode
///
/// This intrinsic corresponds to the <c> TMMULTF32PS </c> instruction.
///
/// \param srcdst
///    The destination tile. Max size is 1024 Bytes.
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source tile. Max size is 1024 Bytes.
///
/// \code{.operation}
/// DEFINE zero_lower_mantissa_bits_fp32(x[31:0]) {
///     dword[12:0] := 0
///     dword[31:13] := x[31:13]
///     return dword
/// }
///
/// DEFINE silence_snan_fp32(x[31:0]) {
///     IF (x.exponent == 255 and x.fraction != 0 and x.fraction[22] == 0)
///         x.fraction[22] := 1
///     return x
/// }
///
/// elements_a := a.colsb / 4
/// elements_dest := srcdst.colsb / 4
///
/// FOR m = 0 TO (srcdst.rows-1)
///     tmp[511:0] := 0
///     FOR k = 0 TO (elements_a-1)
///         FOR n = 0 TO (elements_dest-1)
///             af := silence_snan_fp32(a.row[m].fp32[k])
///             bf := silence_snan_fp32(b.row[k].fp32[n])
///             tmp.fp32[n] += zero_lower_mantissa_bits_fp32(af)
///                            * zero_lower_mantissa_bits_fp32(bf)
///         ENDFOR
///     ENDFOR
///
///     FOR n = 0 TO (elements_dest-1)
///         tmp.fp32[n] += srcdst.row[m].fp32[n]
///     ENDFOR
///     write_row_and_zero(srcdst, m, tmp, srcdst.colsb)
///
/// ENDFOR
///
/// zero_upper_rows(srcdst, srcdst.rows)
/// zero_tileconfig_start()
/// \endcode
#define _tile_mmultf32ps(srcdst, a, b) \
  __builtin_ia32_tmmultf32ps((srcdst), (a), (b))
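/* Illustrative sketch (not part of this header): the pseudocode's
 * zero_lower_mantissa_bits_fp32 step in plain C. The helper name is
 * hypothetical; it models the TF32-style truncation of mantissa bits [12:0]
 * before multiplication.
 *
 *   #include <string.h>
 *   static float __tf32_truncate(float x) {
 *     unsigned int bits;
 *     memcpy(&bits, &x, sizeof(bits));
 *     bits &= ~0x1FFFu; // clear the low 13 mantissa bits
 *     memcpy(&x, &bits, sizeof(x));
 *     return x;
 *   }
 */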

static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TF32
_tile_mmultf32ps_internal(unsigned short m, unsigned short n, unsigned short k,
                          _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_tmmultf32ps_internal(m, n, k, dst, src1, src2);
}

/// Do matrix multiplication of src0 and src1, and then do matrix addition
/// with dst. All the calculations are based on float32, but with the lower
/// 13 bits set to 0.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TMMULTF32PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_TF32
static void __tile_mmultf32ps(__tile1024i *dst, __tile1024i src0,
                              __tile1024i src1) {
  dst->tile = _tile_mmultf32ps_internal(src0.row, src1.col, src0.col, dst->tile,
                                        src0.tile, src1.tile);
}

#endif // __x86_64__
#endif // __AMX_TF32INTRIN_H
@@ -1,105 +0,0 @@
/*===--------- amxtf32transposeintrin.h - AMX-TF32 and AMX-TRANSPOSE --------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===------------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <amxtf32transposeintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AMX_TF32TRANSPOSEINTRIN_H
#define __AMX_TF32TRANSPOSEINTRIN_H
#ifdef __x86_64__

#define __DEFAULT_FN_ATTRS_TF32_TRANSPOSE \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("amx-tf32,amx-transpose")))

/// \code
/// void _tile_tmmultf32ps(constexpr int srcdst, constexpr int a, \
///                        constexpr int b);
/// \endcode
///
/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction.
///
/// \param srcdst
///    The destination tile. Max size is 1024 Bytes.
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source tile. Max size is 1024 Bytes.
///
/// \code{.operation}
/// DEFINE zero_lower_mantissa_bits_fp32(x[31:0]) {
///     dword[12:0] := 0
///     dword[31:13] := x[31:13]
///     return dword
/// }
///
/// DEFINE silence_snan_fp32(x[31:0]) {
///     IF (x.exponent == 255 and x.fraction != 0 and x.fraction[22] == 0)
///         x.fraction[22] := 1
///     return x
/// }
///
/// elements_dest := srcdst.colsb/4
///
/// FOR m := 0 TO (srcdst.rows-1)
///     tmp[511:0] := 0
///     FOR k := 0 TO (a.rows-1)
///         FOR n := 0 TO (elements_dest-1)
///             a1e := silence_snan_fp32(a.row[k].fp32[m])
///             a2e := silence_snan_fp32(b.row[k].fp32[n])
///             s1e := zero_lower_mantissa_bits_fp32(a1e)
///             s2e := zero_lower_mantissa_bits_fp32(a2e)
///             tmp.fp32[n] += s1e * s2e
///         ENDFOR
///     ENDFOR
///
///     FOR n := 0 TO (elements_dest-1)
///         tmp.fp32[n] += srcdst.row[m].fp32[n]
///     ENDFOR
///     write_row_and_zero(srcdst, m, tmp, srcdst.colsb)
///
/// ENDFOR
///
/// zero_upper_rows(srcdst, srcdst.rows)
/// zero_tileconfig_start()
/// \endcode
#define _tile_tmmultf32ps(srcdst, a, b) \
  __builtin_ia32_ttmmultf32ps((srcdst), (a), (b))

// dst = m x n (srcdest), src1 = k x m, src2 = k x n
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TF32_TRANSPOSE
_tile_tmmultf32ps_internal(unsigned short m, unsigned short n, unsigned short k,
                           _tile1024i dst, _tile1024i src1, _tile1024i src2) {
  return __builtin_ia32_ttmmultf32ps_internal(m, n, k, dst, src1, src2);
}

/// Compute transpose and do matrix multiplication of src0 and src1, and then
/// do matrix addition with dst. All the calculations are based on float32,
/// but with the lower 13 bits set to 0.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_TF32_TRANSPOSE
static void __tile_tmmultf32ps(__tile1024i *dst, __tile1024i src0,
                               __tile1024i src1) {
  dst->tile = _tile_tmmultf32ps_internal(src0.row, src1.col, src0.col,
                                         dst->tile, src0.tile, src1.tile);
}

#endif // __x86_64__
#endif // __AMX_TF32TRANSPOSEINTRIN_H
@@ -1,248 +0,0 @@
/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++ -*---------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 * ===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <amxtransposeintrin.h> directly; use <immintrin.h> instead."
#endif /* __IMMINTRIN_H */

#ifndef __AMX_TRANSPOSEINTRIN_H
#define __AMX_TRANSPOSEINTRIN_H
#ifdef __x86_64__

#define __DEFAULT_FN_ATTRS_TRANSPOSE \
  __attribute__((__always_inline__, __nodebug__, __target__("amx-transpose")))

#define _tile_2rpntlvwz0(tdst, base, stride) \
  __builtin_ia32_t2rpntlvwz0(tdst, base, stride)
#define _tile_2rpntlvwz0t1(tdst, base, stride) \
  __builtin_ia32_t2rpntlvwz0t1(tdst, base, stride)
#define _tile_2rpntlvwz1(tdst, base, stride) \
  __builtin_ia32_t2rpntlvwz1(tdst, base, stride)
#define _tile_2rpntlvwz1t1(tdst, base, stride) \
  __builtin_ia32_t2rpntlvwz1t1(tdst, base, stride)

/// Transpose 32-bit elements from \a src and write the result to \a dst.
///
/// \headerfile <immintrin.h>
///
/// \code
/// void _tile_transposed(__tile dst, __tile src);
/// \endcode
///
/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src
///    The source tile. Max size is 1024 Bytes.
///
/// \code{.operation}
///
/// FOR i := 0 TO (dst.rows-1)
///     tmp[511:0] := 0
///     FOR j := 0 TO (dst.colsb/4-1)
///         tmp.dword[j] := src.row[j].dword[i]
///     ENDFOR
///     dst.row[i] := tmp
/// ENDFOR
///
/// zero_upper_rows(dst, dst.rows)
/// zero_tileconfig_start()
/// \endcode
#define _tile_transposed(dst, src) __builtin_ia32_ttransposed(dst, src)
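/* Illustrative sketch (not part of this header): the transpose in the
 * pseudocode above, for a dense matrix of 32-bit elements in plain arrays.
 * dst is rows x cols, src is the cols x rows input; the name and flat layout
 * are assumptions for exposition only.
 *
 *   static void __transpose32_sketch(int *dst, const int *src,
 *                                    int rows, int cols) {
 *     for (int i = 0; i < rows; ++i)    // dst row i ...
 *       for (int j = 0; j < cols; ++j)  // ... gathers column i of src
 *         dst[i * cols + j] = src[j * rows + i];
 *   }
 */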

static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  // Use __tile1024i_1024a* to escape the alignment check in
  // clang/test/Headers/x86-intrinsics-headers-clean.cpp
  __builtin_ia32_t2rpntlvwz0_internal(row, col0, col1, (_tile1024i_1024a *)dst0,
                                      (_tile1024i_1024a *)dst1, base,
                                      (__SIZE_TYPE__)(stride));
}

static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0t1_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  __builtin_ia32_t2rpntlvwz0t1_internal(
      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
      (__SIZE_TYPE__)(stride));
}

static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  __builtin_ia32_t2rpntlvwz1_internal(row, col0, col1, (_tile1024i_1024a *)dst0,
                                      (_tile1024i_1024a *)dst1, base,
                                      (__SIZE_TYPE__)(stride));
}

static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1t1_internal(
    unsigned short row, unsigned short col0, unsigned short col1,
    _tile1024i *dst0, _tile1024i *dst1, const void *base,
    __SIZE_TYPE__ stride) {
  __builtin_ia32_t2rpntlvwz1t1_internal(
      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
      (__SIZE_TYPE__)(stride));
}

// This is an internal intrinsic. C/C++ users should avoid calling it directly.
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TRANSPOSE
_tile_transposed_internal(unsigned short m, unsigned short n, _tile1024i src) {
  return __builtin_ia32_ttransposed_internal(m, n, src);
}

/// Converts a pair of tiles from memory into VNNI format, and places the
/// results in a pair of destinations specified by dst. The pair of tiles
/// in memory is specified via a tsib; the second tile is after the first
/// one, separated by the same stride that separates each row.
/// The tile configuration for the destination tiles indicates the amount
/// of data to read from memory. The instruction will load a number of rows
/// that is equal to twice the number of rows in tmm1. The size of each row
/// is equal to the average width of the destination tiles. If the second
/// tile is configured with zero rows and columns, only the first tile will
/// be written.
/// Provides a hint to the implementation that the data will likely not be
/// reused in the near future and the data caching can be optimized.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> T2RPNTLVWZ0 </c> instruction.
///
/// \param dst0
///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param dst1
///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS_TRANSPOSE
static void __tile_2rpntlvwz0(__tile1024i *dst0, __tile1024i *dst1,
                              const void *base, __SIZE_TYPE__ stride) {
  _tile_2rpntlvwz0_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
                            &dst1->tile, base, stride);
}

/// Converts a pair of tiles from memory into VNNI format, and places the
/// results in a pair of destinations specified by dst. The pair of tiles
/// in memory is specified via a tsib; the second tile is after the first
/// one, separated by the same stride that separates each row.
/// The tile configuration for the destination tiles indicates the amount
/// of data to read from memory. The instruction will load a number of rows
/// that is equal to twice the number of rows in tmm1. The size of each row
/// is equal to the average width of the destination tiles. If the second
/// tile is configured with zero rows and columns, only the first tile will
/// be written.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1 </c> instruction.
///
/// \param dst0
///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param dst1
///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS_TRANSPOSE
static void __tile_2rpntlvwz0t1(__tile1024i *dst0, __tile1024i *dst1,
                                const void *base, __SIZE_TYPE__ stride) {
  _tile_2rpntlvwz0t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
                              &dst1->tile, base, stride);
}

/// Converts a pair of tiles from memory into VNNI format, and places the
/// results in a pair of destinations specified by dst. The pair of tiles
/// in memory is specified via a tsib; the second tile is after the first
/// one, separated by the same stride that separates each row.
/// The tile configuration for the destination tiles indicates the amount
/// of data to read from memory. The instruction will load a number of rows
/// that is equal to twice the number of rows in tmm1. The size of each row
/// is equal to the average width of the destination tiles. If the second
/// tile is configured with zero rows and columns, only the first tile will
/// be written. The last row will not be read from memory but instead
/// filled with zeros.
/// Provides a hint to the implementation that the data will likely not be
/// reused in the near future and the data caching can be optimized.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction.
///
/// \param dst0
///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param dst1
///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS_TRANSPOSE
static void __tile_2rpntlvwz1(__tile1024i *dst0, __tile1024i *dst1,
                              const void *base, __SIZE_TYPE__ stride) {
  _tile_2rpntlvwz1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
                            &dst1->tile, base, stride);
}

/// Converts a pair of tiles from memory into VNNI format, and places the
/// results in a pair of destinations specified by dst. The pair of tiles
/// in memory is specified via a tsib; the second tile is after the first
/// one, separated by the same stride that separates each row.
/// The tile configuration for the destination tiles indicates the amount
/// of data to read from memory. The instruction will load a number of rows
/// that is equal to twice the number of rows in tmm1. The size of each row
/// is equal to the average width of the destination tiles. If the second
/// tile is configured with zero rows and columns, only the first tile will
/// be written. The last row will not be read from memory but instead
/// filled with zeros.
/// Provides a hint to the implementation that the data will likely not be
/// reused in the near future and the data caching can be optimized.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1 </c> instruction.
///
/// \param dst0
///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param dst1
///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
/// \param base
///    A pointer to base address.
/// \param stride
///    The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS_TRANSPOSE
static void __tile_2rpntlvwz1t1(__tile1024i *dst0, __tile1024i *dst1,
                                const void *base, __SIZE_TYPE__ stride) {
  _tile_2rpntlvwz1t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
                              &dst1->tile, base, stride);
}

/// Transpose 32-bit elements from src and write the result to dst.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction.
///
/// \param dst
///    The destination tile. Max size is 1024 Bytes.
/// \param src
///    The source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_TRANSPOSE
static void __tile_transposed(__tile1024i *dst, __tile1024i src) {
  dst->tile = _tile_transposed_internal(dst->row, dst->col, src.tile);
}

#endif /* __x86_64__ */
#endif /* __AMX_TRANSPOSEINTRIN_H */
@@ -1,16 +0,0 @@
//===----- andes_vector.h - Andes Vector definitions ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _ANDES_VECTOR_H_
#define _ANDES_VECTOR_H_

#include "riscv_vector.h"

#pragma clang riscv intrinsic andes_vector

#endif //_ANDES_VECTOR_H_
@@ -1,35 +0,0 @@
/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Only include this if we're compiling for the windows platform. */
#ifndef _MSC_VER
#include_next <arm64intr.h>
#else

#ifndef __ARM64INTR_H
#define __ARM64INTR_H

typedef enum
{
  _ARM64_BARRIER_SY    = 0xF,
  _ARM64_BARRIER_ST    = 0xE,
  _ARM64_BARRIER_LD    = 0xD,
  _ARM64_BARRIER_ISH   = 0xB,
  _ARM64_BARRIER_ISHST = 0xA,
  _ARM64_BARRIER_ISHLD = 0x9,
  _ARM64_BARRIER_NSH   = 0x7,
  _ARM64_BARRIER_NSHST = 0x6,
  _ARM64_BARRIER_NSHLD = 0x5,
  _ARM64_BARRIER_OSH   = 0x3,
  _ARM64_BARRIER_OSHST = 0x2,
  _ARM64_BARRIER_OSHLD = 0x1
} _ARM64INTR_BARRIER_TYPE;

#endif /* __ARM64INTR_H */
#endif /* _MSC_VER */
@@ -1,855 +0,0 @@
/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 * The Arm C Language Extensions specifications can be found in the following
 * link: https://github.com/ARM-software/acle/releases
 *
 * The ACLE section numbers are subject to change. When consulting the
 * specifications, it is recommended to search using section titles if
 * the section numbers look outdated.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __ARM_ACLE_H
#define __ARM_ACLE_H

#ifndef __ARM_ACLE
#error "ACLE intrinsics support not enabled."
#endif

#include <stdint.h>

#if defined(__cplusplus)
extern "C" {
#endif

/* 7 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
/* 7.3 Memory barriers */
void __dmb(unsigned int);
void __dsb(unsigned int);
void __isb(unsigned int);

/* 7.4 Hints */
void __wfi(void);
void __wfe(void);
void __sev(void);
void __sevl(void);
void __yield(void);

#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __dbg(t) __builtin_arm_dbg(t)
#endif

#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
#define _CHKFEAT_GCS 1
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__chkfeat(uint64_t __features) {
  return __builtin_arm_chkfeat(__features) ^ __features;
}
#endif

/* 7.5 Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
  uint32_t v;
  do
    v = __builtin_arm_ldrex(__p);
  while (__builtin_arm_strex(__x, __p));
  return v;
}

/* 7.6 Memory prefetch intrinsics */
/* 7.6.1 Data prefetch */
#define __pld(addr) __pldx(0, 0, 0, addr)

#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, 1)
#else
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
#endif

/* 7.6.2 Instruction prefetch */
#define __pli(addr) __plix(0, 0, addr)

#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, 0)
#else
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
#endif

/* 7.7 NOP */
#if !defined(_MSC_VER) || (!defined(__aarch64__) && !defined(__arm64ec__))
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
  __builtin_arm_nop();
}
#endif

/* 8 DATA-PROCESSING INTRINSICS */
/* 8.2 Miscellaneous data-processing intrinsics */
/* ROR */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
  __y %= 32;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (32 - __y));
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rorll(uint64_t __x, uint32_t __y) {
  __y %= 64;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (64 - __y));
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rorl(unsigned long __x, uint32_t __y) {
#if __SIZEOF_LONG__ == 4
  return __ror(__x, __y);
#else
  return __rorll(__x, __y);
#endif
}
|
|
||||||
|
|
||||||
|
|
||||||
/* CLZ */
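/* Count leading zeros. Per ACLE these are well defined for a zero input,
 * returning the operand width (32 or 64), unlike plain __builtin_clz. */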
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__clz(uint32_t __t) {
  return __builtin_arm_clz(__t);
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__clzl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_arm_clz(__t);
#else
  return __builtin_arm_clz64(__t);
#endif
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__clzll(uint64_t __t) {
  return __builtin_arm_clz64(__t);
}

/* CLS */
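/* Count leading sign bits: the number of bits following the most
 * significant bit that match it; the sign bit itself is not counted. */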
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__cls(uint32_t __t) {
  return __builtin_arm_cls(__t);
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__clsl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_arm_cls(__t);
#else
  return __builtin_arm_cls64(__t);
#endif
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__clsll(uint64_t __t) {
  return __builtin_arm_cls64(__t);
}

/* REV */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t __t) {
  return __builtin_bswap32(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__revl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_bswap32(__t);
#else
  return __builtin_bswap64(__t);
#endif
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__revll(uint64_t __t) {
  return __builtin_bswap64(__t);
}

/* REV16 */
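/* REV16 reverses the bytes within each 16-bit halfword. The 32-bit form is
 * synthesized as a full byte reverse followed by a 16-bit rotate, which
 * yields the same permutation. */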
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev16(uint32_t __t) {
  return __ror(__rev(__t), 16);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t __t) {
  return (((uint64_t)__rev16(__t >> 32)) << 32) | (uint64_t)__rev16((uint32_t)__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rev16l(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rev16(__t);
#else
  return __rev16ll(__t);
#endif
}

/* REVSH */
static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
__revsh(int16_t __t) {
  return (int16_t)__builtin_bswap16((uint16_t)__t);
}

/* RBIT */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rbit(uint32_t __t) {
  return __builtin_arm_rbit(__t);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rbitll(uint64_t __t) {
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
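  /* AArch32 has no 64-bit RBIT: reverse each 32-bit half and swap the
   * halves, so the reversed low half becomes the high half and vice versa. */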
  return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |
         __builtin_arm_rbit(__t >> 32);
#else
  return __builtin_arm_rbit64(__t);
#endif
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rbitl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rbit(__t);
#else
  return __rbitll(__t);
#endif
}

/* 8.3 16-bit multiplications */
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulbb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulbt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smultb(int32_t __a, int32_t __b) {
  return __builtin_arm_smultb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smultt(int32_t __a, int32_t __b) {
  return __builtin_arm_smultt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulwb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulwt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwt(__a, __b);
}
#endif

/*
 * 8.4 Saturating intrinsics
 *
 * FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag
 * intrinsics are implemented and the flag is enabled.
 */
/* 8.4.1 Width-specified saturation intrinsics */
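/* Per ACLE, __ssat(x, y) saturates x to a signed y-bit range and
 * __usat(x, y) to an unsigned y-bit range; y must be a constant, and the Q
 * flag is set if saturation occurred. */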
#if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
#endif

/* 8.4.2 Saturating addition and subtraction intrinsics */
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qadd(int32_t __t, int32_t __v) {
  return __builtin_arm_qadd(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qsub(int32_t __t, int32_t __v) {
  return __builtin_arm_qsub(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qdbl(int32_t __t) {
  return __builtin_arm_qadd(__t, __t);
}
#endif

/* 8.4.3 Accumulating multiplications */
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawt(__a, __b, __c);
}
#endif


/* 8.5.4 Parallel 16-bit saturation */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif

/* 8.5.5 Packing and unpacking */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
typedef uint32_t uint8x4_t;
typedef uint32_t uint16x2_t;

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_sxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtb16(int8x4_t __a) {
  return __builtin_arm_sxtb16(__a);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_uxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtb16(int8x4_t __a) {
  return __builtin_arm_uxtb16(__a);
}
#endif

/* 8.5.6 Parallel selection */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_sel(__a, __b);
}
#endif

/* 8.5.7 Parallel 8-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__sadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_sadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__ssub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_ssub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__usub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usub8(__a, __b);
}
#endif

/* 8.5.8 Sum of 8-bit absolute differences */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usad8(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
  return __builtin_arm_usada8(__a, __b, __c);
}
#endif

/* 8.5.9 Parallel 16-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usub16(__a, __b);
}
#endif

/* 8.5.10 Parallel 16-bit multiplication */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlad(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smladx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlald(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlaldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsd(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsdx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsld(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuad(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuad(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuadx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuadx(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusd(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusd(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusdx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusdx(__a, __b);
}
#endif

/* 8.6 Floating-point data-processing intrinsics */
#if (defined(__ARM_FEATURE_DIRECTED_ROUNDING) && \
     (__ARM_FEATURE_DIRECTED_ROUNDING)) && \
    (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ double __attribute__((__always_inline__, __nodebug__))
__rintn(double __a) {
  return __builtin_roundeven(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__))
__rintnf(float __a) {
  return __builtin_roundevenf(__a);
}
#endif

/* 8.8 CRC32 intrinsics */
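/* The __crc32{b,h,w,d} intrinsics use the CRC-32 polynomial (0x04C11DB7);
 * the __crc32c{b,h,w,d} variants use the CRC-32C (Castagnoli) polynomial
 * (0x1EDC6F41). The suffix gives the width of the data operand: byte,
 * halfword, word or doubleword. */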
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32b(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32b(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32h(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32h(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32w(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32w(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32d(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32d(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cb(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32cb(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32ch(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32ch(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cw(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32cw(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cd(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32cd(__a, __b);
}

/* 8.6 Floating-point data-processing intrinsics */
/* Armv8.3-A Javascript conversion intrinsic */
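/* __jcvt lowers to the FJCVTZS instruction, which converts a double to a
 * signed 32-bit integer with the wrapping behavior of JavaScript's ToInt32
 * conversion. */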
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
__jcvt(double __a) {
  return __builtin_arm_jcvt(__a);
}
#endif

/* Armv8.5-A FP rounding intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32zf(float __a) {
  return __builtin_arm_rint32zf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32z(double __a) {
  return __builtin_arm_rint32z(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64zf(float __a) {
  return __builtin_arm_rint64zf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64z(double __a) {
  return __builtin_arm_rint64z(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32xf(float __a) {
  return __builtin_arm_rint32xf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32x(double __a) {
  return __builtin_arm_rint32x(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64xf(float __a) {
  return __builtin_arm_rint64xf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64x(double __a) {
  return __builtin_arm_rint64x(__a);
}
#endif

/* 8.9 Armv8.7-A load/store 64-byte intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
typedef struct {
  uint64_t val[8];
} data512_t;

static __inline__ data512_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_ld64b(const void *__addr) {
  data512_t __value;
  __builtin_arm_ld64b(__addr, __value.val);
  return __value;
}
static __inline__ void __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64b(void *__addr, data512_t __value) {
  __builtin_arm_st64b(__addr, __value.val);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64bv(void *__addr, data512_t __value) {
  return __builtin_arm_st64bv(__addr, __value.val);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64bv0(void *__addr, data512_t __value) {
  return __builtin_arm_st64bv0(__addr, __value.val);
}
#endif

/* 11.1 Special register intrinsics */
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
#define __arm_rsr128(sysreg) __builtin_arm_rsr128(sysreg)
#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)
#define __arm_rsrf(sysreg) __builtin_bit_cast(float, __arm_rsr(sysreg))
#define __arm_rsrf64(sysreg) __builtin_bit_cast(double, __arm_rsr64(sysreg))
#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)
#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
#define __arm_wsr128(sysreg, v) __builtin_arm_wsr128(sysreg, v)
#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))

/* 10.3 MTE intrinsics */
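/* These wrap the MTE tag-manipulation instructions suggested by the builtin
 * names: IRG, ADDG, GMI, LDG, STG and SUBP. */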
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
#define __arm_mte_exclude_tag(__ptr, __excluded) __builtin_arm_gmi(__ptr, __excluded)
#define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr)
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)

/* 18 memcpy family of operations intrinsics - MOPS */
#define __arm_mops_memset_tag(__tagged_address, __value, __size) \
  __builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
#endif

/* 11.3 Coprocessor Intrinsics */
#if defined(__ARM_FEATURE_COPROC)

#if (__ARM_FEATURE_COPROC & 0x1)

#if (__ARM_ARCH < 8)
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
  __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
#endif /* __ARM_ARCH < 8 */

#define __arm_ldc(coproc, CRd, p) __builtin_arm_ldc(coproc, CRd, p)
#define __arm_stc(coproc, CRd, p) __builtin_arm_stc(coproc, CRd, p)

#define __arm_mcr(coproc, opc1, value, CRn, CRm, opc2) \
  __builtin_arm_mcr(coproc, opc1, value, CRn, CRm, opc2)
#define __arm_mrc(coproc, opc1, CRn, CRm, opc2) \
  __builtin_arm_mrc(coproc, opc1, CRn, CRm, opc2)

#if (__ARM_ARCH != 4) && (__ARM_ARCH < 8)
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
#endif /* (__ARM_ARCH != 4) && (__ARM_ARCH < 8) */

#if (__ARM_ARCH_8M_MAIN__) || (__ARM_ARCH_8_1M_MAIN__)
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
  __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
#endif /* __ARM_ARCH_8M_MAIN__ || __ARM_ARCH_8_1M_MAIN__ */

#endif /* __ARM_FEATURE_COPROC & 0x1 */

#if (__ARM_FEATURE_COPROC & 0x2)
#define __arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) \
  __builtin_arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2)
#define __arm_ldc2(coproc, CRd, p) __builtin_arm_ldc2(coproc, CRd, p)
#define __arm_stc2(coproc, CRd, p) __builtin_arm_stc2(coproc, CRd, p)
#define __arm_ldc2l(coproc, CRd, p) __builtin_arm_ldc2l(coproc, CRd, p)
#define __arm_stc2l(coproc, CRd, p) __builtin_arm_stc2l(coproc, CRd, p)
#define __arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) \
  __builtin_arm_mcr2(coproc, opc1, value, CRn, CRm, opc2)
#define __arm_mrc2(coproc, opc1, CRn, CRm, opc2) \
  __builtin_arm_mrc2(coproc, opc1, CRn, CRm, opc2)
#endif

#if (__ARM_FEATURE_COPROC & 0x4)
#define __arm_mcrr(coproc, opc1, value, CRm) \
  __builtin_arm_mcrr(coproc, opc1, value, CRm)
#define __arm_mrrc(coproc, opc1, CRm) __builtin_arm_mrrc(coproc, opc1, CRm)
#endif

#if (__ARM_FEATURE_COPROC & 0x8)
#define __arm_mcrr2(coproc, opc1, value, CRm) \
  __builtin_arm_mcrr2(coproc, opc1, value, CRm)
#define __arm_mrrc2(coproc, opc1, CRm) __builtin_arm_mrrc2(coproc, opc1, CRm)
#endif

#endif // __ARM_FEATURE_COPROC

/* 17 Transactional Memory Extension (TME) Intrinsics */
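/* Per the ACLE TME section, __tstart() returns 0 if the transaction starts
 * successfully; otherwise it returns a failure code composed of the
 * _TMFAILURE_* bits below (_TMFAILURE_RTRY set means a retry may succeed). */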
#if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME

#define _TMFAILURE_REASON 0x00007fffu
#define _TMFAILURE_RTRY 0x00008000u
#define _TMFAILURE_CNCL 0x00010000u
#define _TMFAILURE_MEM 0x00020000u
#define _TMFAILURE_IMP 0x00040000u
#define _TMFAILURE_ERR 0x00080000u
#define _TMFAILURE_SIZE 0x00100000u
#define _TMFAILURE_NEST 0x00200000u
#define _TMFAILURE_DBG 0x00400000u
#define _TMFAILURE_INT 0x00800000u
#define _TMFAILURE_TRIVIAL 0x01000000u

#define __tstart() __builtin_arm_tstart()
#define __tcommit() __builtin_arm_tcommit()
#define __tcancel(__arg) __builtin_arm_tcancel(__arg)
#define __ttest() __builtin_arm_ttest()

#endif /* __ARM_FEATURE_TME */

/* 8.7 Armv8.5-A Random number generation intrinsics */
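/* __rndr and __rndrrs store a random value through __p and return 0 on
 * success (nonzero on failure); the RNDRRS form additionally requests a
 * reseed of the generator. */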
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndr(uint64_t *__p) {
  return __builtin_arm_rndr(__p);
}
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndrrs(uint64_t *__p) {
  return __builtin_arm_rndrrs(__p);
}
#endif

/* 11.2 Guarded Control Stack intrinsics */
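/* Assumed semantics, per the ACLE GCS section: __gcspr reads the current
 * Guarded Control Stack pointer (GCSPR_EL0), __gcspopm pops one entry from
 * the active GCS, and __gcsss switches to the GCS passed in. */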
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
__gcspr() {
  return (void *)__builtin_arm_rsr64("gcspr_el0");
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("gcs")))
__gcspopm() {
  return __builtin_arm_gcspopm(0);
}

static __inline__ void *__attribute__((__always_inline__, __nodebug__,
                                       target("gcs")))
__gcsss(void *__stack) {
  return __builtin_arm_gcsss(__stack);
}
#endif

#if defined(__cplusplus)
}
#endif

#endif /* __ARM_ACLE_H */
@@ -1,20 +0,0 @@
/*===---- arm_bf16.h - ARM BF16 intrinsics -----------------------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __ARM_BF16_H
#define __ARM_BF16_H

typedef __bf16 bfloat16_t;
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))


#undef __ai

#endif
@@ -1,410 +0,0 @@
/*===---- arm_cde.h - ARM CDE intrinsics -----------------------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __ARM_CDE_H
#define __ARM_CDE_H

#if !__ARM_FEATURE_CDE
#error "CDE support not enabled"
#endif

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

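/* Assumed reading, per the ACLE CDE section: the leading int argument is the
 * coprocessor number and each intrinsic's final uint32_t is an immediate;
 * both must be compile-time constants. */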
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1)))
uint32_t __arm_cx1(int, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1a)))
uint32_t __arm_cx1a(int, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1d)))
uint64_t __arm_cx1d(int, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1da)))
uint64_t __arm_cx1da(int, uint64_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2)))
uint32_t __arm_cx2(int, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2a)))
uint32_t __arm_cx2a(int, uint32_t, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2d)))
uint64_t __arm_cx2d(int, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2da)))
uint64_t __arm_cx2da(int, uint64_t, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3)))
uint32_t __arm_cx3(int, uint32_t, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3a)))
uint32_t __arm_cx3a(int, uint32_t, uint32_t, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3d)))
uint64_t __arm_cx3d(int, uint32_t, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3da)))
uint64_t __arm_cx3da(int, uint64_t, uint32_t, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1_u32)))
uint32_t __arm_vcx1_u32(int, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1a_u32)))
uint32_t __arm_vcx1a_u32(int, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1d_u64)))
uint64_t __arm_vcx1d_u64(int, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1da_u64)))
uint64_t __arm_vcx1da_u64(int, uint64_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2_u32)))
uint32_t __arm_vcx2_u32(int, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2a_u32)))
uint32_t __arm_vcx2a_u32(int, uint32_t, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2d_u64)))
uint64_t __arm_vcx2d_u64(int, uint64_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2da_u64)))
uint64_t __arm_vcx2da_u64(int, uint64_t, uint64_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3_u32)))
uint32_t __arm_vcx3_u32(int, uint32_t, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3a_u32)))
uint32_t __arm_vcx3a_u32(int, uint32_t, uint32_t, uint32_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3d_u64)))
uint64_t __arm_vcx3d_u64(int, uint64_t, uint64_t, uint32_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3da_u64)))
uint64_t __arm_vcx3da_u64(int, uint64_t, uint64_t, uint64_t, uint32_t);

#if __ARM_FEATURE_MVE

typedef uint16_t mve_pred16_t;
typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) int16_t int16x8_t;
typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) int32_t int32x4_t;
typedef __attribute__((__neon_vector_type__(2), __clang_arm_mve_strict_polymorphism)) int64_t int64x2_t;
typedef __attribute__((__neon_vector_type__(16), __clang_arm_mve_strict_polymorphism)) int8_t int8x16_t;
typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) uint16_t uint16x8_t;
typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) uint32_t uint32x4_t;
typedef __attribute__((__neon_vector_type__(2), __clang_arm_mve_strict_polymorphism)) uint64_t uint64x2_t;
typedef __attribute__((__neon_vector_type__(16), __clang_arm_mve_strict_polymorphism)) uint8_t uint8x16_t;

static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s16)))
int16x8_t __arm_vcx1q_m(int, int16x8_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s32)))
int32x4_t __arm_vcx1q_m(int, int32x4_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s64)))
int64x2_t __arm_vcx1q_m(int, int64x2_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s8)))
int8x16_t __arm_vcx1q_m(int, int8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u16)))
uint16x8_t __arm_vcx1q_m(int, uint16x8_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u32)))
uint32x4_t __arm_vcx1q_m(int, uint32x4_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u64)))
uint64x2_t __arm_vcx1q_m(int, uint64x2_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u8)))
uint8x16_t __arm_vcx1q_m(int, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_u8)))
uint8x16_t __arm_vcx1q_u8(int, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s16)))
int16x8_t __arm_vcx1qa_m(int, int16x8_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s32)))
int32x4_t __arm_vcx1qa_m(int, int32x4_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s64)))
int64x2_t __arm_vcx1qa_m(int, int64x2_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s8)))
int8x16_t __arm_vcx1qa_m(int, int8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u16)))
uint16x8_t __arm_vcx1qa_m(int, uint16x8_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u32)))
uint32x4_t __arm_vcx1qa_m(int, uint32x4_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u64)))
uint64x2_t __arm_vcx1qa_m(int, uint64x2_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u8)))
uint8x16_t __arm_vcx1qa_m(int, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s16)))
int16x8_t __arm_vcx1qa(int, int16x8_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s32)))
int32x4_t __arm_vcx1qa(int, int32x4_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s64)))
int64x2_t __arm_vcx1qa(int, int64x2_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s8)))
int8x16_t __arm_vcx1qa(int, int8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u16)))
uint16x8_t __arm_vcx1qa(int, uint16x8_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u32)))
uint32x4_t __arm_vcx1qa(int, uint32x4_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u64)))
uint64x2_t __arm_vcx1qa(int, uint64x2_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u8)))
uint8x16_t __arm_vcx1qa(int, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s16)))
int16x8_t __arm_vcx2q_m_impl(int, int16x8_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s32)))
int32x4_t __arm_vcx2q_m_impl(int, int32x4_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s64)))
int64x2_t __arm_vcx2q_m_impl(int, int64x2_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s8)))
int8x16_t __arm_vcx2q_m_impl(int, int8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u16)))
uint16x8_t __arm_vcx2q_m_impl(int, uint16x8_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u32)))
uint32x4_t __arm_vcx2q_m_impl(int, uint32x4_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u64)))
uint64x2_t __arm_vcx2q_m_impl(int, uint64x2_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u8)))
uint8x16_t __arm_vcx2q_m_impl(int, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s16)))
int16x8_t __arm_vcx2q(int, int16x8_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s32)))
int32x4_t __arm_vcx2q(int, int32x4_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s64)))
int64x2_t __arm_vcx2q(int, int64x2_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s8)))
int8x16_t __arm_vcx2q(int, int8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u16)))
uint16x8_t __arm_vcx2q(int, uint16x8_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u32)))
uint32x4_t __arm_vcx2q(int, uint32x4_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u64)))
uint64x2_t __arm_vcx2q(int, uint64x2_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8)))
uint8x16_t __arm_vcx2q(int, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s16)))
uint8x16_t __arm_vcx2q_u8(int, int16x8_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s32)))
uint8x16_t __arm_vcx2q_u8(int, int32x4_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s64)))
uint8x16_t __arm_vcx2q_u8(int, int64x2_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s8)))
uint8x16_t __arm_vcx2q_u8(int, int8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u16)))
uint8x16_t __arm_vcx2q_u8(int, uint16x8_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u32)))
uint8x16_t __arm_vcx2q_u8(int, uint32x4_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u64)))
uint8x16_t __arm_vcx2q_u8(int, uint64x2_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u8)))
uint8x16_t __arm_vcx2q_u8(int, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s16)))
int16x8_t __arm_vcx2qa_impl(int, int16x8_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s32)))
int32x4_t __arm_vcx2qa_impl(int, int32x4_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s64)))
int64x2_t __arm_vcx2qa_impl(int, int64x2_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s8)))
int8x16_t __arm_vcx2qa_impl(int, int8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u16)))
uint16x8_t __arm_vcx2qa_impl(int, uint16x8_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u32)))
uint32x4_t __arm_vcx2qa_impl(int, uint32x4_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u64)))
uint64x2_t __arm_vcx2qa_impl(int, uint64x2_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u8)))
uint8x16_t __arm_vcx2qa_impl(int, uint8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s16)))
int16x8_t __arm_vcx2qa_m_impl(int, int16x8_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s32)))
int32x4_t __arm_vcx2qa_m_impl(int, int32x4_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s64)))
int64x2_t __arm_vcx2qa_m_impl(int, int64x2_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s8)))
int8x16_t __arm_vcx2qa_m_impl(int, int8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u16)))
uint16x8_t __arm_vcx2qa_m_impl(int, uint16x8_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u32)))
uint32x4_t __arm_vcx2qa_m_impl(int, uint32x4_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u64)))
uint64x2_t __arm_vcx2qa_m_impl(int, uint64x2_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u8)))
uint8x16_t __arm_vcx2qa_m_impl(int, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s16)))
int16x8_t __arm_vcx3q_impl(int, int16x8_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s32)))
int32x4_t __arm_vcx3q_impl(int, int32x4_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s64)))
int64x2_t __arm_vcx3q_impl(int, int64x2_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s8)))
int8x16_t __arm_vcx3q_impl(int, int8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u16)))
uint16x8_t __arm_vcx3q_impl(int, uint16x8_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u32)))
uint32x4_t __arm_vcx3q_impl(int, uint32x4_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u64)))
uint64x2_t __arm_vcx3q_impl(int, uint64x2_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u8)))
uint8x16_t __arm_vcx3q_impl(int, uint8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s16)))
int16x8_t __arm_vcx3q_m_impl(int, int16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s32)))
int32x4_t __arm_vcx3q_m_impl(int, int32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s64)))
int64x2_t __arm_vcx3q_m_impl(int, int64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s8)))
int8x16_t __arm_vcx3q_m_impl(int, int8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u16)))
uint16x8_t __arm_vcx3q_m_impl(int, uint16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u32)))
uint32x4_t __arm_vcx3q_m_impl(int, uint32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u64)))
uint64x2_t __arm_vcx3q_m_impl(int, uint64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u8)))
uint8x16_t __arm_vcx3q_m_impl(int, uint8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s16)))
uint8x16_t __arm_vcx3q_u8_impl(int, int16x8_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s32)))
uint8x16_t __arm_vcx3q_u8_impl(int, int32x4_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s64)))
uint8x16_t __arm_vcx3q_u8_impl(int, int64x2_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s8)))
uint8x16_t __arm_vcx3q_u8_impl(int, int8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u16)))
uint8x16_t __arm_vcx3q_u8_impl(int, uint16x8_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u32)))
uint8x16_t __arm_vcx3q_u8_impl(int, uint32x4_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u64)))
uint8x16_t __arm_vcx3q_u8_impl(int, uint64x2_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u8)))
uint8x16_t __arm_vcx3q_u8_impl(int, uint8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s16)))
int16x8_t __arm_vcx3qa_impl(int, int16x8_t, uint8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s32)))
int32x4_t __arm_vcx3qa_impl(int, int32x4_t, uint8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s64)))
int64x2_t __arm_vcx3qa_impl(int, int64x2_t, uint8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s8)))
int8x16_t __arm_vcx3qa_impl(int, int8x16_t, uint8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u16)))
uint16x8_t __arm_vcx3qa_impl(int, uint16x8_t, uint8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u32)))
uint32x4_t __arm_vcx3qa_impl(int, uint32x4_t, uint8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u64)))
|
|
||||||
uint64x2_t __arm_vcx3qa_impl(int, uint64x2_t, uint8x16_t, uint8x16_t, uint32_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u8)))
|
|
||||||
uint8x16_t __arm_vcx3qa_impl(int, uint8x16_t, uint8x16_t, uint8x16_t, uint32_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s16)))
|
|
||||||
int16x8_t __arm_vcx3qa_m_impl(int, int16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s32)))
|
|
||||||
int32x4_t __arm_vcx3qa_m_impl(int, int32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s64)))
|
|
||||||
int64x2_t __arm_vcx3qa_m_impl(int, int64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s8)))
|
|
||||||
int8x16_t __arm_vcx3qa_m_impl(int, int8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u16)))
|
|
||||||
uint16x8_t __arm_vcx3qa_m_impl(int, uint16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u32)))
|
|
||||||
uint32x4_t __arm_vcx3qa_m_impl(int, uint32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u64)))
|
|
||||||
uint64x2_t __arm_vcx3qa_m_impl(int, uint64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u8)))
|
|
||||||
uint8x16_t __arm_vcx3qa_m_impl(int, uint8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
|
|
||||||
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u8)))
|
|
||||||
int16x8_t __arm_vreinterpretq_s16_u8(uint8x16_t);
|
|
||||||
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u8)))
|
|
||||||
int32x4_t __arm_vreinterpretq_s32_u8(uint8x16_t);
|
|
||||||
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u8)))
|
|
||||||
int64x2_t __arm_vreinterpretq_s64_u8(uint8x16_t);
|
|
||||||
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u8)))
|
|
||||||
int8x16_t __arm_vreinterpretq_s8_u8(uint8x16_t);
|
|
||||||
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u8)))
|
|
||||||
uint16x8_t __arm_vreinterpretq_u16_u8(uint8x16_t);
|
|
||||||
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u8)))
|
|
||||||
uint32x4_t __arm_vreinterpretq_u32_u8(uint8x16_t);
|
|
||||||
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u8)))
|
|
||||||
uint64x2_t __arm_vreinterpretq_u64_u8(uint8x16_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s16)))
|
|
||||||
uint8x16_t __arm_vreinterpretq_u8(int16x8_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s32)))
|
|
||||||
uint8x16_t __arm_vreinterpretq_u8(int32x4_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s64)))
|
|
||||||
uint8x16_t __arm_vreinterpretq_u8(int64x2_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s8)))
|
|
||||||
uint8x16_t __arm_vreinterpretq_u8(int8x16_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u16)))
|
|
||||||
uint8x16_t __arm_vreinterpretq_u8(uint16x8_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u32)))
|
|
||||||
uint8x16_t __arm_vreinterpretq_u8(uint32x4_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u64)))
|
|
||||||
uint8x16_t __arm_vreinterpretq_u8(uint64x2_t);
|
|
||||||
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vreinterpretq_u8_u8)))
|
|
||||||
uint8x16_t __arm_vreinterpretq_u8(uint8x16_t);
|
|
||||||
#define __arm_vcx2q_m(cp, inactive, n, imm, pred) __arm_vcx2q_m_impl((cp), (inactive), __arm_vreinterpretq_u8(n), (imm), (pred))
|
|
||||||
#define __arm_vcx2qa(cp, acc, n, imm) __arm_vcx2qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), (imm))
|
|
||||||
#define __arm_vcx2qa_m(cp, acc, n, imm, pred) __arm_vcx2qa_m_impl((cp), (acc), __arm_vreinterpretq_u8(n), (imm), (pred))
|
|
||||||
#define __arm_vcx3q(cp, n, m, imm) __arm_vcx3q_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))
|
|
||||||
#define __arm_vcx3q_m(cp, inactive, n, m, imm, pred) __arm_vcx3q_m_impl((cp), (inactive), __arm_vreinterpretq_u8(n), __arm_vreinterpretq_u8(m), (imm), (pred))
|
|
||||||
#define __arm_vcx3q_u8(cp, n, m, imm) __arm_vcx3q_u8_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))
|
|
||||||
#define __arm_vcx3qa(cp, acc, n, m, imm) __arm_vcx3qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), __arm_vreinterpretq_u8(m), (imm))
|
|
||||||
#define __arm_vcx3qa_m(cp, acc, n, m, imm, pred) __arm_vcx3qa_m_impl((cp), (acc), __arm_vreinterpretq_u8(n), __arm_vreinterpretq_u8(m), (imm), (pred))
|
|
||||||
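/*
 * Editor's note: a minimal usage sketch, not part of the header. The macros
 * above make the CDE intrinsics polymorphic: every vector operand is first
 * reinterpreted to uint8x16_t via __arm_vreinterpretq_u8, and the overloaded
 * _impl declaration then dispatches on the accumulator type. The coprocessor
 * number and immediate below are illustrative values only.
 *
 *   uint32x4_t acc = vdupq_n_u32(0);   // accumulator type selects the overload
 *   int16x8_t n = vdupq_n_s16(1);      // any vector type is accepted here
 *   acc = __arm_vcx2qa(0, acc, n, 1);  // expands to __arm_vcx2qa_impl(...)
 */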

#endif /* __ARM_FEATURE_MVE */

#if __ARM_FEATURE_MVE & 2

typedef __fp16 float16_t;
typedef float float32_t;
typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) float16_t float16x8_t;
typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) float32_t float32x4_t;

static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_f16)))
float16x8_t __arm_vcx1q_m(int, float16x8_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_f32)))
float32x4_t __arm_vcx1q_m(int, float32x4_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_f16)))
float16x8_t __arm_vcx1qa(int, float16x8_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_f32)))
float32x4_t __arm_vcx1qa(int, float32x4_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_f16)))
float16x8_t __arm_vcx1qa_m(int, float16x8_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_f32)))
float32x4_t __arm_vcx1qa_m(int, float32x4_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_f16)))
float16x8_t __arm_vcx2q(int, float16x8_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_f32)))
float32x4_t __arm_vcx2q(int, float32x4_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_f16)))
float16x8_t __arm_vcx2q_m_impl(int, float16x8_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_f32)))
float32x4_t __arm_vcx2q_m_impl(int, float32x4_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_f16)))
uint8x16_t __arm_vcx2q_u8(int, float16x8_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_f32)))
uint8x16_t __arm_vcx2q_u8(int, float32x4_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_f16)))
float16x8_t __arm_vcx2qa_impl(int, float16x8_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_f32)))
float32x4_t __arm_vcx2qa_impl(int, float32x4_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_f16)))
float16x8_t __arm_vcx2qa_m_impl(int, float16x8_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_f32)))
float32x4_t __arm_vcx2qa_m_impl(int, float32x4_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_f16)))
float16x8_t __arm_vcx3q_impl(int, float16x8_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_f32)))
float32x4_t __arm_vcx3q_impl(int, float32x4_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_f16)))
float16x8_t __arm_vcx3q_m_impl(int, float16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_f32)))
float32x4_t __arm_vcx3q_m_impl(int, float32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_f16)))
uint8x16_t __arm_vcx3q_u8_impl(int, float16x8_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_f32)))
uint8x16_t __arm_vcx3q_u8_impl(int, float32x4_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_f16)))
float16x8_t __arm_vcx3qa_impl(int, float16x8_t, uint8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_f32)))
float32x4_t __arm_vcx3qa_impl(int, float32x4_t, uint8x16_t, uint8x16_t, uint32_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_f16)))
float16x8_t __arm_vcx3qa_m_impl(int, float16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_f32)))
float32x4_t __arm_vcx3qa_m_impl(int, float32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u8)))
float16x8_t __arm_vreinterpretq_f16_u8(uint8x16_t);
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u8)))
float32x4_t __arm_vreinterpretq_f32_u8(uint8x16_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f16)))
uint8x16_t __arm_vreinterpretq_u8(float16x8_t);
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f32)))
uint8x16_t __arm_vreinterpretq_u8(float32x4_t);

#endif /* __ARM_FEATURE_MVE & 2 */

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* __ARM_CDE_H */
@@ -1,217 +0,0 @@
//===---- arm_cmse.h - Arm CMSE support -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __ARM_CMSE_H
#define __ARM_CMSE_H

#if (__ARM_FEATURE_CMSE & 0x1)
#include <stddef.h>
#include <stdint.h>

#define __ARM_CMSE_SECURE_MODE (__ARM_FEATURE_CMSE & 0x2)
#define CMSE_MPU_READWRITE 1 /* checks if readwrite_ok field is set */
#define CMSE_AU_NONSECURE 2  /* checks if permissions have secure field unset */
#define CMSE_MPU_UNPRIV 4    /* sets T flag on TT instruction */
#define CMSE_MPU_READ 8      /* checks if read_ok field is set */
#define CMSE_MPU_NONSECURE 16 /* sets A flag, checks if secure field unset */
#define CMSE_NONSECURE (CMSE_AU_NONSECURE | CMSE_MPU_NONSECURE)

#define cmse_check_pointed_object(p, f) \
  cmse_check_address_range((p), sizeof(*(p)), (f))

#if defined(__cplusplus)
extern "C" {
#endif

typedef union {
  struct cmse_address_info {
#ifdef __ARM_BIG_ENDIAN
    /* __ARM_BIG_ENDIAN */
#if (__ARM_CMSE_SECURE_MODE)
    unsigned idau_region : 8;
    unsigned idau_region_valid : 1;
    unsigned secure : 1;
    unsigned nonsecure_readwrite_ok : 1;
    unsigned nonsecure_read_ok : 1;
#else
    unsigned : 12;
#endif
    unsigned readwrite_ok : 1;
    unsigned read_ok : 1;
#if (__ARM_CMSE_SECURE_MODE)
    unsigned sau_region_valid : 1;
#else
    unsigned : 1;
#endif
    unsigned mpu_region_valid : 1;
#if (__ARM_CMSE_SECURE_MODE)
    unsigned sau_region : 8;
#else
    unsigned : 8;
#endif
    unsigned mpu_region : 8;

#else /* __ARM_LITTLE_ENDIAN */
    unsigned mpu_region : 8;
#if (__ARM_CMSE_SECURE_MODE)
    unsigned sau_region : 8;
#else
    unsigned : 8;
#endif
    unsigned mpu_region_valid : 1;
#if (__ARM_CMSE_SECURE_MODE)
    unsigned sau_region_valid : 1;
#else
    unsigned : 1;
#endif
    unsigned read_ok : 1;
    unsigned readwrite_ok : 1;
#if (__ARM_CMSE_SECURE_MODE)
    unsigned nonsecure_read_ok : 1;
    unsigned nonsecure_readwrite_ok : 1;
    unsigned secure : 1;
    unsigned idau_region_valid : 1;
    unsigned idau_region : 8;
#else
    unsigned : 12;
#endif
#endif /*__ARM_LITTLE_ENDIAN */
  } flags;
  unsigned value;
} cmse_address_info_t;

static cmse_address_info_t __attribute__((__always_inline__, __nodebug__))
cmse_TT(void *__p) {
  cmse_address_info_t __u;
  __u.value = __builtin_arm_cmse_TT(__p);
  return __u;
}
static cmse_address_info_t __attribute__((__always_inline__, __nodebug__))
cmse_TTT(void *__p) {
  cmse_address_info_t __u;
  __u.value = __builtin_arm_cmse_TTT(__p);
  return __u;
}

#if __ARM_CMSE_SECURE_MODE
static cmse_address_info_t __attribute__((__always_inline__, __nodebug__))
cmse_TTA(void *__p) {
  cmse_address_info_t __u;
  __u.value = __builtin_arm_cmse_TTA(__p);
  return __u;
}
static cmse_address_info_t __attribute__((__always_inline__, __nodebug__))
cmse_TTAT(void *__p) {
  cmse_address_info_t __u;
  __u.value = __builtin_arm_cmse_TTAT(__p);
  return __u;
}
#endif

#define cmse_TT_fptr(p) cmse_TT(__builtin_bit_cast(void *, (p)))
#define cmse_TTT_fptr(p) cmse_TTT(__builtin_bit_cast(void *, (p)))

#if __ARM_CMSE_SECURE_MODE
#define cmse_TTA_fptr(p) cmse_TTA(__builtin_bit_cast(void *, (p)))
#define cmse_TTAT_fptr(p) cmse_TTAT(__builtin_bit_cast(void *, (p)))
#endif

static void *__attribute__((__always_inline__))
cmse_check_address_range(void *__pb, size_t __s, int __flags) {
  uintptr_t __begin = (uintptr_t)__pb;
  uintptr_t __end = __begin + __s - 1;

  if (__end < __begin)
    return NULL; /* wrap around check */

  /* Check whether the range crosses a 32-byte aligned address */
  const int __single_check = (__begin ^ __end) < 0x20u;

  /* execute the right variant of the TT instructions */
  void *__pe = (void *)__end;
  cmse_address_info_t __permb, __perme;
  switch (__flags & (CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE)) {
  case 0:
    __permb = cmse_TT(__pb);
    __perme = __single_check ? __permb : cmse_TT(__pe);
    break;
  case CMSE_MPU_UNPRIV:
    __permb = cmse_TTT(__pb);
    __perme = __single_check ? __permb : cmse_TTT(__pe);
    break;
#if __ARM_CMSE_SECURE_MODE
  case CMSE_MPU_NONSECURE:
    __permb = cmse_TTA(__pb);
    __perme = __single_check ? __permb : cmse_TTA(__pe);
    break;
  case CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE:
    __permb = cmse_TTAT(__pb);
    __perme = __single_check ? __permb : cmse_TTAT(__pe);
    break;
#endif
  /* if CMSE_NONSECURE is specified w/o __ARM_CMSE_SECURE_MODE */
  default:
    return NULL;
  }

  /* check that the range does not cross MPU, SAU, or IDAU region boundaries */
  if (__permb.value != __perme.value)
    return NULL;
#if !(__ARM_CMSE_SECURE_MODE)
  /* CMSE_AU_NONSECURE is only supported when __ARM_FEATURE_CMSE & 0x2 */
  if (__flags & CMSE_AU_NONSECURE)
    return NULL;
#endif

  /* check the permission on the range */
  switch (__flags & ~(CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE)) {
#if (__ARM_CMSE_SECURE_MODE)
  case CMSE_MPU_READ | CMSE_MPU_READWRITE | CMSE_AU_NONSECURE:
  case CMSE_MPU_READWRITE | CMSE_AU_NONSECURE:
    return __permb.flags.nonsecure_readwrite_ok ? __pb : NULL;

  case CMSE_MPU_READ | CMSE_AU_NONSECURE:
    return __permb.flags.nonsecure_read_ok ? __pb : NULL;

  case CMSE_AU_NONSECURE:
    return __permb.flags.secure ? NULL : __pb;
#endif
  case CMSE_MPU_READ | CMSE_MPU_READWRITE:
  case CMSE_MPU_READWRITE:
    return __permb.flags.readwrite_ok ? __pb : NULL;

  case CMSE_MPU_READ:
    return __permb.flags.read_ok ? __pb : NULL;

  default:
    return NULL;
  }
}
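/*
 * Editor's note: a hedged usage sketch, not part of the header. A secure
 * entry function can validate a buffer handed over by the non-secure world
 * before touching it; ns_ptr and ns_len are hypothetical names.
 *
 *   void *__buf = cmse_check_address_range(ns_ptr, ns_len,
 *                                          CMSE_NONSECURE | CMSE_MPU_READWRITE);
 *   if (__buf == NULL)
 *     cmse_abort();  // range crosses a region boundary or lacks permission
 */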

#if __ARM_CMSE_SECURE_MODE
static int __attribute__((__always_inline__, __nodebug__))
cmse_nonsecure_caller(void) {
  return !((uintptr_t)__builtin_return_address(0) & 1);
}

#define cmse_nsfptr_create(p) \
  __builtin_bit_cast(__typeof__(p), \
    (__builtin_bit_cast(uintptr_t, p) & ~(uintptr_t)1))

#define cmse_is_nsfptr(p) ((__builtin_bit_cast(uintptr_t, p) & 1) == 0)

#endif /* __ARM_CMSE_SECURE_MODE */
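/*
 * Editor's note: an illustrative sketch, not part of the header. In secure
 * state, bit 0 of a function pointer distinguishes a secure call target from
 * a non-secure one; cmse_nsfptr_create clears that bit so the pointer can be
 * invoked with a non-secure transition. raw_cb and ns_cb are hypothetical.
 *
 *   void __attribute__((cmse_nonsecure_call)) (*ns_cb)(void);
 *   ns_cb = cmse_nsfptr_create(raw_cb);  // clear bit 0 for a non-secure call
 *   if (cmse_is_nsfptr(ns_cb))           // true once the LSB is zero
 *     ns_cb();
 */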

void __attribute__((__noreturn__)) cmse_abort(void);
#if defined(__cplusplus)
}
#endif

#endif /* (__ARM_FEATURE_CMSE & 0x1) */

#endif /* __ARM_CMSE_H */
@@ -1,596 +0,0 @@
/*===---- arm_fp16.h - ARM FP16 intrinsics ---------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __ARM_FP16_H
#define __ARM_FP16_H

#include <stdint.h>

typedef __fp16 float16_t;
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))

#if defined(__aarch64__) || defined(__arm64ec__)
#define vabdh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vabdh_f16(__s0, __s1)); \
  __ret; \
})
#define vabsh_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vabsh_f16(__s0)); \
  __ret; \
})
#define vaddh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vaddh_f16(__s0, __s1)); \
  __ret; \
})
#define vcageh_f16(__p0, __p1) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcageh_f16(__s0, __s1)); \
  __ret; \
})
#define vcagth_f16(__p0, __p1) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcagth_f16(__s0, __s1)); \
  __ret; \
})
#define vcaleh_f16(__p0, __p1) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcaleh_f16(__s0, __s1)); \
  __ret; \
})
#define vcalth_f16(__p0, __p1) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcalth_f16(__s0, __s1)); \
  __ret; \
})
#define vceqh_f16(__p0, __p1) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vceqh_f16(__s0, __s1)); \
  __ret; \
})
#define vceqzh_f16(__p0) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vceqzh_f16(__s0)); \
  __ret; \
})
#define vcgeh_f16(__p0, __p1) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcgeh_f16(__s0, __s1)); \
  __ret; \
})
#define vcgezh_f16(__p0) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcgezh_f16(__s0)); \
  __ret; \
})
#define vcgth_f16(__p0, __p1) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcgth_f16(__s0, __s1)); \
  __ret; \
})
#define vcgtzh_f16(__p0) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcgtzh_f16(__s0)); \
  __ret; \
})
#define vcleh_f16(__p0, __p1) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcleh_f16(__s0, __s1)); \
  __ret; \
})
#define vclezh_f16(__p0) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vclezh_f16(__s0)); \
  __ret; \
})
#define vclth_f16(__p0, __p1) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vclth_f16(__s0, __s1)); \
  __ret; \
})
#define vcltzh_f16(__p0) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcltzh_f16(__s0)); \
  __ret; \
})
#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \
  int16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvth_n_s16_f16(__s0, __p1)); \
  __ret; \
})
#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \
  int32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvth_n_s32_f16(__s0, __p1)); \
  __ret; \
})
#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \
  int64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvth_n_s64_f16(__s0, __p1)); \
  __ret; \
})
#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvth_n_u16_f16(__s0, __p1)); \
  __ret; \
})
#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \
  uint32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvth_n_u32_f16(__s0, __p1)); \
  __ret; \
})
#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \
  uint64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvth_n_u64_f16(__s0, __p1)); \
  __ret; \
})
#define vcvth_s16_f16(__p0) __extension__ ({ \
  int16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvth_s16_f16(__s0)); \
  __ret; \
})
#define vcvth_s32_f16(__p0) __extension__ ({ \
  int32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvth_s32_f16(__s0)); \
  __ret; \
})
#define vcvth_s64_f16(__p0) __extension__ ({ \
  int64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvth_s64_f16(__s0)); \
  __ret; \
})
#define vcvth_u16_f16(__p0) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvth_u16_f16(__s0)); \
  __ret; \
})
#define vcvth_u32_f16(__p0) __extension__ ({ \
  uint32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvth_u32_f16(__s0)); \
  __ret; \
})
#define vcvth_u64_f16(__p0) __extension__ ({ \
  uint64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvth_u64_f16(__s0)); \
  __ret; \
})
#define vcvtah_s16_f16(__p0) __extension__ ({ \
  int16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvtah_s16_f16(__s0)); \
  __ret; \
})
#define vcvtah_s32_f16(__p0) __extension__ ({ \
  int32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtah_s32_f16(__s0)); \
  __ret; \
})
#define vcvtah_s64_f16(__p0) __extension__ ({ \
  int64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtah_s64_f16(__s0)); \
  __ret; \
})
#define vcvtah_u16_f16(__p0) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvtah_u16_f16(__s0)); \
  __ret; \
})
#define vcvtah_u32_f16(__p0) __extension__ ({ \
  uint32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtah_u32_f16(__s0)); \
  __ret; \
})
#define vcvtah_u64_f16(__p0) __extension__ ({ \
  uint64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtah_u64_f16(__s0)); \
  __ret; \
})
#define vcvth_f16_u16(__p0) __extension__ ({ \
  float16_t __ret; \
  uint16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_f16_u16(__s0)); \
  __ret; \
})
#define vcvth_f16_s16(__p0) __extension__ ({ \
  float16_t __ret; \
  int16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_f16_s16(__s0)); \
  __ret; \
})
#define vcvth_f16_u32(__p0) __extension__ ({ \
  float16_t __ret; \
  uint32_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_f16_u32(__s0)); \
  __ret; \
})
#define vcvth_f16_s32(__p0) __extension__ ({ \
  float16_t __ret; \
  int32_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_f16_s32(__s0)); \
  __ret; \
})
#define vcvth_f16_u64(__p0) __extension__ ({ \
  float16_t __ret; \
  uint64_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_f16_u64(__s0)); \
  __ret; \
})
#define vcvth_f16_s64(__p0) __extension__ ({ \
  float16_t __ret; \
  int64_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_f16_s64(__s0)); \
  __ret; \
})
#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  uint32_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_u32(__s0, __p1)); \
  __ret; \
})
#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  int32_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_s32(__s0, __p1)); \
  __ret; \
})
#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  uint64_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_u64(__s0, __p1)); \
  __ret; \
})
#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  int64_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_s64(__s0, __p1)); \
  __ret; \
})
#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  uint16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_u16(__s0, __p1)); \
  __ret; \
})
#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  int16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_s16(__s0, __p1)); \
  __ret; \
})
#define vcvtmh_s16_f16(__p0) __extension__ ({ \
  int16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvtmh_s16_f16(__s0)); \
  __ret; \
})
#define vcvtmh_s32_f16(__p0) __extension__ ({ \
  int32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtmh_s32_f16(__s0)); \
  __ret; \
})
#define vcvtmh_s64_f16(__p0) __extension__ ({ \
  int64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtmh_s64_f16(__s0)); \
  __ret; \
})
#define vcvtmh_u16_f16(__p0) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvtmh_u16_f16(__s0)); \
  __ret; \
})
#define vcvtmh_u32_f16(__p0) __extension__ ({ \
  uint32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtmh_u32_f16(__s0)); \
  __ret; \
})
#define vcvtmh_u64_f16(__p0) __extension__ ({ \
  uint64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtmh_u64_f16(__s0)); \
  __ret; \
})
#define vcvtnh_s16_f16(__p0) __extension__ ({ \
  int16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvtnh_s16_f16(__s0)); \
  __ret; \
})
#define vcvtnh_s32_f16(__p0) __extension__ ({ \
  int32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtnh_s32_f16(__s0)); \
  __ret; \
})
#define vcvtnh_s64_f16(__p0) __extension__ ({ \
  int64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtnh_s64_f16(__s0)); \
  __ret; \
})
#define vcvtnh_u16_f16(__p0) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvtnh_u16_f16(__s0)); \
  __ret; \
})
#define vcvtnh_u32_f16(__p0) __extension__ ({ \
  uint32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtnh_u32_f16(__s0)); \
  __ret; \
})
#define vcvtnh_u64_f16(__p0) __extension__ ({ \
  uint64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtnh_u64_f16(__s0)); \
  __ret; \
})
#define vcvtph_s16_f16(__p0) __extension__ ({ \
  int16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvtph_s16_f16(__s0)); \
  __ret; \
})
#define vcvtph_s32_f16(__p0) __extension__ ({ \
  int32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtph_s32_f16(__s0)); \
  __ret; \
})
#define vcvtph_s64_f16(__p0) __extension__ ({ \
  int64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtph_s64_f16(__s0)); \
  __ret; \
})
#define vcvtph_u16_f16(__p0) __extension__ ({ \
  uint16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvtph_u16_f16(__s0)); \
  __ret; \
})
#define vcvtph_u32_f16(__p0) __extension__ ({ \
  uint32_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtph_u32_f16(__s0)); \
  __ret; \
})
#define vcvtph_u64_f16(__p0) __extension__ ({ \
  uint64_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtph_u64_f16(__s0)); \
  __ret; \
})
#define vdivh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vdivh_f16(__s0, __s1)); \
  __ret; \
})
#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  float16_t __s2 = __p2; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vfmah_f16(__s0, __s1, __s2)); \
  __ret; \
})
#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  float16_t __s2 = __p2; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vfmsh_f16(__s0, __s1, __s2)); \
  __ret; \
})
#define vmaxh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxh_f16(__s0, __s1)); \
  __ret; \
})
#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxnmh_f16(__s0, __s1)); \
  __ret; \
})
#define vminh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminh_f16(__s0, __s1)); \
  __ret; \
})
#define vminnmh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminnmh_f16(__s0, __s1)); \
  __ret; \
})
#define vmulh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmulh_f16(__s0, __s1)); \
  __ret; \
})
#define vmulxh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmulxh_f16(__s0, __s1)); \
  __ret; \
})
#define vnegh_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vnegh_f16(__s0)); \
  __ret; \
})
#define vrecpeh_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrecpeh_f16(__s0)); \
  __ret; \
})
#define vrecpsh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrecpsh_f16(__s0, __s1)); \
  __ret; \
})
#define vrecpxh_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrecpxh_f16(__s0)); \
  __ret; \
})
#define vrndh_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndh_f16(__s0)); \
  __ret; \
})
#define vrndah_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndah_f16(__s0)); \
  __ret; \
})
#define vrndih_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndih_f16(__s0)); \
  __ret; \
})
#define vrndmh_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndmh_f16(__s0)); \
  __ret; \
})
#define vrndnh_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndnh_f16(__s0)); \
  __ret; \
})
#define vrndph_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndph_f16(__s0)); \
  __ret; \
})
#define vrndxh_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndxh_f16(__s0)); \
  __ret; \
})
#define vrsqrteh_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrsqrteh_f16(__s0)); \
  __ret; \
})
#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrsqrtsh_f16(__s0, __s1)); \
  __ret; \
})
#define vsqrth_f16(__p0) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vsqrth_f16(__s0)); \
  __ret; \
})
#define vsubh_f16(__p0, __p1) __extension__ ({ \
  float16_t __ret; \
  float16_t __s0 = __p0; \
  float16_t __s1 = __p1; \
  __ret = __builtin_bit_cast(float16_t, __builtin_neon_vsubh_f16(__s0, __s1)); \
  __ret; \
})
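/*
 * Editor's note: a brief usage sketch, not part of the header. These macros
 * operate on scalar half-precision values; the fused multiply-add rounds once:
 *
 *   float16_t a = 1.5, b = 2.0, acc = 0.25;
 *   acc = vfmah_f16(acc, a, b);    // acc + a * b with a single rounding
 *   uint16_t ge = vcgeh_f16(a, b); // all-ones (0xFFFF) if a >= b, else 0
 */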
#endif

#undef __ai

#endif /* __ARM_FP16_H */
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,182 +0,0 @@
/*===---- arm_neon_sve_bridge.h - ARM NEON SVE Bridge intrinsics -----------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __ARM_NEON_SVE_BRIDGE_H
#define __ARM_NEON_SVE_BRIDGE_H

#include <arm_neon.h>
#include <arm_sve.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Function attributes */
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
#define __aio \
  static __inline__ \
  __attribute__((__always_inline__, __nodebug__, __overloadable__))

__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s8)))
svint8_t svset_neonq(svint8_t, int8x16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s16)))
svint16_t svset_neonq(svint16_t, int16x8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s32)))
svint32_t svset_neonq(svint32_t, int32x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s64)))
svint64_t svset_neonq(svint64_t, int64x2_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u8)))
svuint8_t svset_neonq(svuint8_t, uint8x16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u16)))
svuint16_t svset_neonq(svuint16_t, uint16x8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u32)))
svuint32_t svset_neonq(svuint32_t, uint32x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u64)))
svuint64_t svset_neonq(svuint64_t, uint64x2_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f16)))
svfloat16_t svset_neonq(svfloat16_t, float16x8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f32)))
svfloat32_t svset_neonq(svfloat32_t, float32x4_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f64)))
svfloat64_t svset_neonq(svfloat64_t, float64x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s8)))
svint8_t svset_neonq_s8(svint8_t, int8x16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s16)))
svint16_t svset_neonq_s16(svint16_t, int16x8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s32)))
svint32_t svset_neonq_s32(svint32_t, int32x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s64)))
svint64_t svset_neonq_s64(svint64_t, int64x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u8)))
svuint8_t svset_neonq_u8(svuint8_t, uint8x16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u16)))
svuint16_t svset_neonq_u16(svuint16_t, uint16x8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u32)))
svuint32_t svset_neonq_u32(svuint32_t, uint32x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u64)))
svuint64_t svset_neonq_u64(svuint64_t, uint64x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f16)))
svfloat16_t svset_neonq_f16(svfloat16_t, float16x8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f32)))
svfloat32_t svset_neonq_f32(svfloat32_t, float32x4_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f64)))
svfloat64_t svset_neonq_f64(svfloat64_t, float64x2_t);

__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s8)))
int8x16_t svget_neonq(svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s16)))
int16x8_t svget_neonq(svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s32)))
int32x4_t svget_neonq(svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s64)))
int64x2_t svget_neonq(svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u8)))
uint8x16_t svget_neonq(svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u16)))
uint16x8_t svget_neonq(svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u32)))
uint32x4_t svget_neonq(svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u64)))
uint64x2_t svget_neonq(svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f16)))
float16x8_t svget_neonq(svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f32)))
float32x4_t svget_neonq(svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f64)))
float64x2_t svget_neonq(svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s8)))
int8x16_t svget_neonq_s8(svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s16)))
int16x8_t svget_neonq_s16(svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s32)))
int32x4_t svget_neonq_s32(svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s64)))
int64x2_t svget_neonq_s64(svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u8)))
uint8x16_t svget_neonq_u8(svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u16)))
uint16x8_t svget_neonq_u16(svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u32)))
uint32x4_t svget_neonq_u32(svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u64)))
uint64x2_t svget_neonq_u64(svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f16)))
float16x8_t svget_neonq_f16(svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f32)))
float32x4_t svget_neonq_f32(svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f64)))
float64x2_t svget_neonq_f64(svfloat64_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s8)))
|
|
||||||
svint8_t svdup_neonq(int8x16_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s16)))
|
|
||||||
svint16_t svdup_neonq(int16x8_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s32)))
|
|
||||||
svint32_t svdup_neonq(int32x4_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s64)))
|
|
||||||
svint64_t svdup_neonq(int64x2_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u8)))
|
|
||||||
svuint8_t svdup_neonq(uint8x16_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u16)))
|
|
||||||
svuint16_t svdup_neonq(uint16x8_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u32)))
|
|
||||||
svuint32_t svdup_neonq(uint32x4_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u64)))
|
|
||||||
svuint64_t svdup_neonq(uint64x2_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f16)))
|
|
||||||
svfloat16_t svdup_neonq(float16x8_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f32)))
|
|
||||||
svfloat32_t svdup_neonq(float32x4_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f64)))
|
|
||||||
svfloat64_t svdup_neonq(float64x2_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s8)))
|
|
||||||
svint8_t svdup_neonq_s8(int8x16_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s16)))
|
|
||||||
svint16_t svdup_neonq_s16(int16x8_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s32)))
|
|
||||||
svint32_t svdup_neonq_s32(int32x4_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s64)))
|
|
||||||
svint64_t svdup_neonq_s64(int64x2_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u8)))
|
|
||||||
svuint8_t svdup_neonq_u8(uint8x16_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u16)))
|
|
||||||
svuint16_t svdup_neonq_u16(uint16x8_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u32)))
|
|
||||||
svuint32_t svdup_neonq_u32(uint32x4_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u64)))
|
|
||||||
svuint64_t svdup_neonq_u64(uint64x2_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f16)))
|
|
||||||
svfloat16_t svdup_neonq_f16(float16x8_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f32)))
|
|
||||||
svfloat32_t svdup_neonq_f32(float32x4_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f64)))
|
|
||||||
svfloat64_t svdup_neonq_f64(float64x2_t);
|
|
||||||
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_bf16)))
|
|
||||||
svbfloat16_t svset_neonq(svbfloat16_t, bfloat16x8_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_bf16)))
|
|
||||||
svbfloat16_t svset_neonq_bf16(svbfloat16_t, bfloat16x8_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_bf16)))
|
|
||||||
bfloat16x8_t svget_neonq(svbfloat16_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_bf16)))
|
|
||||||
bfloat16x8_t svget_neonq_bf16(svbfloat16_t);
|
|
||||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_bf16)))
|
|
||||||
svbfloat16_t svdup_neonq(bfloat16x8_t);
|
|
||||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_bf16)))
|
|
||||||
svbfloat16_t svdup_neonq_bf16(bfloat16x8_t);
|
|
||||||
|
|
||||||
#undef __ai
|
|
||||||
#undef __aio
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} // extern "C"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif //__ARM_NEON_SVE_BRIDGE_H
|
|
||||||
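For context, a minimal usage sketch of the bridge intrinsics declared above (not part of the header): round-tripping 128-bit NEON vectors through an SVE computation. It assumes an AArch64 toolchain with SVE enabled (e.g. -march=armv8-a+sve); the function name is illustrative.

#include <arm_neon.h>
#include <arm_sve.h>
#include <arm_neon_sve_bridge.h>

/* Add two NEON vectors using SVE and hand the result back as NEON. */
static float32x4_t add_via_sve(float32x4_t a, float32x4_t b) {
  /* svdup_neonq fills the whole (possibly wider) SVE vector with
   * copies of the 128-bit NEON vector, so every lane is defined. */
  svfloat32_t sa = svdup_neonq_f32(a);
  svfloat32_t sb = svdup_neonq_f32(b);
  /* svget_neonq takes the low 128 bits back out as a NEON vector. */
  return svget_neonq_f32(svadd_f32_x(svptrue_b32(), sa, sb));
}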
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,432 +0,0 @@
/*===---- arm_vector_types - ARM vector type ------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)
#error "This file should not be used standalone. Please include arm_neon.h or arm_sve.h instead"

#endif
#ifndef __ARM_NEON_TYPES_H
#define __ARM_NEON_TYPES_H
typedef float float32_t;
typedef __fp16 float16_t;
#if defined(__aarch64__) || defined(__arm64ec__)
typedef __mfp8 mfloat8_t;
typedef double float64_t;
#endif


typedef uint64_t fpm_t;

enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 };

enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE };

static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_fpm_init(void) {
  return 0;
}

static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
  return (__fpm & ~7ull) | (fpm_t)__format;
}

static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
  return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u);
}

static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
  return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u);
}

static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
  return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u);
}

static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
  return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u);
}

static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) {
  return (__fpm & ~0x7f0000ull) | (__scale << 16u);
}

static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) {
  return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u);
}

static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) {
  return (uint32_t)__fpm | (__scale << 32u);
}
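The helpers above build the 64-bit FP8 mode word one bit-field at a time, so they are meant to be chained. An illustrative sketch using only the functions and enumerators defined above:

/* Illustrative only: compose a mode word with both sources in E4M3,
 * the destination in E5M2, and saturating conversions on overflow. */
static fpm_t make_fpm(void) {
  fpm_t fpm = __arm_fpm_init();
  fpm = __arm_set_fpm_src1_format(fpm, __ARM_FPM_E4M3);
  fpm = __arm_set_fpm_src2_format(fpm, __ARM_FPM_E4M3);
  fpm = __arm_set_fpm_dst_format(fpm, __ARM_FPM_E5M2);
  fpm = __arm_set_fpm_overflow_cvt(fpm, __ARM_FPM_SATURATE);
  return fpm;
}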
typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
#if defined(__aarch64__) || defined(__arm64ec__)
typedef __attribute__((neon_vector_type(8))) mfloat8_t mfloat8x8_t;
typedef __attribute__((neon_vector_type(16))) mfloat8_t mfloat8x16_t;
#endif
typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
#if defined(__aarch64__) || defined(__arm64ec__)
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
#endif

typedef struct int8x8x2_t {
  int8x8_t val[2];
} int8x8x2_t;

typedef struct int8x16x2_t {
  int8x16_t val[2];
} int8x16x2_t;

typedef struct int16x4x2_t {
  int16x4_t val[2];
} int16x4x2_t;

typedef struct int16x8x2_t {
  int16x8_t val[2];
} int16x8x2_t;

typedef struct int32x2x2_t {
  int32x2_t val[2];
} int32x2x2_t;

typedef struct int32x4x2_t {
  int32x4_t val[2];
} int32x4x2_t;

typedef struct int64x1x2_t {
  int64x1_t val[2];
} int64x1x2_t;

typedef struct int64x2x2_t {
  int64x2_t val[2];
} int64x2x2_t;

typedef struct uint8x8x2_t {
  uint8x8_t val[2];
} uint8x8x2_t;

typedef struct uint8x16x2_t {
  uint8x16_t val[2];
} uint8x16x2_t;

typedef struct uint16x4x2_t {
  uint16x4_t val[2];
} uint16x4x2_t;

typedef struct uint16x8x2_t {
  uint16x8_t val[2];
} uint16x8x2_t;

typedef struct uint32x2x2_t {
  uint32x2_t val[2];
} uint32x2x2_t;

typedef struct uint32x4x2_t {
  uint32x4_t val[2];
} uint32x4x2_t;

typedef struct uint64x1x2_t {
  uint64x1_t val[2];
} uint64x1x2_t;

typedef struct uint64x2x2_t {
  uint64x2_t val[2];
} uint64x2x2_t;

#if defined(__aarch64__) || defined(__arm64ec__)
typedef struct mfloat8x8x2_t {
  mfloat8x8_t val[2];
} mfloat8x8x2_t;

typedef struct mfloat8x16x2_t {
  mfloat8x16_t val[2];
} mfloat8x16x2_t;

#endif
typedef struct float16x4x2_t {
  float16x4_t val[2];
} float16x4x2_t;

typedef struct float16x8x2_t {
  float16x8_t val[2];
} float16x8x2_t;

typedef struct float32x2x2_t {
  float32x2_t val[2];
} float32x2x2_t;

typedef struct float32x4x2_t {
  float32x4_t val[2];
} float32x4x2_t;

#if defined(__aarch64__) || defined(__arm64ec__)
typedef struct float64x1x2_t {
  float64x1_t val[2];
} float64x1x2_t;

typedef struct float64x2x2_t {
  float64x2_t val[2];
} float64x2x2_t;

#endif
typedef struct int8x8x3_t {
  int8x8_t val[3];
} int8x8x3_t;

typedef struct int8x16x3_t {
  int8x16_t val[3];
} int8x16x3_t;

typedef struct int16x4x3_t {
  int16x4_t val[3];
} int16x4x3_t;

typedef struct int16x8x3_t {
  int16x8_t val[3];
} int16x8x3_t;

typedef struct int32x2x3_t {
  int32x2_t val[3];
} int32x2x3_t;

typedef struct int32x4x3_t {
  int32x4_t val[3];
} int32x4x3_t;

typedef struct int64x1x3_t {
  int64x1_t val[3];
} int64x1x3_t;

typedef struct int64x2x3_t {
  int64x2_t val[3];
} int64x2x3_t;

typedef struct uint8x8x3_t {
  uint8x8_t val[3];
} uint8x8x3_t;

typedef struct uint8x16x3_t {
  uint8x16_t val[3];
} uint8x16x3_t;

typedef struct uint16x4x3_t {
  uint16x4_t val[3];
} uint16x4x3_t;

typedef struct uint16x8x3_t {
  uint16x8_t val[3];
} uint16x8x3_t;

typedef struct uint32x2x3_t {
  uint32x2_t val[3];
} uint32x2x3_t;

typedef struct uint32x4x3_t {
  uint32x4_t val[3];
} uint32x4x3_t;

typedef struct uint64x1x3_t {
  uint64x1_t val[3];
} uint64x1x3_t;

typedef struct uint64x2x3_t {
  uint64x2_t val[3];
} uint64x2x3_t;

#if defined(__aarch64__) || defined(__arm64ec__)
typedef struct mfloat8x8x3_t {
  mfloat8x8_t val[3];
} mfloat8x8x3_t;

typedef struct mfloat8x16x3_t {
  mfloat8x16_t val[3];
} mfloat8x16x3_t;

#endif
typedef struct float16x4x3_t {
  float16x4_t val[3];
} float16x4x3_t;

typedef struct float16x8x3_t {
  float16x8_t val[3];
} float16x8x3_t;

typedef struct float32x2x3_t {
  float32x2_t val[3];
} float32x2x3_t;

typedef struct float32x4x3_t {
  float32x4_t val[3];
} float32x4x3_t;

#if defined(__aarch64__) || defined(__arm64ec__)
typedef struct float64x1x3_t {
  float64x1_t val[3];
} float64x1x3_t;

typedef struct float64x2x3_t {
  float64x2_t val[3];
} float64x2x3_t;

#endif
typedef struct int8x8x4_t {
  int8x8_t val[4];
} int8x8x4_t;

typedef struct int8x16x4_t {
  int8x16_t val[4];
} int8x16x4_t;

typedef struct int16x4x4_t {
  int16x4_t val[4];
} int16x4x4_t;

typedef struct int16x8x4_t {
  int16x8_t val[4];
} int16x8x4_t;

typedef struct int32x2x4_t {
  int32x2_t val[4];
} int32x2x4_t;

typedef struct int32x4x4_t {
  int32x4_t val[4];
} int32x4x4_t;

typedef struct int64x1x4_t {
  int64x1_t val[4];
} int64x1x4_t;

typedef struct int64x2x4_t {
  int64x2_t val[4];
} int64x2x4_t;

typedef struct uint8x8x4_t {
  uint8x8_t val[4];
} uint8x8x4_t;

typedef struct uint8x16x4_t {
  uint8x16_t val[4];
} uint8x16x4_t;

typedef struct uint16x4x4_t {
  uint16x4_t val[4];
} uint16x4x4_t;

typedef struct uint16x8x4_t {
  uint16x8_t val[4];
} uint16x8x4_t;

typedef struct uint32x2x4_t {
  uint32x2_t val[4];
} uint32x2x4_t;

typedef struct uint32x4x4_t {
  uint32x4_t val[4];
} uint32x4x4_t;

typedef struct uint64x1x4_t {
  uint64x1_t val[4];
} uint64x1x4_t;

typedef struct uint64x2x4_t {
  uint64x2_t val[4];
} uint64x2x4_t;

#if defined(__aarch64__) || defined(__arm64ec__)
typedef struct mfloat8x8x4_t {
  mfloat8x8_t val[4];
} mfloat8x8x4_t;

typedef struct mfloat8x16x4_t {
  mfloat8x16_t val[4];
} mfloat8x16x4_t;

#endif
typedef struct float16x4x4_t {
  float16x4_t val[4];
} float16x4x4_t;

typedef struct float16x8x4_t {
  float16x8_t val[4];
} float16x8x4_t;

typedef struct float32x2x4_t {
  float32x2_t val[4];
} float32x2x4_t;

typedef struct float32x4x4_t {
  float32x4_t val[4];
} float32x4x4_t;

#if defined(__aarch64__) || defined(__arm64ec__)
typedef struct float64x1x4_t {
  float64x1_t val[4];
} float64x1x4_t;

typedef struct float64x2x4_t {
  float64x2_t val[4];
} float64x2x4_t;

#endif
typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;

typedef struct bfloat16x4x2_t {
  bfloat16x4_t val[2];
} bfloat16x4x2_t;

typedef struct bfloat16x8x2_t {
  bfloat16x8_t val[2];
} bfloat16x8x2_t;

typedef struct bfloat16x4x3_t {
  bfloat16x4_t val[3];
} bfloat16x4x3_t;

typedef struct bfloat16x8x3_t {
  bfloat16x8_t val[3];
} bfloat16x8x3_t;

typedef struct bfloat16x4x4_t {
  bfloat16x4_t val[4];
} bfloat16x4x4_t;

typedef struct bfloat16x8x4_t {
  bfloat16x8_t val[4];
} bfloat16x8x4_t;

#endif // __ARM_NEON_TYPES_H
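For context, a usage sketch (not part of the header): the xN aggregate types above are the return and argument types of the multi-vector NEON load/store intrinsics. Assumes <arm_neon.h> on an AArch64 target; the function name is illustrative.

#include <arm_neon.h>

/* vld2q_s32 de-interleaves 8 ints into two int32x4_t vectors:
 * pair.val[0] holds elements 0,2,4,6 and pair.val[1] holds 1,3,5,7. */
static int32x4_t even_elements(const int32_t *p) {
  int32x4x2_t pair = vld2q_s32(p);
  return pair.val[0];
}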
@@ -1,31 +0,0 @@
/*===---- armintr.h - ARM Windows intrinsics -------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Only include this if we're compiling for the windows platform. */
#ifndef _MSC_VER
#include_next <armintr.h>
#else

#ifndef __ARMINTR_H
#define __ARMINTR_H

typedef enum
{
  _ARM_BARRIER_SY    = 0xF,
  _ARM_BARRIER_ST    = 0xE,
  _ARM_BARRIER_ISH   = 0xB,
  _ARM_BARRIER_ISHST = 0xA,
  _ARM_BARRIER_NSH   = 0x7,
  _ARM_BARRIER_NSHST = 0x6,
  _ARM_BARRIER_OSH   = 0x3,
  _ARM_BARRIER_OSHST = 0x2
} _ARMINTR_BARRIER_TYPE;

#endif /* __ARMINTR_H */
#endif /* _MSC_VER */
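A brief usage sketch (illustrative, not from the header): when building with MSVC for ARM, these enumerators parameterize the barrier intrinsics, which I am assuming here come in via <intrin.h>.

#include <intrin.h>

static void publish_flag(volatile int *flag) {
  __dmb(_ARM_BARRIER_ISH); /* inner-shareable full data memory barrier */
  *flag = 1;
}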
@@ -1,561 +0,0 @@
/*===----------- avx10_2_512bf16intrin.h - AVX10-BF16 intrinsics ---------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx10_2_512bf16intrin.h> directly; include <immintrin.h> instead."
#endif

#ifdef __SSE2__

#ifndef __AVX10_2_512BF16INTRIN_H
#define __AVX10_2_512BF16INTRIN_H

typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
                 __min_vector_width__(512)))

static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) {
  return __builtin_bit_cast(__m512bh, _mm512_setzero_ps());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_undefined_pbh(void) {
  return (__m512bh)__builtin_ia32_undef512();
}

static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set1_pbh(__bf16 bf) {
  return (__m512bh)(__v32bf){bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
                             bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
                             bf, bf, bf, bf, bf, bf, bf, bf, bf, bf};
}

static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set_pbh(
    __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6,
    __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
    __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16, __bf16 bf17,
    __bf16 bf18, __bf16 bf19, __bf16 bf20, __bf16 bf21, __bf16 bf22,
    __bf16 bf23, __bf16 bf24, __bf16 bf25, __bf16 bf26, __bf16 bf27,
    __bf16 bf28, __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) {
  return (__m512bh)(__v32bf){bf32, bf31, bf30, bf29, bf28, bf27, bf26, bf25,
                             bf24, bf23, bf22, bf21, bf20, bf19, bf18, bf17,
                             bf16, bf15, bf14, bf13, bf12, bf11, bf10, bf9,
                             bf8, bf7, bf6, bf5, bf4, bf3, bf2, bf1};
}

#define _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \
                        bf11, bf12, bf13, bf14, bf15, bf16, bf17, bf18, bf19, \
                        bf20, bf21, bf22, bf23, bf24, bf25, bf26, bf27, bf28, \
                        bf29, bf30, bf31, bf32) \
  _mm512_set_pbh((bf32), (bf31), (bf30), (bf29), (bf28), (bf27), (bf26), \
                 (bf25), (bf24), (bf23), (bf22), (bf21), (bf20), (bf19), \
                 (bf18), (bf17), (bf16), (bf15), (bf14), (bf13), (bf12), \
                 (bf11), (bf10), (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), \
                 (bf3), (bf2), (bf1))
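Worth noting from the definitions above: _mm512_set_pbh lists lanes from highest to lowest, so _mm512_setr_pbh (memory order) is simply _mm512_set_pbh with its arguments reversed. An illustrative sketch, assuming the compiler accepts a float-to-__bf16 conversion here (true for clang with AVX10.2 enabled):

/* Broadcast a scalar into all 32 bf16 lanes. */
static __m512bh splat_one(void) {
  return _mm512_set1_pbh((__bf16)1.0f);
}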
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_castbf16_ps(__m512bh __a) {
  return (__m512)__a;
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_castbf16_pd(__m512bh __a) {
  return (__m512d)__a;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_castbf16_si512(__m512bh __a) {
  return (__m512i)__a;
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_castps_pbh(__m512 __a) {
  return (__m512bh)__a;
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_castpd_pbh(__m512d __a) {
  return (__m512bh)__a;
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_castsi512_pbh(__m512i __a) {
  return (__m512bh)__a;
}

static __inline__ __m128bh __DEFAULT_FN_ATTRS512
_mm512_castbf16512_pbh128(__m512bh __a) {
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
}

static __inline__ __m256bh __DEFAULT_FN_ATTRS512
_mm512_castbf16512_pbh256(__m512bh __a) {
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                                 12, 13, 14, 15);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_castbf16128_pbh512(__m128bh __a) {
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, -1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_castbf16256_pbh512(__m256bh __a) {
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                                 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_zextbf16128_pbh512(__m128bh __a) {
  return __builtin_shufflevector(
      __a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
      13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_zextbf16256_pbh512(__m256bh __a) {
  return __builtin_shufflevector(__a, (__v16bf)_mm256_setzero_pbh(), 0, 1, 2, 3,
                                 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
                                 29, 30, 31);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_abs_pbh(__m512bh __A) {
  return (__m512bh)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF),
                                    (__m512i)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_load_pbh(void const *__p) {
  return *(const __m512bh *)__p;
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_loadu_pbh(void const *__p) {
  struct __loadu_pbh {
    __m512bh_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_pbh *)__p)->__v;
}

static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_pbh(void *__P,
                                                              __m512bh __A) {
  *(__m512bh *)__P = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_pbh(void *__P,
                                                               __m512bh __A) {
  struct __storeu_pbh {
    __m512bh_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pbh *)__P)->__v = __A;
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
  return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)__W,
                                                (__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
  return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
                                                  (__v32hi)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_add_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)((__v32bf)__A + (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_add_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_add_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sub_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)((__v32bf)__A - (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_sub_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mul_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)((__v32bf)__A * (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_mul_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_div_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)((__v32bf)__A / (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_div_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_div_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_max_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)__builtin_ia32_vmaxbf16512((__v32bf)__A, (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_min_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)__builtin_ia32_vminbf16512((__v32bf)__A, (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

#define _mm512_cmp_pbh_mask(__A, __B, __P) \
  ((__mmask32)__builtin_ia32_vcmpbf16512_mask((__v32bf)(__m512bh)(__A), \
                                              (__v32bf)(__m512bh)(__B), \
                                              (int)(__P), (__mmask32) - 1))

#define _mm512_mask_cmp_pbh_mask(__U, __A, __B, __P) \
  ((__mmask32)__builtin_ia32_vcmpbf16512_mask((__v32bf)(__m512bh)(__A), \
                                              (__v32bf)(__m512bh)(__B), \
                                              (int)(__P), (__mmask32)(__U)))

#define _mm512_mask_fpclass_pbh_mask(__U, __A, imm) \
  ((__mmask32)__builtin_ia32_vfpclassbf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32)(__U)))

#define _mm512_fpclass_pbh_mask(__A, imm) \
  ((__mmask32)__builtin_ia32_vfpclassbf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32) - 1))

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_scalef_pbh(__m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_vscalefbf16512_mask(
      (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_undefined_pbh(),
      (__mmask32)-1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pbh(
    __m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_vscalefbf16512_mask(
      (__v32bf)__A, (__v32bf)__B, (__v32bf)__W, (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_vscalefbf16512_mask(
      (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_setzero_pbh(),
      (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rcp_pbh(__m512bh __A) {
  return (__m512bh)__builtin_ia32_vrcpbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vrcpbf16512_mask((__v32bf)__A, (__v32bf)__W,
                                                   (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vrcpbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_getexp_pbh(__m512bh __A) {
  return (__m512bh)__builtin_ia32_vgetexpbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vgetexpbf16512_mask(
      (__v32bf)__A, (__v32bf)__W, (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vgetexpbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_rsqrt_pbh(__m512bh __A) {
  return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask((__v32bf)__A, (__v32bf)__W,
                                                     (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
}

#define _mm512_reduce_pbh(__A, imm) \
  ((__m512bh)__builtin_ia32_vreducebf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), \
      (__mmask32) - 1))

#define _mm512_mask_reduce_pbh(__W, __U, __A, imm) \
  ((__m512bh)__builtin_ia32_vreducebf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \
      (__mmask32)(__U)))

#define _mm512_maskz_reduce_pbh(__U, __A, imm) \
  ((__m512bh)__builtin_ia32_vreducebf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
      (__mmask32)(__U)))

#define _mm512_roundscale_pbh(__A, imm) \
  ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
      (__mmask32) - 1))

#define _mm512_mask_roundscale_pbh(__W, __U, __A, imm) \
  ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \
      (__mmask32)(__U)))

#define _mm512_maskz_roundscale_pbh(__U, __A, imm) \
  ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
      (__mmask32)(__U)))

#define _mm512_getmant_pbh(__A, __B, __C) \
  ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
      (__v32bf)_mm512_undefined_pbh(), (__mmask32) - 1))

#define _mm512_mask_getmant_pbh(__W, __U, __A, __B, __C) \
  ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
      (__v32bf)(__m512bh)(__W), (__mmask32)(__U)))

#define _mm512_maskz_getmant_pbh(__U, __A, __B, __C) \
  ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
      (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U)))

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) {
  return (__m512bh)__builtin_ia32_vsqrtbf16512((__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_sqrt_pbh(__A), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U,
                                                (__v32bf)_mm512_sqrt_pbh(__A),
                                                (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B,
                                                (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_fmadd_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pbh(
    __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh(
    __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B,
                                                -(__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_fmsub_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pbh(
    __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh(
    __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B,
                                                (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh(
    __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pbh(
    __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh(
    __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B,
                                                -(__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh(
    __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pbh(
    __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pbh(
    __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)_mm512_setzero_pbh());
}

#undef __DEFAULT_FN_ATTRS512

#endif
#endif
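A brief usage sketch for the masked forms above (illustrative names, assuming a toolchain and CPU with AVX10.2-512 support): lanes whose mask bit is clear pass the write-through operand unchanged.

#include <immintrin.h>

/* Add b into a only in the lanes selected by `keep`. */
static __m512bh masked_sum(__m512bh a, __m512bh b, __mmask32 keep) {
  return _mm512_mask_add_pbh(a, keep, a, b);
}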
@@ -1,322 +0,0 @@
/*===--------- avx10_2_512convertintrin.h - AVX10_2_512CONVERT -------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx10_2_512convertintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifdef __SSE2__

#ifndef __AVX10_2_512CONVERTINTRIN_H
#define __AVX10_2_512CONVERTINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
                 __min_vector_width__(512)))

static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtx2ps_ph(__m512 __A,
                                                                  __m512 __B) {
  return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask(
      (__v16sf)__A, (__v16sf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)(-1),
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_cvtx2ps_ph(__m512h __W, __mmask32 __U, __m512 __A, __m512 __B) {
  return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask(
      (__v16sf)__A, (__v16sf)__B, (__v32hf)__W, (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtx2ps_ph(__mmask32 __U, __m512 __A, __m512 __B) {
  return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask(
      (__v16sf)__A, (__v16sf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtx_round2ps_ph(A, B, R) \
  ((__m512h)__builtin_ia32_vcvt2ps2phx512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v32hf)_mm512_undefined_ph(), \
      (__mmask32)(-1), (const int)(R)))

#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \
  ((__m512h)__builtin_ia32_vcvt2ps2phx512_mask((__v16sf)(A), (__v16sf)(B), \
                                               (__v32hf)(W), (__mmask32)(U), \
                                               (const int)(R)))

#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \
  ((__m512h)__builtin_ia32_vcvt2ps2phx512_mask( \
      (__v16sf)(A), (__v16sf)(B), (__v32hf)_mm512_setzero_ph(), \
      (__mmask32)(U), (const int)(R)))
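A usage sketch for the pairwise conversion above (illustrative, assumes AVX10.2-512 support): it narrows two vectors of 16 fp32 values into a single vector of 32 fp16 values; the macro forms add explicit rounding control.

/* Narrow 32 floats (split across two __m512 operands) to fp16. */
static __m512h pack_two_ps(__m512 a, __m512 b) {
  return _mm512_cvtx2ps_ph(a, b);
}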
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtbiasph_bf8(__m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
      (__mmask32)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_bf8(
    __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtbiasph_bf8(__mmask32 __U, __m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
      (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvts_biasph_bf8(__m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
      (__mmask32)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvts_biasph_bf8(
    __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvts_biasph_bf8(__mmask32 __U, __m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
      (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtbiasph_hf8(__m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
      (__mmask32)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_hf8(
    __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtbiasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
      (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvts_biasph_hf8(__m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
      (__mmask32)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvts_biasph_hf8(
    __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvts_biasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) {
  return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
      (__mmask32)__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvt2ph_bf8(__m512h __A,
                                                                  __m512h __B) {
  return (__m512i)__builtin_ia32_vcvt2ph2bf8_512((__v32hf)(__A),
                                                 (__v32hf)(__B));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvt2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
  return (__m512i)__builtin_ia32_selectb_512(
      (__mmask64)__U, (__v64qi)_mm512_cvt2ph_bf8(__A, __B), (__v64qi)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvt2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
  return (__m512i)__builtin_ia32_selectb_512(
      (__mmask64)__U, (__v64qi)_mm512_cvt2ph_bf8(__A, __B),
      (__v64qi)(__m512i)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvts_2ph_bf8(__m512h __A, __m512h __B) {
  return (__m512i)__builtin_ia32_vcvt2ph2bf8s_512((__v32hf)(__A),
                                                  (__v32hf)(__B));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvts_2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
  return (__m512i)__builtin_ia32_selectb_512(
      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B), (__v64qi)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvts_2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
  return (__m512i)__builtin_ia32_selectb_512(
      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B),
      (__v64qi)(__m512i)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvt2ph_hf8(__m512h __A,
                                                                  __m512h __B) {
  return (__m512i)__builtin_ia32_vcvt2ph2hf8_512((__v32hf)(__A),
                                                 (__v32hf)(__B));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvt2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
  return (__m512i)__builtin_ia32_selectb_512(
      (__mmask64)__U, (__v64qi)_mm512_cvt2ph_hf8(__A, __B), (__v64qi)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvt2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
  return (__m512i)__builtin_ia32_selectb_512(
      (__mmask64)__U, (__v64qi)_mm512_cvt2ph_hf8(__A, __B),
      (__v64qi)(__m512i)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvts_2ph_hf8(__m512h __A, __m512h __B) {
  return (__m512i)__builtin_ia32_vcvt2ph2hf8s_512((__v32hf)(__A),
                                                  (__v32hf)(__B));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvts_2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
  return (__m512i)__builtin_ia32_selectb_512(
      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B), (__v64qi)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvts_2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
  return (__m512i)__builtin_ia32_selectb_512(
      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B),
      (__v64qi)(__m512i)_mm512_setzero_si512());
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvthf8_ph(__m256i __A) {
  return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask(
      (__v32qi)__A, (__v32hf)(__m512h)_mm512_undefined_ph(), (__mmask32)-1);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_cvthf8_ph(__m512h __W, __mmask32 __U, __m256i __A) {
  return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask(
      (__v32qi)__A, (__v32hf)(__m512h)__W, (__mmask32)__U);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_maskz_cvthf8_ph(__mmask32 __U, __m256i __A) {
  return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask(
      (__v32qi)__A, (__v32hf)(__m512h)_mm512_setzero_ph(), (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtph_bf8(__m512h __A) {
  return (__m256i)__builtin_ia32_vcvtph2bf8_512_mask(
      (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
  return (__m256i)__builtin_ia32_vcvtph2bf8_512_mask(
      (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_bf8(__mmask32 __U, __m512h __A) {
  return (__m256i)__builtin_ia32_vcvtph2bf8_512_mask(
      (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvts_ph_bf8(__m512h __A) {
  return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
      (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvts_ph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
  return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
      (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvts_ph_bf8(__mmask32 __U, __m512h __A) {
  return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
      (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtph_hf8(__m512h __A) {
  return (__m256i)__builtin_ia32_vcvtph2hf8_512_mask(
      (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
  return (__m256i)__builtin_ia32_vcvtph2hf8_512_mask(
      (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_hf8(__mmask32 __U, __m512h __A) {
  return (__m256i)__builtin_ia32_vcvtph2hf8_512_mask(
      (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvts_ph_hf8(__m512h __A) {
  return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
      (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvts_ph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
  return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
      (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
|
|
||||||
_mm512_maskz_cvts_ph_hf8(__mmask32 __U, __m512h __A) {
|
|
||||||
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
|
|
||||||
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtbf8_ph(__m256i __A) {
|
|
||||||
return _mm512_castsi512_ph(_mm512_slli_epi16(_mm512_cvtepi8_epi16(__A), 8));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline __m512h __DEFAULT_FN_ATTRS512
|
|
||||||
_mm512_mask_cvtbf8_ph(__m512h __S, __mmask32 __U, __m256i __A) {
|
|
||||||
return _mm512_castsi512_ph(
|
|
||||||
_mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline __m512h __DEFAULT_FN_ATTRS512
|
|
||||||
_mm512_maskz_cvtbf8_ph(__mmask32 __U, __m256i __A) {
|
|
||||||
return _mm512_castsi512_ph(
|
|
||||||
_mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8));
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef __DEFAULT_FN_ATTRS512
|
|
||||||
|
|
||||||
#endif // __AVX10_2_512CONVERTINTRIN_H
|
|
||||||
#endif // __SSE2__
|
|
||||||
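A usage sketch for the paired FP16-to-FP8 conversions above (this snippet is not part of the header; the function and variable names are hypothetical, and it assumes a compiler targeting avx10.2-512):

#include <immintrin.h>

// Pack two 32-element FP16 vectors into one 64-byte bf8 vector, merging
// masked-off byte lanes from `passthru`.
static inline __m512i pack_fp16_pair_to_bf8(__m512h a, __m512h b,
                                            __m512i passthru, __mmask64 keep) {
  return _mm512_mask_cvt2ph_bf8(passthru, keep, a, b);
}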
@@ -1,127 +0,0 @@
/*===---- avx10_2_512minmaxintrin.h - AVX10_2_512MINMAX intrinsics ---------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx10_2_512minmaxintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AVX10_2_512MINMAXINTRIN_H
#define __AVX10_2_512MINMAXINTRIN_H

#define _mm512_minmax_pbh(A, B, C) \
  ((__m512bh)__builtin_ia32_vminmaxbf16512((__v32bf)(__m512bh)(A), \
                                           (__v32bf)(__m512bh)(B), (int)(C)))

#define _mm512_mask_minmax_pbh(W, U, A, B, C) \
  ((__m512bh)__builtin_ia32_selectpbf_512( \
      (__mmask32)(U), \
      (__v32bf)_mm512_minmax_pbh((__v32bf)(__m512bh)(A), \
                                 (__v32bf)(__m512bh)(B), (int)(C)), \
      (__v32bf)(__m512bh)(W)))

#define _mm512_maskz_minmax_pbh(U, A, B, C) \
  ((__m512bh)__builtin_ia32_selectpbf_512( \
      (__mmask32)(U), \
      (__v32bf)_mm512_minmax_pbh((__v32bf)(__m512bh)(A), \
                                 (__v32bf)(__m512bh)(B), (int)(C)), \
      (__v32bf) __builtin_bit_cast(__m512bh, _mm512_setzero_ps())))

#define _mm512_minmax_pd(A, B, C) \
  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_minmax_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \
      (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_minmax_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_minmax_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, (int)(R)))

#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)))

#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))

#define _mm512_minmax_ph(A, B, C) \
  ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \
      (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_minmax_ph(W, U, A, B, C) \
  ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \
      (__v32hf)(__m512h)(W), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_minmax_ph(U, A, B, C) \
  ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \
      (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_minmax_round_ph(A, B, C, R) \
  ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \
      (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))

#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \
  ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \
      (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R)))

#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \
  ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \
      (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))

#define _mm512_minmax_ps(A, B, C) \
  ((__m512)__builtin_ia32_vminmaxps512_round_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_minmax_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_vminmaxps512_round_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), (__v16sf)(W), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_minmax_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_vminmaxps512_round_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_minmax_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vminmaxps512_round_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(R)))

#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_vminmaxps512_round_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), (__v16sf)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vminmaxps512_round_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
#endif // __AVX10_2_512MINMAXINTRIN_H
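A call-shape sketch for the minmax macros above (not from the header): the integer selector C chooses which min/max/magnitude variant is performed; the value 0 below is only a placeholder, and its exact meaning is an assumption to be checked against the ISA reference.

#include <immintrin.h>

// Zero-masking form: lanes whose mask bit is clear become 0.0.
static inline __m512d demo_minmax_pd(__m512d a, __m512d b, __mmask8 keep) {
  return _mm512_maskz_minmax_pd(keep, a, b, 0);
}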
@@ -1,314 +0,0 @@
/*===---- avx10_2_512niintrin.h - AVX10.2-512 new instruction intrinsics ---===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx10_2_512niintrin.h> directly; include <immintrin.h> instead."
#endif

#ifdef __SSE2__

#ifndef __AVX10_2_512NIINTRIN_H
#define __AVX10_2_512NIINTRIN_H

#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
                 __min_vector_width__(512)))

/* VNNI FP16 */
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_dpph_ps(__m512 __W,
                                                           __m512h __A,
                                                           __m512h __B) {
  return (__m512)__builtin_ia32_vdpphps512((__v16sf)__W, (__v32hf)__A,
                                           (__v32hf)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_dpph_ps(__m512 __W,
                                                                __mmask16 __U,
                                                                __m512h __A,
                                                                __m512h __B) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_dpph_ps(__W, __A, __B), (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_dpph_ps(__mmask16 __U,
                                                                 __m512 __W,
                                                                 __m512h __A,
                                                                 __m512h __B) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_dpph_ps(__W, __A, __B),
      (__v16sf)_mm512_setzero_ps());
}

/* VMPSADBW */
#define _mm512_mpsadbw_epu8(A, B, imm) \
  ((__m512i)__builtin_ia32_mpsadbw512((__v64qi)(__m512i)(A), \
                                      (__v64qi)(__m512i)(B), (int)(imm)))

#define _mm512_mask_mpsadbw_epu8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), (__v32hi)_mm512_mpsadbw_epu8((A), (B), (imm)), \
      (__v32hi)(__m512i)(W)))

#define _mm512_maskz_mpsadbw_epu8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), (__v32hi)_mm512_mpsadbw_epu8((A), (B), (imm)), \
      (__v32hi)_mm512_setzero_si512()))

/* VNNI INT8 */
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssd_epi32(__m512i __W,
                                                                 __m512i __A,
                                                                 __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v16si)__A,
                                             (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbssd_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbssd_epi32(__W, __A, __B), (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssd_epi32(
    __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbssd_epi32(__W, __A, __B),
      (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssds_epi32(__m512i __W,
                                                                  __m512i __A,
                                                                  __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v16si)__A,
                                              (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbssds_epi32(
    __m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbssds_epi32(__W, __A, __B), (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssds_epi32(
    __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbssds_epi32(__W, __A, __B),
      (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsud_epi32(__m512i __W,
                                                                 __m512i __A,
                                                                 __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v16si)__A,
                                             (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbsud_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbsud_epi32(__W, __A, __B), (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsud_epi32(
    __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbsud_epi32(__W, __A, __B),
      (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsuds_epi32(__m512i __W,
                                                                  __m512i __A,
                                                                  __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v16si)__A,
                                              (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbsuds_epi32(
    __m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbsuds_epi32(__W, __A, __B), (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsuds_epi32(
    __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbsuds_epi32(__W, __A, __B),
      (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuud_epi32(__m512i __W,
                                                                 __m512i __A,
                                                                 __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v16si)__A,
                                             (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbuud_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbuud_epi32(__W, __A, __B), (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuud_epi32(
    __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbuud_epi32(__W, __A, __B),
      (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuuds_epi32(__m512i __W,
                                                                  __m512i __A,
                                                                  __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v16si)__A,
                                              (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbuuds_epi32(
    __m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbuuds_epi32(__W, __A, __B), (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuuds_epi32(
    __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512(
      __U, (__v16si)_mm512_dpbuuds_epi32(__W, __A, __B),
      (__v16si)_mm512_setzero_si512());
}

/* VNNI INT16 */
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsud_epi32(__m512i __A,
                                                                 __m512i __B,
                                                                 __m512i __C) {
  return (__m512i)__builtin_ia32_vpdpwsud512((__v16si)__A, (__v16si)__B,
                                             (__v16si)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwsud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwsud_epi32(__A, __B, __C),
      (__v16si)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsud_epi32(
    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwsud_epi32(__A, __B, __C),
      (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsuds_epi32(__m512i __A,
                                                                  __m512i __B,
                                                                  __m512i __C) {
  return (__m512i)__builtin_ia32_vpdpwsuds512((__v16si)__A, (__v16si)__B,
                                              (__v16si)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwsuds_epi32(
    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwsuds_epi32(__A, __B, __C),
      (__v16si)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsuds_epi32(
    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwsuds_epi32(__A, __B, __C),
      (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusd_epi32(__m512i __A,
                                                                 __m512i __B,
                                                                 __m512i __C) {
  return (__m512i)__builtin_ia32_vpdpwusd512((__v16si)__A, (__v16si)__B,
                                             (__v16si)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwusd_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwusd_epi32(__A, __B, __C),
      (__v16si)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusd_epi32(
    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwusd_epi32(__A, __B, __C),
      (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusds_epi32(__m512i __A,
                                                                  __m512i __B,
                                                                  __m512i __C) {
  return (__m512i)__builtin_ia32_vpdpwusds512((__v16si)__A, (__v16si)__B,
                                              (__v16si)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwusds_epi32(
    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwusds_epi32(__A, __B, __C),
      (__v16si)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusds_epi32(
    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwusds_epi32(__A, __B, __C),
      (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuud_epi32(__m512i __A,
                                                                 __m512i __B,
                                                                 __m512i __C) {
  return (__m512i)__builtin_ia32_vpdpwuud512((__v16si)__A, (__v16si)__B,
                                             (__v16si)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwuud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwuud_epi32(__A, __B, __C),
      (__v16si)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuud_epi32(
    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwuud_epi32(__A, __B, __C),
      (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuuds_epi32(__m512i __A,
                                                                  __m512i __B,
                                                                  __m512i __C) {
  return (__m512i)__builtin_ia32_vpdpwuuds512((__v16si)__A, (__v16si)__B,
                                              (__v16si)__C);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwuuds_epi32(
    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwuuds_epi32(__A, __B, __C),
      (__v16si)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuuds_epi32(
    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_selectd_512(
      (__mmask16)__U, (__v16si)_mm512_dpwuuds_epi32(__A, __B, __C),
      (__v16si)_mm512_setzero_si512());
}

#undef __DEFAULT_FN_ATTRS

#endif /* __SSE2__ */
#endif /* __AVX10_2_512NIINTRIN_H */
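A usage sketch for the VNNI INT8 intrinsics above (not from the header; the names are hypothetical): each 32-bit lane of the result accumulates four adjacent byte products into the running sum.

#include <immintrin.h>

// Signed-by-signed int8 dot product with 32-bit accumulation.
static inline __m512i int8_dot_accumulate(__m512i acc, __m512i a, __m512i b) {
  return _mm512_dpbssd_epi32(acc, a, b);
}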
@@ -1,307 +0,0 @@
/*===----- avx10_2_512satcvtdsintrin.h - AVX10_2_512SATCVTDS intrinsics ----===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx10_2_512satcvtdsintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX10_2_512SATCVTDSINTRIN_H
#define __AVX10_2_512SATCVTDSINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
                 __min_vector_width__(512)))

// 512 bit : Double -> Int
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtts_pd_epi32(__m512d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
      (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtts_pd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
      (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtts_pd_epi32(__mmask8 __U, __m512d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
      (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
      _MM_FROUND_CUR_DIRECTION));
}

#define _mm512_cvtts_roundpd_epi32(__A, __R) \
  ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8si)_mm256_undefined_si256(), \
      (__mmask8) - 1, (const int)(__R)))

#define _mm512_mask_cvtts_roundpd_epi32(__W, __U, __A, __R) \
  ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U), \
      (const int)(__R)))

#define _mm512_maskz_cvtts_roundpd_epi32(__U, __A, __R) \
  ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8si)_mm256_setzero_si256(), (__mmask8)(__U), \
      (const int)(__R)))

// 512 bit : Double -> uInt
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtts_pd_epu32(__m512d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
      (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtts_pd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
      (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtts_pd_epu32(__mmask8 __U, __m512d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
      (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
      _MM_FROUND_CUR_DIRECTION));
}

#define _mm512_cvtts_roundpd_epu32(__A, __R) \
  ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8si)_mm256_undefined_si256(), \
      (__mmask8) - 1, (const int)(__R)))

#define _mm512_mask_cvtts_roundpd_epu32(__W, __U, __A, __R) \
  ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U), \
      (const int)(__R)))

#define _mm512_maskz_cvtts_roundpd_epu32(__U, __A, __R) \
  ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8si)_mm256_setzero_si256(), (__mmask8)(__U), \
      (const int)(__R)))

// 512 bit : Double -> Long

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtts_pd_epi64(__m512d __A) {
  return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
      (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtts_pd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
  return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
      (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtts_pd_epi64(__mmask8 __U, __m512d __A) {
  return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
      (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
      _MM_FROUND_CUR_DIRECTION));
}

#define _mm512_cvtts_roundpd_epi64(__A, __R) \
  ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8di)_mm512_undefined_epi32(), \
      (__mmask8) - 1, (const int)(__R)))

#define _mm512_mask_cvtts_roundpd_epi64(__W, __U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
      (const int)(__R)))

#define _mm512_maskz_cvtts_roundpd_epi64(__U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
      (const int)(__R)))

// 512 bit : Double -> ULong

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtts_pd_epu64(__m512d __A) {
  return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
      (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtts_pd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
  return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
      (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtts_pd_epu64(__mmask8 __U, __m512d __A) {
  return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
      (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
      _MM_FROUND_CUR_DIRECTION));
}

#define _mm512_cvtts_roundpd_epu64(__A, __R) \
  ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8di)_mm512_undefined_epi32(), \
      (__mmask8) - 1, (const int)(__R)))

#define _mm512_mask_cvtts_roundpd_epu64(__W, __U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
      (const int)(__R)))

#define _mm512_maskz_cvtts_roundpd_epu64(__U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
      (__v8df)(__m512d)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
      (const int)(__R)))

// 512 bit: Float -> int
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi32(__m512 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
      (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtts_ps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
      (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtts_ps_epi32(__mmask16 __U, __m512 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
      (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
      _MM_FROUND_CUR_DIRECTION));
}

#define _mm512_cvtts_roundps_epi32(__A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
      (__v16sf)(__m512)(__A), (__v16si)_mm512_undefined_epi32(), \
      (__mmask16) - 1, (const int)(__R)))

#define _mm512_mask_cvtts_roundps_epi32(__W, __U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
      (__v16sf)(__m512)(__A), (__v16si)(__m512i)(__W), (__mmask16)(__U), \
      (const int)(__R)))

#define _mm512_maskz_cvtts_roundps_epi32(__U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
      (__v16sf)(__m512)(__A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(__U), (const int)(__R)))

// 512 bit: Float -> uint
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu32(__m512 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
      (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtts_ps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
      (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtts_ps_epu32(__mmask16 __U, __m512 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
      (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
      _MM_FROUND_CUR_DIRECTION));
}

#define _mm512_cvtts_roundps_epu32(__A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
      (__v16sf)(__m512)(__A), (__v16si)_mm512_undefined_epi32(), \
      (__mmask16) - 1, (const int)(__R)))

#define _mm512_mask_cvtts_roundps_epu32(__W, __U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
      (__v16sf)(__m512)(__A), (__v16si)(__m512i)(__W), (__mmask16)(__U), \
      (const int)(__R)))

#define _mm512_maskz_cvtts_roundps_epu32(__U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
      (__v16sf)(__m512)(__A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(__U), (const int)(__R)))

// 512 bit : float -> long
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi64(__m256 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
      (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtts_ps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
      (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtts_ps_epi64(__mmask8 __U, __m256 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
      (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
      _MM_FROUND_CUR_DIRECTION));
}

#define _mm512_cvtts_roundps_epi64(__A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
      (__v8sf)(__m256)(__A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
      (const int)(__R)))

#define _mm512_mask_cvtts_roundps_epi64(__W, __U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
      (__v8sf)(__m256)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
      (const int)(__R)))

#define _mm512_maskz_cvtts_roundps_epi64(__U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
      (__v8sf)(__m256)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
      (const int)(__R)))

// 512 bit : float -> ulong
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu64(__m256 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
      (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtts_ps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
      (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtts_ps_epu64(__mmask8 __U, __m256 __A) {
  return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
      (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
      _MM_FROUND_CUR_DIRECTION));
}

#define _mm512_cvtts_roundps_epu64(__A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
      (__v8sf)(__m256)(__A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
      (const int)(__R)))

#define _mm512_mask_cvtts_roundps_epu64(__W, __U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
      (__v8sf)(__m256)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
      (const int)(__R)))

#define _mm512_maskz_cvtts_roundps_epu64(__U, __A, __R) \
  ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
      (__v8sf)(__m256)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
      (const int)(__R)))

#undef __DEFAULT_FN_ATTRS
#endif // __AVX10_2_512SATCVTDSINTRIN_H
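A usage sketch for the saturating truncations above (not from the header): the "s" variants are assumed to clamp out-of-range inputs to the destination type's limits instead of producing the integer indefinite value.

#include <immintrin.h>

// Truncate eight doubles to eight 32-bit integers with saturation.
static inline __m256i doubles_to_i32_saturated(__m512d x) {
  return _mm512_cvtts_pd_epi32(x);
}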
@@ -1,301 +0,0 @@
/*===------ avx10_2_512satcvtintrin.h - AVX10_2_512SATCVT intrinsics -------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx10_2_512satcvtintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AVX10_2_512SATCVTINTRIN_H
#define __AVX10_2_512SATCVTINTRIN_H

#define _mm512_ipcvts_bf16_epi8(A) \
  ((__m512i)__builtin_ia32_vcvtbf162ibs512((__v32bf)(__m512bh)(A)))

#define _mm512_mask_ipcvts_bf16_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_ipcvts_bf16_epi8(A), \
                                       (__v32hi)(__m512i)(W)))

#define _mm512_maskz_ipcvts_bf16_epi8(U, A) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_ipcvts_bf16_epi8(A), \
                                       (__v32hi)_mm512_setzero_si512()))

#define _mm512_ipcvts_bf16_epu8(A) \
  ((__m512i)__builtin_ia32_vcvtbf162iubs512((__v32bf)(__m512bh)(A)))

#define _mm512_mask_ipcvts_bf16_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_ipcvts_bf16_epu8(A), \
                                       (__v32hi)(__m512i)(W)))

#define _mm512_maskz_ipcvts_bf16_epu8(U, A) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_ipcvts_bf16_epu8(A), \
                                       (__v32hi)_mm512_setzero_si512()))

#define _mm512_ipcvtts_bf16_epi8(A) \
  ((__m512i)__builtin_ia32_vcvttbf162ibs512((__v32bf)(__m512bh)(A)))

#define _mm512_mask_ipcvtts_bf16_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \
                                       (__v32hi)(__m512i)(W)))

#define _mm512_maskz_ipcvtts_bf16_epi8(U, A) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \
                                       (__v32hi)_mm512_setzero_si512()))

#define _mm512_ipcvtts_bf16_epu8(A) \
  ((__m512i)__builtin_ia32_vcvttbf162iubs512((__v32bf)(__m512bh)(A)))

#define _mm512_mask_ipcvtts_bf16_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \
                                       (__v32hi)(__m512i)(W)))

#define _mm512_maskz_ipcvtts_bf16_epu8(U, A) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \
                                       (__v32hi)_mm512_setzero_si512()))

#define _mm512_ipcvts_ph_epi8(A) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvts_ph_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
                                              (__v32hu)(W), (__mmask32)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvts_ph_epi8(U, A) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_ipcvts_roundph_epi8(A, R) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
                                              (__v32hu)_mm512_setzero_si512(), \
                                              (__mmask32) - 1, (const int)R))

#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R))

#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
                                              (__v32hu)_mm512_setzero_si512(), \
                                              (__mmask32)(U), (const int)R))

#define _mm512_ipcvts_ph_epu8(A) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvts_ph_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask((__v32hf)(__m512h)(A), \
                                               (__v32hu)(W), (__mmask32)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvts_ph_epu8(U, A) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_ipcvts_roundph_epu8(A, R) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
      (const int)R))

#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R))

#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
      (const int)R))

#define _mm512_ipcvts_ps_epi8(A) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvts_ps_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \
                                              (__v16su)(W), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvts_ps_epi8(U, A) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_ipcvts_roundps_epi8(A, R) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \
                                              (__v16su)_mm512_setzero_si512(), \
                                              (__mmask16) - 1, (const int)R))

#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R))

#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \
                                              (__v16su)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (const int)R))

#define _mm512_ipcvts_ps_epu8(A) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvts_ps_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask((__v16sf)(__m512)(A), \
                                               (__v16su)(W), (__mmask16)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvts_ps_epu8(U, A) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_ipcvts_roundps_epu8(A, R) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
      (const int)R))

#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R))

#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
      (const int)R))

#define _mm512_ipcvtts_ph_epi8(A) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvtts_ph_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask((__v32hf)(__m512h)(A), \
                                               (__v32hu)(W), (__mmask32)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvtts_ph_epi8(U, A) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_ipcvtts_roundph_epi8(A, S) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
      S))

#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, S) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S))

#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, S) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
      S))

#define _mm512_ipcvtts_ph_epu8(A) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvtts_ph_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask((__v32hf)(__m512h)(A), \
                                                (__v32hu)(W), (__mmask32)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvtts_ph_epu8(U, A) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_ipcvtts_roundph_epu8(A, S) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
      S))

#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, S) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S))

#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, S) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
      S))

#define _mm512_ipcvtts_ps_epi8(A) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvtts_ps_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask((__v16sf)(__m512h)(A), \
                                               (__v16su)(W), (__mmask16)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvtts_ps_epi8(U, A) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_ipcvtts_roundps_epi8(A, S) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
      S))

#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, S) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
      (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S))

#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, S) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
      S))

#define _mm512_ipcvtts_ps_epu8(A) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvtts_ps_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask((__v16sf)(__m512h)(A), \
                                                (__v16su)(W), (__mmask16)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvtts_ps_epu8(U, A) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_ipcvtts_roundps_epu8(A, S) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
      S))

#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, S) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
      (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S))

#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, S) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
      S))

#endif // __AVX10_2_512SATCVTINTRIN_H
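A call-shape sketch for the ipcvt macros above (not from the header): how the byte-sized results are laid out inside the 512-bit destination is an assumption best verified against the ISA reference.

#include <immintrin.h>

// Saturating FP16 -> signed-byte conversion of all 32 elements.
static inline __m512i fp16_to_i8_saturated(__m512h x) {
  return _mm512_ipcvts_ph_epi8(x);
}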
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,66 +0,0 @@
/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AVX10_2COPYINTRIN_H
#define __AVX10_2COPYINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
                 __min_vector_width__(128)))

/// Constructs a 128-bit integer vector, setting the lower 32 bits to the
/// lower 32 bits of the parameter \a __A; the upper bits are zeroed.
///
/// \code{.operation}
/// result[31:0] := __A[31:0]
/// result[MAX:32] := 0
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> VMOVD </c> instruction.
///
/// \param __A
///    A 128-bit integer vector.
/// \returns A 128-bit integer vector. The lower 32 bits are copied from the
///    parameter \a __A; the upper bits are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) {
  return (__m128i)__builtin_shufflevector(
      (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4);
}

/// Constructs a 128-bit integer vector, setting the lower 16 bits to the
/// lower 16 bits of the parameter \a __A; the upper bits are zeroed.
///
/// \code{.operation}
/// result[15:0] := __A[15:0]
/// result[MAX:16] := 0
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> VMOVW </c> instruction.
///
/// \param __A
///    A 128-bit integer vector.
/// \returns A 128-bit integer vector. The lower 16 bits are copied from the
///    parameter \a __A; the upper bits are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) {
  return (__m128i)__builtin_shufflevector(
      (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8);
}
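The zero-extending moves above are easy to sanity-check against plain masking; a small illustrative sketch (the helper name and test values are hypothetical, and the final compare assumes SSE4.1):

/* Illustrative check, not part of the header: _mm_move_epi32 should
   behave like clearing every bit above bit 31. */
#include <immintrin.h>

static int check_move_epi32(void) {
  __m128i x = _mm_set_epi32(4, 3, 2, 1);
  __m128i moved = _mm_move_epi32(x);                      /* {1, 0, 0, 0} */
  __m128i masked = _mm_and_si128(x, _mm_set_epi32(0, 0, 0, -1));
  __m128i diff = _mm_xor_si128(moved, masked);
  return _mm_testz_si128(diff, diff);                     /* 1 when equal */
}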
#undef __DEFAULT_FN_ATTRS128

#endif // __AVX10_2COPYINTRIN_H
@@ -1,232 +0,0 @@
/*===-------- avx10_2minmaxintrin.h - AVX10_2MINMAX intrinsics -------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx10_2minmaxintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AVX10_2MINMAXINTRIN_H
#define __AVX10_2MINMAXINTRIN_H

#define _mm_minmax_pbh(A, B, C) \
  ((__m128bh)__builtin_ia32_vminmaxbf16128((__m128bh)(__v8bf)(A), \
                                           (__m128bh)(__v8bf)(B), (int)(C)))

#define _mm_mask_minmax_pbh(W, U, A, B, C) \
  ((__m128bh)__builtin_ia32_selectpbf_128( \
      (__mmask8)(U), \
      (__v8bf)_mm_minmax_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), \
                             (int)(C)), \
      (__v8bf)(W)))

#define _mm_maskz_minmax_pbh(U, A, B, C) \
  ((__m128bh)__builtin_ia32_selectpbf_128( \
      (__mmask8)(U), \
      (__v8bf)_mm_minmax_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), \
                             (int)(C)), \
      (__v8bf)__builtin_bit_cast(__m128bh, _mm_setzero_ps())))

#define _mm256_minmax_pbh(A, B, C) \
  ((__m256bh)__builtin_ia32_vminmaxbf16256((__m256bh)(__v16bf)(A), \
                                           (__m256bh)(__v16bf)(B), (int)(C)))

#define _mm256_mask_minmax_pbh(W, U, A, B, C) \
  ((__m256bh)__builtin_ia32_selectpbf_256( \
      (__mmask16)(U), \
      (__v16bf)_mm256_minmax_pbh((__m256bh)(__v16bf)(A), \
                                 (__m256bh)(__v16bf)(B), (int)(C)), \
      (__v16bf)(W)))

#define _mm256_maskz_minmax_pbh(U, A, B, C) \
  ((__m256bh)__builtin_ia32_selectpbf_256( \
      (__mmask16)(U), \
      (__v16bf)_mm256_minmax_pbh((__m256bh)(__v16bf)(A), \
                                 (__m256bh)(__v16bf)(B), (int)(C)), \
      (__v16bf)__builtin_bit_cast(__m256bh, _mm256_setzero_ps())))

#define _mm_minmax_pd(A, B, C) \
  ((__m128d)__builtin_ia32_vminmaxpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm_mask_minmax_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_vminmaxpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_minmax_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_vminmaxpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))

#define _mm256_minmax_pd(A, B, C) \
  ((__m256d)__builtin_ia32_vminmaxpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_minmax_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_vminmaxpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_minmax_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_vminmaxpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_minmax_ph(A, B, C) \
  ((__m128h)__builtin_ia32_vminmaxph128_mask( \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \
      (__v8hf)_mm_setzero_ph(), (__mmask8)-1))

#define _mm_mask_minmax_ph(W, U, A, B, C) \
  ((__m128h)__builtin_ia32_vminmaxph128_mask( \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \
      (__v8hf)(__m128h)(W), (__mmask8)(U)))

#define _mm_maskz_minmax_ph(U, A, B, C) \
  ((__m128h)__builtin_ia32_vminmaxph128_mask( \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \
      (__v8hf)_mm_setzero_ph(), (__mmask8)(U)))

#define _mm256_minmax_ph(A, B, C) \
  ((__m256h)__builtin_ia32_vminmaxph256_mask( \
      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
      (__v16hf)_mm256_setzero_ph(), (__mmask16)-1))

#define _mm256_mask_minmax_ph(W, U, A, B, C) \
  ((__m256h)__builtin_ia32_vminmaxph256_mask( \
      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
      (__v16hf)(__m256h)(W), (__mmask16)(U)))

#define _mm256_maskz_minmax_ph(U, A, B, C) \
  ((__m256h)__builtin_ia32_vminmaxph256_mask( \
      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \
      (__v16hf)_mm256_setzero_ph(), (__mmask16)(U)))

#define _mm_minmax_ps(A, B, C) \
  ((__m128)__builtin_ia32_vminmaxps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_minmax_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_vminmaxps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_minmax_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_vminmaxps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))

#define _mm256_minmax_ps(A, B, C) \
  ((__m256)__builtin_ia32_vminmaxps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_minmax_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_vminmaxps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_minmax_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_vminmaxps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_minmax_sd(A, B, C) \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_minmax_sd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_minmax_sd(U, A, B, C) \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_minmax_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_undefined_pd(), (__mmask8)-1, (int)(R)))

#define _mm_mask_minmax_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_minmax_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))

#define _mm_minmax_sh(A, B, C) \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \
      (__v8hf)_mm_undefined_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_minmax_sh(W, U, A, B, C) \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \
      (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_minmax_sh(U, A, B, C) \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \
      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_minmax_round_sh(A, B, C, R) \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \
      (__v8hf)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))

#define _mm_mask_minmax_round_sh(W, U, A, B, C, R) \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \
      (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_minmax_round_sh(U, A, B, C, R) \
  ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \
      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))

#define _mm_minmax_ss(A, B, C) \
  ((__m128)__builtin_ia32_vminmaxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_undefined_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_minmax_ss(W, U, A, B, C) \
  ((__m128)__builtin_ia32_vminmaxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W), \
      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_minmax_ss(U, A, B, C) \
  ((__m128)__builtin_ia32_vminmaxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_minmax_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vminmaxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_undefined_ps(), (__mmask8)-1, (int)(R)))

#define _mm_mask_minmax_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_vminmaxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm_maskz_minmax_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vminmaxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))

#endif // __AVX10_2MINMAXINTRIN_H
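A hedged usage sketch of the macros above; the helper name and the selector value 0 are placeholders, since the immediate picks one of the IEEE 754-2019 min/max operations defined by the VMINMAX immediate encoding:

/* Sketch only: consult the VMINMAX encoding for the operation you
   actually want; 0 is used here purely as a compile-time constant. */
#include <immintrin.h>

static __m128d minmax_demo(__m128d a, __m128d b, __mmask8 keep) {
  /* maskz form: lanes with a clear bit in `keep` come back as 0.0. */
  return _mm_maskz_minmax_pd(keep, a, b, 0);
}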
@@ -1,409 +0,0 @@
/*===---- avx10_2niintrin.h - AVX10.2 new instruction intrinsics -----------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx10_2niintrin.h> directly; include <immintrin.h> instead."
#endif

#ifdef __SSE2__

#ifndef __AVX10_2NIINTRIN_H
#define __AVX10_2NIINTRIN_H

#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
                 __min_vector_width__(256)))

/* VNNI FP16 */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_dpph_ps(__m128 __W,
                                                           __m128h __A,
                                                           __m128h __B) {
  return (__m128)__builtin_ia32_vdpphps128((__v4sf)__W, (__v8hf)__A,
                                           (__v8hf)__B);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_dpph_ps(__m128 __W,
                                                                __mmask8 __U,
                                                                __m128h __A,
                                                                __m128h __B) {
  return (__m128)__builtin_ia32_selectps_128(
      (__mmask8)__U, (__v4sf)_mm_dpph_ps(__W, __A, __B), (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_dpph_ps(__mmask8 __U,
                                                                 __m128 __W,
                                                                 __m128h __A,
                                                                 __m128h __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_dpph_ps(__W, __A, __B),
                                             (__v4sf)_mm_setzero_ps());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_dpph_ps(__m256 __W,
                                                              __m256h __A,
                                                              __m256h __B) {
  return (__m256)__builtin_ia32_vdpphps256((__v8sf)__W, (__v16hf)__A,
                                           (__v16hf)__B);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_dpph_ps(__m256 __W, __mmask8 __U, __m256h __A, __m256h __B) {
  return (__m256)__builtin_ia32_selectps_256(
      (__mmask8)__U, (__v8sf)_mm256_dpph_ps(__W, __A, __B), (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_dpph_ps(__mmask8 __U, __m256 __W, __m256h __A, __m256h __B) {
  return (__m256)__builtin_ia32_selectps_256(
      (__mmask8)__U, (__v8sf)_mm256_dpph_ps(__W, __A, __B),
      (__v8sf)_mm256_setzero_ps());
}
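The FP16 dot-product shape is easiest to see with concrete lanes; a hedged sketch (helper name illustrative): each 32-bit result lane accumulates one adjacent pair of FP16 products into the FP32 accumulator.

/* Sketch only: for each of the four FP32 lanes,
   result[i] = acc[i] + a[2i]*b[2i] + a[2i+1]*b[2i+1]. */
#include <immintrin.h>

static __m128 fp16_dot_step(__m128 acc, __m128h a, __m128h b) {
  return _mm_dpph_ps(acc, a, b);
}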

/* VMPSADBW */
#define _mm_mask_mpsadbw_epu8(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \
      (__v8hi)(__m128i)(W)))

#define _mm_maskz_mpsadbw_epu8(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \
      (__v8hi)_mm_setzero_si128()))

#define _mm256_mask_mpsadbw_epu8(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_mpsadbw_epu8(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
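These wrappers just layer a write-mask onto the existing MPSADBW intrinsics; a hedged sketch of the masked form (helper name and the immediate 0 are illustrative):

/* Sketch only: compute the eight 4-byte SAD sums, keep the lanes
   selected by `keep`, and take the remaining lanes from `fallback`. */
#include <immintrin.h>

static __m128i masked_sad(__m128i fallback, __mmask8 keep, __m128i a,
                          __m128i b) {
  return _mm_mask_mpsadbw_epu8(fallback, keep, a, b, 0);
}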

/* VNNI INT8 */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbssd_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbssd_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbssd_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbssd_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbssd_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbssd_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbssd_epi32(__mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbssd_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbssds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbssds_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbssds_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbssds_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbssds_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbssds_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbssds_epi32(
    __mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbssds_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbsud_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbsud_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbsud_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbsud_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbsud_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbsud_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbsud_epi32(__mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbsud_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbsuds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbsuds_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbsuds_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbsuds_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbsuds_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbsuds_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbsuds_epi32(
    __mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbsuds_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbuud_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbuud_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbuud_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbuud_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbuud_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbuud_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbuud_epi32(__mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbuud_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbuuds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbuuds_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbuuds_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      __U, (__v4si)_mm_dpbuuds_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbuuds_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbuuds_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbuuds_epi32(
    __mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      __U, (__v8si)_mm256_dpbuuds_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}
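A hedged sketch of how the INT8 VNNI family reads in practice (helper name illustrative): the signed-signed form accumulates four byte products per 32-bit lane.

/* Sketch only: each dword lane i becomes
   acc[i] + sum over k in 0..3 of a[4i+k] * b[4i+k], signed bytes. */
#include <immintrin.h>

static __m128i s8_dot_step(__m128i acc, __m128i a, __m128i b) {
  return _mm_dpbssd_epi32(acc, a, b);
}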

/* VNNI INT16 */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwsud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwsud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsuds_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwsuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsuds_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwsuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwsuds_epi32(
    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwusd_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwusd_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwusds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwusds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwusds_epi32(
    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwuud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuud_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwuud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuud_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwuuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwuuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwuuds_epi32(
    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}

#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128

#endif /* __AVX10_2NIINTRIN_H */
#endif /* __SSE2__ */
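The word-granularity VNNI forms follow the same pattern with two 16-bit products per lane; a hedged sketch (helper name illustrative):

/* Sketch only: each dword lane i becomes
   acc[i] + a[2i]*b[2i] + a[2i+1]*b[2i+1], with signed words from `a`
   and unsigned words from `b` in the "su" flavor. */
#include <immintrin.h>

static __m256i s16u16_dot_step(__m256i acc, __m256i a, __m256i b) {
  return _mm256_dpwsud_epi32(acc, a, b);
}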
Some files were not shown because too many files have changed in this diff.