mirror of https://gitee.com/namelin2022/ollama
Browse Source
* add build to .dockerignore * test: only build one arch * add build to .gitignore * fix ccache path * filter amdgpu targets * only filter if autodetecting * Don't clobber gpu list for default runner This ensures the GPU specific environment variables are set properly * explicitly set CXX compiler for HIP * Update build_windows.ps1 This isn't complete, but is close. Dependencies are missing, and it only builds the "default" preset. * build: add ollama subdir * add .git to .dockerignore * docs: update development.md * update build_darwin.sh * remove unused scripts * llm: add cwd and build/lib/ollama to library paths * default DYLD_LIBRARY_PATH to LD_LIBRARY_PATH in runner on macOS * add additional cmake output vars for msvc * interim edits to make server detection logic work with dll directories like lib/ollama/cuda_v12 * remove unncessary filepath.Dir, cleanup * add hardware-specific directory to path * use absolute server path * build: linux arm * cmake install targets * remove unused files * ml: visit each library path once * build: skip cpu variants on arm * build: install cpu targets * build: fix workflow * shorter names * fix rocblas install * docs: clean up development.md * consistent build dir removal in development.md * silence -Wimplicit-function-declaration build warnings in ggml-cpu * update readme * update development readme * llm: update library lookup logic now that there is one runner (#8587) * tweak development.md * update docs * add windows cuda/rocm tests --------- Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Daniel Hiltgen <daniel@ollama.com>brucemacd/next-bpe-test
committed by
GitHub
542 changed files with 5778 additions and 11451 deletions
File diff suppressed because it is too large
@ -0,0 +1,112 @@ |
|||
cmake_minimum_required(VERSION 3.21)

project(Ollama C CXX)

# Provides check_language(), used later in this file to probe for the optional
# CUDA and HIP toolchains.
include(CheckLanguage)

find_package(Threads REQUIRED)

# Default to an optimized build, but honor a build type the user (or a preset)
# already selected instead of unconditionally overriding it.
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()
set(BUILD_SHARED_LIBS ON)

# Plain C++17, required, without compiler-specific extensions.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Settings read by the ggml sources that are pulled in with add_subdirectory()
# further down; their exact semantics are defined by that subproject.
set(GGML_BUILD ON)
set(GGML_SHARED ON)
set(GGML_CCACHE ON)
set(GGML_BACKEND_DL ON)
set(GGML_BACKEND_SHARED ON)
set(GGML_SCHED_MAX_COPIES 4)

set(GGML_LLAMAFILE ON)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
set(GGML_CUDA_GRAPHS ON)
|||
# Build every CPU variant unless the target is ARM.
#   - If CMAKE_OSX_ARCHITECTURES is set, it decides: anything that does not
#     mention arm64 gets all variants.
#   - Otherwise fall back to CMAKE_SYSTEM_PROCESSOR.
# The leading "CMAKE_OSX_ARCHITECTURES AND" guard is essential: an empty value
# never matches "arm64", so without the guard the first clause is true on every
# host that leaves CMAKE_OSX_ARCHITECTURES unset and ARM builds would enable all
# variants as well.
if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
    OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))
    set(GGML_CPU_ALL_VARIANTS ON)
endif()
|||
|
|||
# Where the libraries live in the build tree and in the install tree.
set(OLLAMA_BUILD_DIR ${CMAKE_BINARY_DIR}/lib/ollama)
set(OLLAMA_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib/ollama)

# Route runtime and library artifacts to OLLAMA_BUILD_DIR, including the
# per-configuration (_DEBUG / _RELEASE) variants of each output variable.
foreach(ollama_output_var IN ITEMS
        CMAKE_RUNTIME_OUTPUT_DIRECTORY
        CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG
        CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE
        CMAKE_LIBRARY_OUTPUT_DIRECTORY
        CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG
        CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE)
    set(${ollama_output_var} ${OLLAMA_BUILD_DIR})
endforeach()

# Directory-scoped include paths, declared before add_subdirectory() —
# presumably so the targets defined by the ggml subdirectory inherit them.
include_directories(
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu
    ${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx
)

set(GGML_CPU ON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
# Keep the aggregate ggml target out of the default "all" build.
set_target_properties(ggml PROPERTIES EXCLUDE_FROM_ALL TRUE)

# Install whatever ggml-cpu was manually made to depend on; when that property
# is unset, install ggml-cpu itself.
get_target_property(CPU_VARIANTS ggml-cpu MANUALLY_ADDED_DEPENDENCIES)
if(NOT CPU_VARIANTS)
    set(CPU_VARIANTS "ggml-cpu")
endif()

# PRE_EXCLUDE_REGEXES ".*" filters out every runtime dependency, so only the
# listed targets themselves are installed for the CPU component.
install(TARGETS ggml-base ${CPU_VARIANTS}
    RUNTIME_DEPENDENCIES
        PRE_EXCLUDE_REGEXES ".*"
    RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CPU
    LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CPU
    FRAMEWORK DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CPU
)
|||
|
|||
# Optional CUDA backend: only configured when a CUDA compiler is detected.
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
    # When no architectures were requested, target the GPUs of the build host.
    # The "native" value is only used on CMake 3.24 and newer.
    if(NOT CMAKE_CUDA_ARCHITECTURES)
        if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
            set(CMAKE_CUDA_ARCHITECTURES "native")
        endif()
    endif()

    find_package(CUDAToolkit)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)

    # One install directory per CUDA major version, e.g. cuda_v11 / cuda_v12.
    set(OLLAMA_CUDA_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/cuda_v${CUDAToolkit_VERSION_MAJOR})

    # Install ggml-cuda plus only the cublas, cublasLt and cudart runtime
    # libraries, resolved from the toolkit directories; everything else is
    # excluded by the catch-all PRE_EXCLUDE_REGEXES.
    install(TARGETS ggml-cuda
        RUNTIME_DEPENDENCIES
            DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_LIBRARY_DIR}
            PRE_INCLUDE_REGEXES cublas cublasLt cudart
            PRE_EXCLUDE_REGEXES ".*"
        RUNTIME DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA
        LIBRARY DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA
    )
endif()
|||
|
|||
# Optional HIP (ROCm) backend: only configured when a HIP compiler is detected.
check_language(HIP)
if(CMAKE_HIP_COMPILER)
    set(HIP_PLATFORM "amd")

    # Decide whether AMDGPU_TARGETS was supplied by the user BEFORE
    # find_package(hip) runs. Testing it only afterwards cannot work: filtering
    # inside "if(NOT AMDGPU_TARGETS)" would only ever filter an empty list.
    # NOTE(review): this presumes find_package(hip) populates AMDGPU_TARGETS when
    # it is unset; if it stores the value in the cache, re-configuring an existing
    # build tree will treat it as user-provided and skip the filter — confirm.
    set(OLLAMA_AMDGPU_TARGETS_AUTODETECT OFF)
    if(NOT AMDGPU_TARGETS)
        set(OLLAMA_AMDGPU_TARGETS_AUTODETECT ON)
    endif()

    find_package(hip REQUIRED)

    # Only narrow the list when it was auto-detected; an explicit user list is
    # left untouched.
    if(OLLAMA_AMDGPU_TARGETS_AUTODETECT)
        list(FILTER AMDGPU_TARGETS INCLUDE REGEX "^gfx(900|94[012]|101[02]|1030|110[012])$")
    endif()

    # Skip the backend entirely when no target is left.
    if(AMDGPU_TARGETS)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
        set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm)

        # Install ggml-hip together with the listed ROCm runtime libraries;
        # everything else is excluded, as is anything resolved from a path
        # containing "system32".
        install(TARGETS ggml-hip
            RUNTIME_DEPENDENCIES
                DIRECTORIES ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR}
                PRE_INCLUDE_REGEXES amdhip64 hipblas rocblas amd_comgr hsa_runtime64 rocprofiler-register drm_amdgpu drm numa
                PRE_EXCLUDE_REGEXES ".*"
                POST_EXCLUDE_REGEXES "system32"
            RUNTIME DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP
            LIBRARY DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP
        )

        # Copy the rocblas directory from the first HIP bin/lib directory that
        # contains one.
        foreach(HIP_LIB_BIN_INSTALL_DIR IN ITEMS ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR})
            if(EXISTS ${HIP_LIB_BIN_INSTALL_DIR}/rocblas)
                install(DIRECTORY ${HIP_LIB_BIN_INSTALL_DIR}/rocblas DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP)
                break()
            endif()
        endforeach()
    endif()
endif()
|||
@ -0,0 +1,110 @@ |
|||
{ |
|||
"version": 3, |
|||
"configurePresets": [ |
|||
{ |
|||
"name": "Default", |
|||
"binaryDir": "${sourceDir}/build", |
|||
"installDir": "${sourceDir}/dist", |
|||
"cacheVariables": { |
|||
"CMAKE_BUILD_TYPE": "Release" |
|||
} |
|||
}, |
|||
{ |
|||
"name": "CPU", |
|||
"inherits": [ "Default" ] |
|||
}, |
|||
{ |
|||
"name": "CUDA", |
|||
"inherits": [ "Default" ] |
|||
}, |
|||
{ |
|||
"name": "CUDA 11", |
|||
"inherits": [ "CUDA" ], |
|||
"cacheVariables": { |
|||
"CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;62;70;72;75;80;86" |
|||
} |
|||
}, |
|||
{ |
|||
"name": "CUDA 12", |
|||
"inherits": [ "CUDA" ], |
|||
"cacheVariables": { |
|||
"CMAKE_CUDA_ARCHITECTURES": "60;61;62;70;72;75;80;86;87;89;90;90a" |
|||
} |
|||
}, |
|||
{ |
|||
"name": "JetPack 5", |
|||
"inherits": [ "CUDA" ], |
|||
"cacheVariables": { |
|||
"CMAKE_CUDA_ARCHITECTURES": "72;87" |
|||
} |
|||
}, |
|||
{ |
|||
"name": "JetPack 6", |
|||
"inherits": [ "CUDA" ], |
|||
"cacheVariables": { |
|||
"CMAKE_CUDA_ARCHITECTURES": "87" |
|||
} |
|||
}, |
|||
{ |
|||
"name": "ROCm", |
|||
"inherits": [ "Default" ], |
|||
"cacheVariables": { |
|||
"CMAKE_HIP_PLATFORM": "amd" |
|||
} |
|||
}, |
|||
{ |
|||
"name": "ROCm 6", |
|||
"inherits": [ "ROCm" ], |
|||
"cacheVariables": { |
|||
"AMDGPU_TARGETS": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" |
|||
} |
|||
} |
|||
], |
|||
"buildPresets": [ |
|||
{ |
|||
"name": "Default", |
|||
"configurePreset": "Default", |
|||
"configuration": "Release" |
|||
}, |
|||
{ |
|||
"name": "CPU", |
|||
"configurePreset": "Default", |
|||
"targets": [ "ggml-cpu" ] |
|||
}, |
|||
{ |
|||
"name": "CUDA", |
|||
"configurePreset": "CUDA", |
|||
"targets": [ "ggml-cuda" ] |
|||
}, |
|||
{ |
|||
"name": "CUDA 11", |
|||
"inherits": [ "CUDA" ], |
|||
"configurePreset": "CUDA 11" |
|||
}, |
|||
{ |
|||
"name": "CUDA 12", |
|||
"inherits": [ "CUDA" ], |
|||
"configurePreset": "CUDA 12" |
|||
}, |
|||
{ |
|||
"name": "JetPack 5", |
|||
"inherits": [ "CUDA" ], |
|||
"configurePreset": "JetPack 5" |
|||
}, |
|||
{ |
|||
"name": "JetPack 6", |
|||
"inherits": [ "CUDA" ], |
|||
"configurePreset": "JetPack 6" |
|||
}, |
|||
{ |
|||
"name": "ROCm", |
|||
"configurePreset": "ROCm", |
|||
"targets": [ "ggml-hip" ] |
|||
}, |
|||
{ |
|||
"name": "ROCm 6", |
|||
"inherits": [ "ROCm" ], |
|||
"configurePreset": "ROCm 6" |
|||
} |
|||
] |
|||
} |
|||
@ -1,201 +1,128 @@ |
|||
ARG GOLANG_VERSION=1.22.8 |
|||
ARG CUDA_VERSION_11=11.3.1 |
|||
ARG CUDA_VERSION_12=12.4.0 |
|||
ARG ROCM_VERSION=6.1.2 |
|||
ARG JETPACK_6=r36.2.0 |
|||
ARG JETPACK_5=r35.4.1 |
|||
|
|||
### To create a local image for building linux binaries on mac or windows with efficient incremental builds |
|||
# |
|||
# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 . |
|||
# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64 |
|||
# |
|||
### Then incremental builds will be much faster in this container |
|||
# |
|||
# make -j 10 dist |
|||
# |
|||
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64 |
|||
ARG GOLANG_VERSION |
|||
ARG CUDA_VERSION_11 |
|||
ARG CUDA_VERSION_12 |
|||
COPY ./scripts/rh_linux_deps.sh / |
|||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH |
|||
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64 |
|||
RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh |
|||
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \ |
|||
dnf clean all && \ |
|||
dnf install -y \ |
|||
zsh \ |
|||
cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \ |
|||
cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g") |
|||
# TODO intel oneapi goes here... |
|||
ENV GOARCH amd64 |
|||
ENV CGO_ENABLED 1 |
|||
WORKDIR /go/src/github.com/ollama/ollama/ |
|||
ENTRYPOINT [ "zsh" ] |
|||
|
|||
### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds |
|||
# Note: this does not contain jetson variants |
|||
# |
|||
# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 . |
|||
# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64 |
|||
# |
|||
FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64 |
|||
ARG GOLANG_VERSION |
|||
ARG CUDA_VERSION_11 |
|||
ARG CUDA_VERSION_12 |
|||
COPY ./scripts/rh_linux_deps.sh / |
|||
RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh |
|||
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \ |
|||
dnf config-manager --set-enabled appstream && \ |
|||
dnf clean all && \ |
|||
dnf install -y \ |
|||
zsh \ |
|||
cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \ |
|||
cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g") |
|||
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin |
|||
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64 |
|||
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64 |
|||
ENV GOARCH arm64 |
|||
ENV CGO_ENABLED 1 |
|||
WORKDIR /go/src/github.com/ollama/ollama/ |
|||
ENTRYPOINT [ "zsh" ] |
|||
|
|||
FROM --platform=linux/amd64 unified-builder-amd64 AS build-amd64 |
|||
COPY . . |
|||
ARG OLLAMA_SKIP_CUDA_GENERATE |
|||
ARG OLLAMA_SKIP_ROCM_GENERATE |
|||
ARG OLLAMA_FAST_BUILD |
|||
ARG VERSION |
|||
ARG CUSTOM_CPU_FLAGS |
|||
# vim: filetype=dockerfile |
|||
|
|||
ARG FLAVOR=${TARGETARCH} |
|||
|
|||
ARG ROCMVERSION=6.1.2 |
|||
ARG JETPACK5VERSION=r35.4.1 |
|||
ARG JETPACK6VERSION=r36.2.0 |
|||
ARG CMAKEVERSION=3.31.2 |
|||
|
|||
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCMVERSION}-complete AS base-amd64 |
|||
RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \ |
|||
&& yum install -y yum-utils devtoolset-10-gcc devtoolset-10-gcc-c++ \ |
|||
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo \ |
|||
&& curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /usr/local/bin --strip-components 1 |
|||
ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:/opt/rh/devtoolset-11/root/usr/bin:$PATH |
|||
|
|||
FROM --platform=linux/arm64 rockylinux:8 AS base-arm64 |
|||
# install epel-release for ccache |
|||
RUN yum install -y yum-utils epel-release \ |
|||
&& yum install -y clang ccache \ |
|||
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo |
|||
ENV CC=clang CXX=clang++ |
|||
|
|||
FROM base-${TARGETARCH} AS base |
|||
ARG CMAKEVERSION |
|||
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 |
|||
COPY CMakeLists.txt CMakePresets.json . |
|||
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml |
|||
ENV LDFLAGS=-s |
|||
|
|||
FROM base AS cpu |
|||
# amd64 uses gcc which requires devtoolset-11 for AVX extensions while arm64 uses clang |
|||
RUN if [ "$(uname -m)" = "x86_64" ]; then yum install -y devtoolset-11-gcc devtoolset-11-gcc-c++; fi |
|||
ENV PATH=/opt/rh/devtoolset-11/root/usr/bin:$PATH |
|||
RUN --mount=type=cache,target=/root/.ccache \ |
|||
if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \ |
|||
make -j $(nproc) dist ; \ |
|||
else \ |
|||
make -j 5 dist ; \ |
|||
fi |
|||
RUN cd dist/linux-$GOARCH && \ |
|||
tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz |
|||
RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \ |
|||
cd dist/linux-$GOARCH-rocm && \ |
|||
tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\ |
|||
fi |
|||
|
|||
# Jetsons need to be built in discrete stages |
|||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64 |
|||
ARG GOLANG_VERSION |
|||
RUN apt-get update && apt-get install -y git curl ccache && \ |
|||
curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \ |
|||
ln -s /usr/local/go/bin/go /usr/local/bin/go && \ |
|||
ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \ |
|||
apt-get clean && rm -rf /var/lib/apt/lists/* |
|||
WORKDIR /go/src/github.com/ollama/ollama/ |
|||
COPY . . |
|||
ARG CGO_CFLAGS |
|||
ENV GOARCH arm64 |
|||
ARG VERSION |
|||
cmake --preset 'CPU' \ |
|||
&& cmake --build --parallel --preset 'CPU' \ |
|||
&& cmake --install build --component CPU --strip --parallel 8 |
|||
|
|||
FROM base AS cuda-11 |
|||
ARG CUDA11VERSION=11.3 |
|||
RUN yum install -y cuda-toolkit-${CUDA11VERSION//./-} |
|||
ENV PATH=/usr/local/cuda-11/bin:$PATH |
|||
RUN --mount=type=cache,target=/root/.ccache \ |
|||
make -j 5 dist_cuda_v11 \ |
|||
CUDA_ARCHITECTURES="72;87" \ |
|||
GPU_RUNNER_VARIANT=_jetpack5 \ |
|||
DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \ |
|||
DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5 |
|||
|
|||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64 |
|||
ARG GOLANG_VERSION |
|||
RUN apt-get update && apt-get install -y git curl ccache && \ |
|||
curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \ |
|||
ln -s /usr/local/go/bin/go /usr/local/bin/go && \ |
|||
ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \ |
|||
apt-get clean && rm -rf /var/lib/apt/lists/* |
|||
WORKDIR /go/src/github.com/ollama/ollama/ |
|||
COPY . . |
|||
ARG CGO_CFLAGS |
|||
ENV GOARCH arm64 |
|||
ARG VERSION |
|||
cmake --preset 'CUDA 11' \ |
|||
&& cmake --build --parallel --preset 'CUDA 11' \ |
|||
&& cmake --install build --component CUDA --strip --parallel 8 |
|||
|
|||
FROM base AS cuda-12 |
|||
ARG CUDA12VERSION=12.4 |
|||
RUN yum install -y cuda-toolkit-${CUDA12VERSION//./-} |
|||
ENV PATH=/usr/local/cuda-12/bin:$PATH |
|||
RUN --mount=type=cache,target=/root/.ccache \ |
|||
make -j 5 dist_cuda_v12 \ |
|||
CUDA_ARCHITECTURES="87" \ |
|||
GPU_RUNNER_VARIANT=_jetpack6 \ |
|||
DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \ |
|||
DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6 |
|||
cmake --preset 'CUDA 12' \ |
|||
&& cmake --build --parallel --preset 'CUDA 12' \ |
|||
&& cmake --install build --component CUDA --strip --parallel 8 |
|||
|
|||
FROM --platform=linux/arm64 unified-builder-arm64 AS build-arm64 |
|||
COPY . . |
|||
ARG OLLAMA_SKIP_CUDA_GENERATE |
|||
ARG OLLAMA_FAST_BUILD |
|||
ARG VERSION |
|||
FROM base AS rocm-6 |
|||
RUN --mount=type=cache,target=/root/.ccache \ |
|||
make -j 5 dist |
|||
COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ |
|||
COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ |
|||
RUN cd dist/linux-$GOARCH && \ |
|||
tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz |
|||
RUN cd dist/linux-$GOARCH-jetpack5 && \ |
|||
tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz |
|||
RUN cd dist/linux-$GOARCH-jetpack6 && \ |
|||
tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz |
|||
|
|||
FROM --platform=linux/amd64 scratch AS dist-amd64 |
|||
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz / |
|||
FROM --platform=linux/arm64 scratch AS dist-arm64 |
|||
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz / |
|||
FROM dist-$TARGETARCH AS dist |
|||
|
|||
|
|||
# For amd64 container images, filter out cuda/rocm to minimize size |
|||
FROM build-amd64 AS runners-cuda-amd64 |
|||
RUN rm -rf \ |
|||
./dist/linux-amd64/lib/ollama/libggml_hipblas.so \ |
|||
./dist/linux-amd64/lib/ollama/runners/rocm* |
|||
|
|||
FROM build-amd64 AS runners-rocm-amd64 |
|||
RUN rm -rf \ |
|||
./dist/linux-amd64/lib/ollama/libggml_cuda*.so \ |
|||
./dist/linux-amd64/lib/ollama/libcu*.so* \ |
|||
./dist/linux-amd64/lib/ollama/runners/cuda* |
|||
|
|||
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64 |
|||
RUN apt-get update && \ |
|||
apt-get install -y ca-certificates && \ |
|||
apt-get clean && rm -rf /var/lib/apt/lists/* |
|||
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ |
|||
COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ |
|||
|
|||
FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64 |
|||
RUN apt-get update && \ |
|||
apt-get install -y ca-certificates && \ |
|||
apt-get clean && rm -rf /var/lib/apt/lists/* |
|||
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/ |
|||
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ |
|||
COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/ |
|||
COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/ |
|||
|
|||
|
|||
# ROCm libraries larger so we keep it distinct from the CPU/CUDA image |
|||
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm |
|||
# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer |
|||
# across releases |
|||
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/ |
|||
RUN apt-get update && \ |
|||
apt-get install -y ca-certificates && \ |
|||
apt-get clean && rm -rf /var/lib/apt/lists/* |
|||
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ |
|||
COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ |
|||
|
|||
EXPOSE 11434 |
|||
ENV OLLAMA_HOST 0.0.0.0 |
|||
|
|||
ENTRYPOINT ["/bin/ollama"] |
|||
CMD ["serve"] |
|||
|
|||
FROM runtime-$TARGETARCH |
|||
EXPOSE 11434 |
|||
ENV OLLAMA_HOST 0.0.0.0 |
|||
cmake --preset 'ROCm 6' \ |
|||
&& cmake --build --parallel --preset 'ROCm 6' \ |
|||
&& cmake --install build --component HIP --strip --parallel 8 |
|||
|
|||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5 |
|||
ARG CMAKEVERSION |
|||
RUN apt-get update && apt-get install -y curl ccache \ |
|||
&& curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 |
|||
COPY CMakeLists.txt CMakePresets.json . |
|||
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml |
|||
RUN --mount=type=cache,target=/root/.ccache \ |
|||
cmake --preset 'JetPack 5' \ |
|||
&& cmake --build --parallel --preset 'JetPack 5' \ |
|||
&& cmake --install build --component CUDA --strip --parallel 8 |
|||
|
|||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6 |
|||
ARG CMAKEVERSION |
|||
RUN apt-get update && apt-get install -y curl ccache \ |
|||
&& curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 |
|||
COPY CMakeLists.txt CMakePresets.json . |
|||
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml |
|||
RUN --mount=type=cache,target=/root/.ccache \ |
|||
cmake --preset 'JetPack 6' \ |
|||
&& cmake --build --parallel --preset 'JetPack 6' \ |
|||
&& cmake --install build --component CUDA --strip --parallel 8 |
|||
|
|||
FROM base AS build |
|||
ARG GOVERSION=1.23.4 |
|||
RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local |
|||
ENV PATH=/usr/local/go/bin:$PATH |
|||
WORKDIR /go/src/github.com/ollama/ollama |
|||
COPY . . |
|||
ARG GOFLAGS="'-ldflags=-w -s'" |
|||
ENV CGO_ENABLED=1 |
|||
RUN --mount=type=cache,target=/root/.cache/go-build \ |
|||
go build -trimpath -buildmode=pie -o /bin/ollama . |
|||
|
|||
FROM --platform=linux/amd64 scratch AS amd64 |
|||
COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11 |
|||
COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12 |
|||
|
|||
FROM --platform=linux/arm64 scratch AS arm64 |
|||
COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11 |
|||
COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12 |
|||
COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 lib/ollama/cuda_jetpack5 |
|||
COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 lib/ollama/cuda_jetpack6 |
|||
|
|||
FROM --platform=linux/arm64 scratch AS rocm |
|||
COPY --from=rocm-6 dist/lib/ollama/rocm /lib/ollama/rocm |
|||
|
|||
FROM ${FLAVOR} AS archive |
|||
COPY --from=cpu dist/lib/ollama /lib/ollama |
|||
COPY --from=build /bin/ollama /bin/ollama |
|||
|
|||
FROM ubuntu:20.04 |
|||
RUN apt-get update \ |
|||
&& apt-get install -y ca-certificates \ |
|||
&& apt-get clean \ |
|||
&& rm -rf /var/lib/apt/lists/* |
|||
COPY --from=archive /bin /usr/bin |
|||
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin |
|||
COPY --from=archive /lib/ollama /usr/lib/ollama |
|||
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 |
|||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility |
|||
ENV NVIDIA_VISIBLE_DEVICES=all |
|||
|
|||
ENV OLLAMA_HOST=0.0.0.0:11434 |
|||
EXPOSE 11434 |
|||
ENTRYPOINT ["/bin/ollama"] |
|||
CMD ["serve"] |
|||
|
|||
@ -1,103 +0,0 @@ |
|||
# top level makefile for Ollama
|
|||
include make/common-defs.make |
|||
|
|||
|
|||
# Determine which if any GPU runners we should build
|
|||
include make/cuda-v11-defs.make |
|||
include make/cuda-v12-defs.make |
|||
include make/rocm-defs.make |
|||
|
|||
ifeq ($(CUSTOM_CPU_FLAGS),) |
|||
ifeq ($(ARCH),amd64) |
|||
RUNNER_TARGETS=cpu |
|||
endif |
|||
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
|
|||
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),) |
|||
ifneq ($(CUDA_11_COMPILER),) |
|||
RUNNER_TARGETS += cuda_v11 |
|||
endif |
|||
ifneq ($(CUDA_12_COMPILER),) |
|||
RUNNER_TARGETS += cuda_v12 |
|||
endif |
|||
endif |
|||
else # CUSTOM_CPU_FLAGS is set, we'll build only the latest cuda version detected
|
|||
ifneq ($(CUDA_12_COMPILER),) |
|||
RUNNER_TARGETS += cuda_v12 |
|||
else ifneq ($(CUDA_11_COMPILER),) |
|||
RUNNER_TARGETS += cuda_v11 |
|||
endif |
|||
endif |
|||
|
|||
ifeq ($(OLLAMA_SKIP_ROCM_GENERATE),) |
|||
ifneq ($(HIP_COMPILER),) |
|||
RUNNER_TARGETS += rocm |
|||
endif |
|||
endif |
|||
|
|||
|
|||
all: runners exe |
|||
|
|||
dist: $(addprefix dist_, $(RUNNER_TARGETS)) dist_exe |
|||
|
|||
dist_%: |
|||
@$(MAKE) --no-print-directory -f make/Makefile.$* dist |
|||
|
|||
runners: $(RUNNER_TARGETS) |
|||
|
|||
$(RUNNER_TARGETS): |
|||
@$(MAKE) --no-print-directory -f make/Makefile.$@ |
|||
|
|||
exe dist_exe: |
|||
@$(MAKE) --no-print-directory -f make/Makefile.ollama $@ |
|||
|
|||
help-sync apply-patches create-patches sync sync-clean: |
|||
@$(MAKE) --no-print-directory -f make/Makefile.sync $@ |
|||
|
|||
test integration lint: |
|||
@$(MAKE) --no-print-directory -f make/Makefile.test $@ |
|||
|
|||
clean: |
|||
rm -rf $(BUILD_DIR) $(DIST_LIB_DIR) $(OLLAMA_EXE) $(DIST_OLLAMA_EXE) |
|||
go clean -cache |
|||
|
|||
help: |
|||
@echo "The following make targets will help you build Ollama" |
|||
@echo "" |
|||
@echo " make all # (default target) Build Ollama llm subprocess runners, and the primary ollama executable" |
|||
@echo " make runners # Build Ollama llm subprocess runners; after you may use 'go build .' to build the primary ollama exectuable" |
|||
@echo " make <runner> # Build specific runners. Enabled: '$(RUNNER_TARGETS)'" |
|||
@echo " make dist # Build the runners and primary ollama executable for distribution" |
|||
@echo " make help-sync # Help information on vendor update targets" |
|||
@echo " make help-runners # Help information on runner targets" |
|||
@echo "" |
|||
@echo "The following make targets will help you test Ollama" |
|||
@echo "" |
|||
@echo " make test # Run unit tests" |
|||
@echo " make integration # Run integration tests. You must 'make all' first" |
|||
@echo " make lint # Run lint and style tests" |
|||
@echo "" |
|||
@echo "For more information see 'docs/development.md'" |
|||
@echo "" |
|||
|
|||
|
|||
help-runners: |
|||
@echo "The following runners will be built based on discovered GPU libraries: '$(RUNNER_TARGETS)'" |
|||
@echo "" |
|||
@echo "GPU Runner CPU Flags: '$(GPU_RUNNER_CPU_FLAGS)' (Override with CUSTOM_CPU_FLAGS)" |
|||
@echo "" |
|||
@echo "# CUDA_PATH sets the location where CUDA toolkits are present" |
|||
@echo "CUDA_PATH=$(CUDA_PATH)" |
|||
@echo " CUDA_11_PATH=$(CUDA_11_PATH)" |
|||
@echo " CUDA_11_COMPILER=$(CUDA_11_COMPILER)" |
|||
@echo " CUDA_12_PATH=$(CUDA_12_PATH)" |
|||
@echo " CUDA_12_COMPILER=$(CUDA_12_COMPILER)" |
|||
@echo "" |
|||
@echo "# HIP_PATH sets the location where the ROCm toolkit is present" |
|||
@echo "HIP_PATH=$(HIP_PATH)" |
|||
@echo " HIP_COMPILER=$(HIP_COMPILER)" |
|||
|
|||
.PHONY: all exe dist help help-sync help-runners test integration lint runners clean $(RUNNER_TARGETS) |
|||
|
|||
# Handy debugging for make variables
|
|||
print-%: |
|||
@echo '$*=$($*)' |
|||
@ -0,0 +1,56 @@ |
|||
UPSTREAM=https://github.com/ggerganov/llama.cpp.git |
|||
WORKDIR=llama/vendor |
|||
FETCH_HEAD=46e3556e01b824e52395fb050b29804b6cff2a7c |
|||
|
|||
.PHONY: help |
|||
help: |
|||
@echo "Available targets:" |
|||
@echo " sync Sync with upstream repositories" |
|||
@echo " checkout Checkout upstream repository" |
|||
@echo " apply-patches Apply patches to local repository" |
|||
@echo " format-patches Format patches from local repository" |
|||
@echo " clean Clean local repository" |
|||
@echo |
|||
@echo "Example:" |
|||
@echo " make -f $(lastword $(MAKEFILE_LIST)) clean sync" |
|||
|
|||
.PHONY: sync |
|||
sync: llama/llama.cpp ml/backend/ggml/ggml apply-patches |
|||
|
|||
.PHONY: llama/llama.cpp |
|||
llama/llama.cpp: llama/vendor/ apply-patches |
|||
rsync -arvzc -f "merge $@/.rsync-filter" $< $@ |
|||
|
|||
.PHONY: ml/backend/ggml/ggml apply-patches |
|||
ml/backend/ggml/ggml: llama/vendor/ggml/ apply-patches |
|||
rsync -arvzc -f "merge $@/.rsync-filter" $< $@ |
|||
|
|||
PATCHES=$(wildcard llama/patches/*.patch) |
|||
|
|||
.PHONY: apply-patches |
|||
.NOTPARALLEL: |
|||
apply-patches: $(addsuffix ed, $(PATCHES)) |
|||
|
|||
%.patched: %.patch |
|||
@if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi |
|||
|
|||
.PHONY: checkout |
|||
checkout: $(WORKDIR) |
|||
git -C $(WORKDIR) fetch |
|||
git -C $(WORKDIR) checkout -f $(FETCH_HEAD) |
|||
|
|||
$(WORKDIR): |
|||
git clone $(UPSTREAM) $(WORKDIR) |
|||
|
|||
# Regenerate the patch files in llama/patches from the commits that sit on top
# of FETCH_HEAD in the vendored checkout. Declared .PHONY so the rule always
# runs, even if a file named "format-patches" exists.
.PHONY: format-patches
format-patches: llama/patches
	git -C $(WORKDIR) format-patch \
		--no-signature \
		--no-numbered \
		--zero-commit \
		-o $(realpath $<) \
		$(FETCH_HEAD)
|||
|
|||
# Re-run the checkout target, then delete the "*.patched" marker files so every
# patch is applied again on the next apply-patches. Declared .PHONY so a stray
# file named "clean" cannot suppress the rule.
.PHONY: clean
clean: checkout
	$(RM) $(addsuffix ed, $(PATCHES))
|||
@ -0,0 +1,53 @@ |
|||
package discover |
|||
|
|||
import ( |
|||
"os" |
|||
"path/filepath" |
|||
"runtime" |
|||
) |
|||
|
|||
// LibOllamaPath is the directory in which dynamic libraries are looked up.
//
// In development it is usually 'build/lib/ollama', found either next to the
// executable or under the current working directory. In distribution builds it
// is 'lib/ollama' next to the executable on Windows, '../lib/ollama' relative
// to the executable on Linux, and the executable's own directory on macOS (and
// on any other OS).
//
// Note: in distribution builds, additional GPU-specific libraries are found in
// subdirectories of the returned path, such as 'cuda_v11', 'cuda_v12', 'rocm', etc.
//
// The value is empty only when the executable's location cannot be determined.
var LibOllamaPath string = func() string {
	exe, err := os.Executable()
	if err != nil {
		return ""
	}

	// Prefer the symlink-resolved location, but keep the unresolved path when
	// resolution fails rather than giving up on a usable library path.
	if resolved, err := filepath.EvalSymlinks(exe); err == nil {
		exe = resolved
	}
	exeDir := filepath.Dir(exe)

	libPath := exeDir
	switch runtime.GOOS {
	case "windows":
		libPath = filepath.Join(exeDir, "lib", "ollama")
	case "linux":
		libPath = filepath.Join(exeDir, "..", "lib", "ollama")
	}

	// build paths for development
	buildPaths := []string{
		filepath.Join(exeDir, "build", "lib", "ollama"),
	}
	// The working directory is only an extra candidate; if it is unavailable
	// the remaining candidates are still valid.
	if cwd, err := os.Getwd(); err == nil {
		buildPaths = append(buildPaths, filepath.Join(cwd, "build", "lib", "ollama"))
	}

	for _, p := range buildPaths {
		if _, err := os.Stat(p); err == nil {
			return p
		}
	}

	return libPath
}()
|||
@ -1,34 +0,0 @@ |
|||
/**
|
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "ggml-backend.h" |
|||
#include "ggml-cpu-impl.h" |
|||
|
|||
// GGML internal header
|
|||
|
|||
#if defined(__AMX_INT8__) && defined(__AVX512VNNI__) |
|||
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void); |
|||
#endif |
|||
@ -1,51 +0,0 @@ |
|||
/**
|
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#pragma once |
|||
|
|||
#include "ggml.h" |
|||
#include "ggml-backend.h" |
|||
|
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
// backend API
|
|||
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void); |
|||
|
|||
GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend); |
|||
|
|||
// number of threads used for conversion to float
|
|||
// for openblas and blis, this will also set the number of threads used for blas operations
|
|||
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads); |
|||
|
|||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void); |
|||
|
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
#endif |
|||
@ -1,34 +0,0 @@ |
|||
/**
|
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#pragma once |
|||
|
|||
#include "ggml-cpu-traits.h" |
|||
#include "ggml.h" |
|||
|
|||
// GGML internal header
|
|||
|
|||
ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void); |
|||
@ -1,64 +0,0 @@ |
|||
/**
|
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#pragma once |
|||
#include "ggml-backend-impl.h" |
|||
#include "ggml-cpu-impl.h" |
|||
#include "ggml.h" |
|||
|
|||
#ifdef __cplusplus |
|||
# include <vector> |
|||
extern "C" { |
|||
#endif |
|||
|
|||
// return true if op part of extra "accelerator"
|
|||
bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op); |
|||
bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size); |
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
|
|||
namespace ggml::cpu { |
|||
// register in tensor->extra
|
|||
class tensor_traits { |
|||
public: |
|||
virtual ~tensor_traits(); |
|||
virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) = 0; |
|||
virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0; |
|||
}; |
|||
|
|||
class extra_buffer_type { |
|||
public: |
|||
virtual ~extra_buffer_type(); |
|||
virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0; |
|||
virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0; |
|||
}; |
|||
} // namespace ggml::cpu
|
|||
|
|||
// implemented in ggml-cpu.cpp.
|
|||
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type(); |
|||
|
|||
#endif |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_ACC_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,60 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "arange.cuh" |
|||
|
|||
// Kernel: writes dst[i] = start + step * i for each i in [0, ne0).
// Each thread handles at most one element; threads past the end do nothing.
static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) {
    // blockIdx.x: index of the blockDim.x-sized chunk of ne0 this block covers
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < ne0) {
        dst[idx] = start + step * idx;
    }
}
|||
|
|||
// Host-side launcher: enqueues arange_f32 on `stream`, using blocks of
// CUDA_ARANGE_BLOCK_SIZE threads and rounding the block count up so that
// all ne0 elements are covered.
static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) {
    const int block_size = CUDA_ARANGE_BLOCK_SIZE;
    const int grid_size  = (ne0 + block_size - 1) / block_size;
    arange_f32<<<grid_size, block_size, 0, stream>>>(dst, ne0, start, step);
}
|||
|
|||
// ARANGE op entry point: fills the F32 tensor `dst` with start + step * i.
// start, stop and step are read from dst->op_params; the tensor's element
// count must equal ceil((stop - start) / step).
void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    float *      out    = (float *) dst->data;
    cudaStream_t stream = ctx.stream();

    GGML_ASSERT(dst->type == GGML_TYPE_F32);

    // op_params holds {start, stop, step} as three consecutive floats;
    // copy the raw bytes rather than dereferencing through a cast pointer
    float range[3];
    memcpy(range, dst->op_params, sizeof(range));
    const float start = range[0];
    const float stop  = range[1];
    const float step  = range[2];

    // the tensor must have been created with exactly this many elements
    const int64_t steps = (int64_t) ceil((stop - start) / step);
    GGML_ASSERT(ggml_nelements(dst) == steps);

    arange_f32_cuda(out, dst->ne[0], start, step, stream);
}
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_ARANGE_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,29 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
void ggml_cuda_argmax(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,29 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,35 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
|
|||
void ggml_cuda_op_repeat_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,60 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "clamp.cuh" |
|||
|
|||
// Kernel: dst[i] = x[i] limited to [min, max] for each i in [0, k).
// A value that compares neither below min nor above max (including NaN)
// is copied through unchanged.
static __global__ void clamp_f32(const float * x, float * dst, const float min, const float max, const int k) {
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;

    if (idx < k) {
        const float v = x[idx];
        float clamped = v;
        if (v < min) {
            clamped = min;
        } else if (v > max) {
            clamped = max;
        }
        dst[idx] = clamped;
    }
}
|||
|
|||
// Host-side launcher: enqueues clamp_f32 on `stream`, using blocks of
// CUDA_CLAMP_BLOCK_SIZE threads and rounding the block count up so that
// all k elements are covered.
static void clamp_f32_cuda(const float * x, float * dst, const float min, const float max, const int k, cudaStream_t stream) {
    const int block_size = CUDA_CLAMP_BLOCK_SIZE;
    const int grid_size  = (k + block_size - 1) / block_size;
    clamp_f32<<<grid_size, block_size, 0, stream>>>(x, dst, min, max, k);
}
|||
|
|||
|
|||
// CLAMP op entry point: clamps every element of dst->src[0] into `dst`.
// Both tensors must be F32; the lower and upper bounds are read from
// dst->op_params.
void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    const ggml_tensor * src0   = dst->src[0];
    const float *       in     = (const float *) src0->data;
    float *             out    = (float *) dst->data;
    cudaStream_t        stream = ctx.stream();

    GGML_ASSERT(src0->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F32);

    // op_params holds {min, max} as two consecutive floats; copy the raw bytes
    float bounds[2];
    memcpy(bounds, dst->op_params, sizeof(bounds));
    const float lo = bounds[0];
    const float hi = bounds[1];

    clamp_f32_cuda(in, out, lo, hi, ggml_nelements(src0), stream);
}
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_CLAMP_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_CONCAT_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,39 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256 |
|||
|
|||
// Function-pointer type for routines that take an untyped source buffer `x`
// and a destination buffer `y` of element type T, and run on `stream`.
// NOTE(review): `k` is presumably the number of elements to convert — confirm
// against the converter implementations.
template <typename T>
using to_t_cuda_t = void (*)(const void * __restrict__ x, T * __restrict__ y, int64_t k, cudaStream_t stream);

// Aliases for float and half destinations.
using to_fp32_cuda_t = to_t_cuda_t<float>;
using to_fp16_cuda_t = to_t_cuda_t<half>;
|||
|
|||
to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type); |
|||
|
|||
to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_COUNT_EQUAL_CHUNK_SIZE 128 |
|||
|
|||
void ggml_cuda_count_equal(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,35 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_CPY_BLOCK_SIZE 64 |
|||
|
|||
void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1); |
|||
|
|||
void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
|
|||
void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1); |
|||
@ -1,33 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_CROSS_ENTROPY_LOSS_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_cross_entropy_loss(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
|
|||
void ggml_cuda_cross_entropy_loss_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32 |
|||
|
|||
void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,29 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
void ggml_cuda_flash_attn_ext_tile_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,29 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,29 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_GET_ROWS_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_IM2COL_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,38 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
// maximum number of src0 rows with which to use mul_mat_vec over cuBLAS if FP16 tensor cores are available |
|||
#define MMV_MAX_ROWS 512 |
|||
|
|||
void ggml_cuda_mul_mat_vec(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); |
|||
|
|||
void ggml_cuda_op_mul_mat_vec( |
|||
ggml_backend_cuda_context & ctx, |
|||
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i, |
|||
const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols, |
|||
const int64_t src1_padded_row_size, cudaStream_t stream); |
|||
@ -1,35 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define MMVQ_MAX_BATCH_SIZE 8 // Max. batch size for which to use MMVQ kernels. |
|||
|
|||
void ggml_cuda_op_mul_mat_vec_q( |
|||
ggml_backend_cuda_context & ctx, |
|||
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i, |
|||
const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols, |
|||
const int64_t src1_padded_row_size, cudaStream_t stream); |
|||
@ -1,33 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
void ggml_cuda_op_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
|
|||
void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
|
|||
void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_OPT_STEP_ADAMW_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_opt_step_adamw(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,29 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,32 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_PAD_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_POOL2D_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,50 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#pragma once |
|||
|
|||
#include "common.cuh" |
|||
#include "mmq.cuh" |
|||
|
|||
#include <cstdint> |
|||
|
|||
// Size constants for the two quantization paths declared below.
// NOTE(review): presumably the CUDA thread-block sizes used by the kernels; the kernel
// definitions are not visible in this header - confirm against the implementation file.
#define CUDA_QUANTIZE_BLOCK_SIZE 256 |
|||
#define CUDA_QUANTIZE_BLOCK_SIZE_MMQ 128 |
|||
|
|||
// Compile-time checks: MATRIX_ROW_PADDING (declared outside this header) must be an exact
// multiple of CUDA_QUANTIZE_BLOCK_SIZE and of 4*CUDA_QUANTIZE_BLOCK_SIZE_MMQ. The assertion
// message names the failure mode being guarded against: out-of-bounds access.
static_assert(MATRIX_ROW_PADDING % CUDA_QUANTIZE_BLOCK_SIZE == 0, "Risk of out-of-bounds access."); |
|||
static_assert(MATRIX_ROW_PADDING % (4*CUDA_QUANTIZE_BLOCK_SIZE_MMQ) == 0, "Risk of out-of-bounds access."); |
|||
|
|||
// Function-pointer type whose parameter list is identical to both quantize_*_q8_1_cuda
// prototypes below, so either function can be stored in a quantize_cuda_t.
typedef void (*quantize_cuda_t)( |
|||
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded, |
|||
const ggml_type type_x, cudaStream_t stream); |
|||
|
|||
// Declaration only; the definition is not part of this header.
// Takes float input `x`, an untyped output pointer `vy`, and the CUDA stream to use.
void quantize_row_q8_1_cuda( |
|||
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded, |
|||
const ggml_type type_x, cudaStream_t stream); |
|||
|
|||
// Declaration only; same signature as quantize_row_q8_1_cuda.
// NOTE(review): the "mmq" in the name and the include of "mmq.cuh" suggest this variant
// produces the layout consumed by the MMQ kernels - confirm against the definition.
void quantize_mmq_q8_1_cuda( |
|||
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded, |
|||
const ggml_type type_x, cudaStream_t stream); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_ROPE_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,57 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "scale.cuh" |
|||
|
|||
// Element-wise scaling kernel: dst[i] = scale * x[i] for every i in [0, k).
//
// x     - input floats (read only)
// dst   - output floats, written at the same indices that are read from x
// scale - multiplier applied to every element
// k     - total number of elements to process
//
// All index arithmetic is done in 64 bits so that element counts above INT_MAX
// are addressed correctly. Each thread starts at its global index and advances
// by the total number of launched threads, so the kernel still covers all k
// elements when the launcher has to cap the number of blocks.
static __global__ void scale_f32(const float * x, float * dst, const float scale, const int64_t k) {
    const int64_t first  = (int64_t) blockDim.x * blockIdx.x + threadIdx.x;
    const int64_t stride = (int64_t) blockDim.x * gridDim.x;

    for (int64_t i = first; i < k; i += stride) {
        dst[i] = scale * x[i];
    }
}

// Launches scale_f32 on `stream` with CUDA_SCALE_BLOCK_SIZE threads per block
// and enough blocks to cover k elements (capped at the gridDim.x limit).
static void scale_f32_cuda(const float * x, float * dst, const float scale, const int64_t k, cudaStream_t stream) {
    if (k <= 0) {
        // Nothing to scale; a launch with zero blocks would be an invalid configuration.
        return;
    }

    // Largest value accepted for the x-dimension of a grid.
    const int64_t max_blocks = 0x7FFFFFFF;

    // Ceiling division, kept in 64 bits so it cannot overflow for large k.
    const int64_t wanted_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
    const unsigned int num_blocks = (unsigned int) (wanted_blocks < max_blocks ? wanted_blocks : max_blocks);

    scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
}

// Backend entry point for the scale operation: writes scale * src0 into dst.
//
// ctx - backend context; only its stream is used here
// dst - destination tensor; dst->src[0] is the input and dst->op_params holds
//       the scale factor as its first float
//
// Both tensors must be GGML_TYPE_F32 (asserted). The data is processed as a flat
// array of ggml_nelements(src0) floats.
// NOTE(review): this assumes src0 and dst are contiguous and equally sized; neither
// is checked here - confirm the caller guarantees it.
void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    const ggml_tensor * src0 = dst->src[0];
    const float * src0_d = (const float *)src0->data;
    float * dst_d = (float *)dst->data;
    cudaStream_t stream = ctx.stream();

    GGML_ASSERT(src0->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F32);

    // Copy the parameter bytes out with memcpy rather than casting the op_params pointer.
    float scale;
    memcpy(&scale, dst->op_params, sizeof(float));

    // The element count is forwarded at full width; it is no longer narrowed to int.
    scale_f32_cuda(src0_d, dst_d, scale, ggml_nelements(src0), stream);
}
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_SCALE_BLOCK_SIZE 256 |
|||
|
|||
void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
#define CUDA_SOFT_MAX_BLOCK_SIZE 1024 |
|||
|
|||
void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream); |
|||
|
|||
void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,65 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "sumrows.cuh" |
|||
|
|||
// Row-sum kernel: dst[row] = x[row*ncols + 0] + ... + x[row*ncols + ncols - 1].
//
// Rows of x are expected back to back with a stride of ncols floats. blockIdx.x selects
// the row; the threads of the block stride over that row's columns, each accumulating a
// private partial sum. The partials are combined with warp_reduce_sum and thread 0 of the
// block stores the result.
//
// NOTE(review): warp_reduce_sum is declared outside this file; it presumably combines only
// the lanes of a single warp, so this kernel expects blockDim.x == WARP_SIZE (which is how
// sum_rows_f32_cuda launches it) - confirm before launching with larger blocks.
static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) {
    const int row = blockIdx.x;
    const int col = threadIdx.x;

    // Form the row offset in 64 bits: `row * ncols` evaluated in int overflows once the
    // matrix holds more than INT_MAX elements.
    const float * x_row = x + (int64_t) row * ncols;

    float sum = 0.0f;
    for (int i = col; i < ncols; i += blockDim.x) {
        sum += x_row[i];
    }

    sum = warp_reduce_sum(sum);

    if (col == 0) {
        dst[row] = sum;
    }
}
|||
|
|||
// Enqueues k_sum_rows_f32 on `stream`: the grid has one block per row (nrows blocks) and
// every block is WARP_SIZE threads wide. No dynamic shared memory is requested.
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) {
    const dim3 grid(nrows, 1, 1);
    const dim3 threads(WARP_SIZE, 1, 1);

    k_sum_rows_f32<<<grid, threads, 0, stream>>>(x, dst, ncols);
}
|||
|
|||
// Backend entry point for the row-sum op: reduces every row of dst->src[0] into dst.
//
// Requires an F32 source, an F32 destination and a contiguous source (all asserted).
// The work is enqueued on the context's stream via sum_rows_f32_cuda.
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    const ggml_tensor * src0 = dst->src[0];

    const float * in_ptr  = static_cast<const float *>(src0->data);
    float       * out_ptr = static_cast<float *>(dst->data);
    cudaStream_t  queue   = ctx.stream();

    GGML_ASSERT(src0->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F32);
    GGML_ASSERT(ggml_is_contiguous(src0));

    // ne[0] is the row length; ggml_nrows gives the number of rows to reduce.
    const int64_t row_len   = src0->ne[0];
    const int64_t row_count = ggml_nrows(src0);

    sum_rows_f32_cuda(in_ptr, out_ptr, row_len, row_count, queue);
}
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
#include "common.cuh" |
|||
|
|||
// Host-side launcher for the row-sum kernel. Enqueues the work on `stream` using one
// block per row (nrows blocks) of WARP_SIZE threads; one float per row is written to dst.
// x is read as nrows rows of ncols floats each.
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream); |
|||
|
|||
// Backend op: sums each row of dst->src[0] into dst on the context's stream.
// Asserts that source and destination are F32 and that the source is contiguous.
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16); |
|||
@ -1,31 +0,0 @@ |
|||
/** |
|||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file |
|||
* |
|||
* MIT License |
|||
* |
|||
* Copyright (c) 2023-2024 The ggml authors |
|||
* |
|||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
* of this software and associated documentation files (the "Software"), to deal |
|||
* in the Software without restriction, including without limitation the rights |
|||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
* copies of the Software, and to permit persons to whom the Software is |
|||
* furnished to do so, subject to the following conditions: |
|||
* |
|||
* The above copyright notice and this permission notice shall be included in all |
|||
* copies or substantial portions of the Software. |
|||
* |
|||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
* SOFTWARE. |
|||
*/ |
|||
|
|||
// This file has been autogenerated by generate_cu_files.py, do not edit manually. |
|||
|
|||
#include "../fattn-vec-f16.cuh" |
|||
|
|||
DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0); |
|||
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue