0
点赞
收藏
分享

微信扫一扫

【MySQL】表的操作【有关表结构的操作】【创建、查看、删除、修改表结构】

weipeng2k 2024-09-12 阅读 42
linux昇腾

目的

从服务器将数据拷贝到昇腾310 ai卡

结论

数据进入到服务器后的内存不能直接用昇腾acl接口拷贝到AI卡。

需要

1)先用acl接口申请内存;

2) 将数据拷贝到acl申请的内存

3)用acl接口将数据拷贝到AI卡

makefile

# Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.

# CMake lowest version requirement
cmake_minimum_required(VERSION 3.5.1)

# project information
project(mem310p)

set(LIB_PATH $ENV{NPU_HOST_LIB})
# Dynamic libraries in the stub directory can only be used for compilation
if (NOT DEFINED ENV{NPU_HOST_LIB})
    #set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/CANN-6.4/runtime/lib64/stub/aarch64")
    set(LIB_PATH "/usr/local/Ascend/nnrt/latest/runtime/lib64/stub/")
    message(STATUS "set default LIB_PATH: ${LIB_PATH}")
else ()
    message(STATUS "env LIB_PATH: ${LIB_PATH}")
endif()

set(INC_PATH $ENV{NPU_HOST_INC})
# Dynamic libraries in the stub directory can only be used for compilation
if (NOT DEFINED ENV{NPU_HOST_INC})
    #set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest/CANN-6.4/runtime/include")
    # set(INC_PATH "/usr/local/Ascend/nnrt/6.2.RC2/aarch64-linux/include")
    #  set(INC_PATH "/usr/local/Ascend/nnrt/7.0.0/aarch64-linux/include")
     set(INC_PATH "/usr/local/Ascend/nnrt/latest/aarch64-linux/include")
    message(STATUS "set default INC_PATH: ${INC_PATH}")
else ()
    message(STATUS "env INC_PATH: ${INC_PATH}")
endif()

link_directories(
    ${LIB_PATH}
    ${LIB_PATH}/stub
)

add_executable(mem310p

#add_library(zh_sensor SHARED 
  mem310p.c
)


include_directories(mem310p
    ${INC_PATH}
    ${INC_PATH}/acl/media
   
)

add_compile_options(mem310p
    -O2
    -Wall
    -fpic
)

# sns_zh

target_link_libraries(mem310p
    acl_dvpp_mpi   
    ascendcl
    pthread
)

测试代码


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <signal.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <errno.h>
#include <stdint.h>
#include "acl/acl.h"
#define SAMPLE_PRT(fmt...)   \
    do { \
        printf("[%s]-%d: ", __FUNCTION__, __LINE__); \
        printf(fmt); \
    } while (0)

int SUCCESS = 0;
int FAILED = 1;
aclrtContext g_context      = NULL;
uint32_t g_device_id        = 0;


typedef uint64_t u64;
typedef uint32_t u32;


int32_t setup_acl_device()
{
    aclError aclRet = aclInit(NULL);
    if (aclRet != ACL_SUCCESS)
    {
        SAMPLE_PRT("aclInit fail with %d.\n", aclRet);
        return aclRet;
    }
    SAMPLE_PRT("aclInit succ.\n");

    aclRet = aclrtSetDevice(g_device_id);
    if (aclRet != ACL_SUCCESS)
    {
        SAMPLE_PRT("aclrtSetDevice %u fail with %d.\n", g_device_id, aclRet);
        aclFinalize();
        return aclRet;
    }
    SAMPLE_PRT("aclrtSetDevice(%u) succ.\n", g_device_id);

    aclRet = aclrtCreateContext(&g_context, g_device_id);
    if (aclRet != ACL_SUCCESS)
    {
        SAMPLE_PRT("acl create context failed with %d.\n", aclRet);
        aclrtResetDevice(g_device_id);
        aclFinalize();
        return aclRet;
    }
    SAMPLE_PRT("create context success\n");

    aclRet = aclrtGetCurrentContext(&g_context);
    if (aclRet != ACL_SUCCESS)
    {
        SAMPLE_PRT("get current context failed\n");
        aclrtDestroyContext(g_context);
        g_context = NULL;
        aclrtResetDevice(g_device_id);
        aclFinalize();
        return aclRet;
    }
    SAMPLE_PRT("get current context success\n");

    return SUCCESS;
}

void destroy_acl_device()
{
    if (g_context)
    {
        aclrtDestroyContext(g_context);
        g_context = NULL;
        aclrtResetDevice(g_device_id);
        aclFinalize();
    }
}

struct dev_addr
{
    u64  phy_addr;
    void  *virt_addr;
    u32  reg_len;
    void *mapbase;
};

static void *ioremap_system_mem(struct dev_addr *addr)
{
    void *map_base;
    int fd;
    off_t target;
    target = addr->phy_addr;
    // if ((fd = open("/dev/mem", O_RDWR | O_SYNC)) == -1)
    if ((fd = open("/dev/mem", O_RDWR | O_NONBLOCK)) == -1)
    {
        fprintf(stderr, "Error at line %d, file %s (%d) [%s]\n", \
                __LINE__, __FILE__, errno, strerror(errno));
        return NULL ;
    }

    fflush(stdout);

    map_base = mmap(0, addr->reg_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
                    target);

    if (map_base == (void *) -1)
    {
        fprintf(stderr, "Error at line %d, file %s (%d) [%s]\n", \
                __LINE__, __FILE__, errno, strerror(errno));
        return NULL ;
    }
    fflush(stdout);
    addr->mapbase = map_base;
    addr->virt_addr = map_base;//(map_base + (target & MAP_MASK));

    close(fd);
    return addr->virt_addr;
}

//#ifdef  __ARM__
#if 1
void neon_memcpy(volatile void *dst, volatile void *src, int sz)
{
    if (sz & 63)
    {
        sz = (sz & -64) + 64;
    }
    asm volatile(
        "NEONCopyPLD: \n"
        "sub %[dst], %[dst], #64 \n"
        "1: \n"
        "ldnp q0, q1, [%[src]] \n"
        "ldnp q2, q3, [%[src], #32] \n"
        "add %[dst], %[dst], #64 \n"
        "subs %[sz], %[sz], #64 \n"
        "add %[src], %[src], #64 \n"
        "stnp q0, q1, [%[dst]] \n"
        "stnp q2, q3, [%[dst], #32] \n"
        "b.gt 1b \n"
        : [dst]"+r"(dst), [src]"+r"(src), [sz]"+r"(sz) : : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");
}

#else

#include <arm_neon.h>

void *neon_memcpy(void *dest, void *src, size_t count)
{
    int i;
    unsigned long *s = (unsigned long *)src;
    unsigned long *d = (unsigned long *)dest;
    for (i = 0; i < count / 64; i++)
    {
        vst1q_u64(&d[0], vld1q_u64(&s[0]));
        vst1q_u64(&d[2], vld1q_u64(&s[2]));
        vst1q_u64(&d[4], vld1q_u64(&s[4]));
        vst1q_u64(&d[6], vld1q_u64(&s[6]));
        d += 8;
        s += 8;
    }
    return dest;
}

#endif
//#endif

struct timeval start_time;
struct timeval end_time;
double elapsed_time = 0.0;

static double elapsed(struct timeval start_time, struct timeval end_time)
{
    elapsed_time = (end_time.tv_sec * 1000000 + end_time.tv_usec - start_time.tv_sec
                    * 1000000 - start_time.tv_usec);
    return elapsed_time;
}


int32_t main(int32_t argc, char *argv[])
{

    int ret = SUCCESS;
    aclError aclRet = ACL_SUCCESS;
    // acl资源初始化
    ret = setup_acl_device();
    if (ret != SUCCESS)
    {
        SAMPLE_PRT("Setup Device failed! ret code:%#x\n", ret);
        return FAILED;
    }
    struct dev_addr atu_addr;
    //atu_addr.phy_addr = 0x2000000000;
    atu_addr.phy_addr = 0x2200000000;  //0x22 0000 0000
    atu_addr.virt_addr = 0;
    atu_addr.reg_len = 0x2000000;


    size_t g_memory_size        = 4752 * 3795;
    if (NULL == ioremap_system_mem(&atu_addr))
    {
        printf("ioremap fail\n");
        return -1;
    }
    void *host_buffer = NULL;
    void *host_buffer1 = NULL;
    void *device_buffer = NULL;

    void *my_host_buffer = NULL;
    my_host_buffer = malloc(g_memory_size);
    if (NULL == my_host_buffer)
    {
        printf("malloc fail\n");
        return 0;
    }
    memset((unsigned char *)my_host_buffer, 0xaa, g_memory_size);

    memset((unsigned char *)atu_addr.virt_addr, 0x55, g_memory_size);
    // 申请device内存
    aclRet = aclrtMalloc(&device_buffer, g_memory_size, ACL_MEM_MALLOC_HUGE_FIRST);
    if (aclRet != ACL_SUCCESS)
    {
        SAMPLE_PRT("aclrtMalloc failed\n");

    }

    // 申请host内存
    aclRet = aclrtMallocHost(&host_buffer, g_memory_size);
    if (aclRet != ACL_SUCCESS)
    {
        SAMPLE_PRT("aclrtMallocHost failed\n");

    }

    // 申请host内存
    aclRet = aclrtMallocHost(&host_buffer1, g_memory_size);
    if (aclRet != ACL_SUCCESS)
    {
        SAMPLE_PRT("aclrtMallocHost failed\n");

    }
    gettimeofday(&(start_time), NULL);
    memcpy(host_buffer1, (unsigned char *)atu_addr.virt_addr, g_memory_size);
    gettimeofday(&(end_time), NULL);
    elapsed_time = elapsed(start_time, end_time);

    // 内存拷贝H2D
    aclRet = aclrtMemcpy(device_buffer, g_memory_size,
                         host_buffer1 /* atu_addr.virt_addr*/, g_memory_size, ACL_MEMCPY_HOST_TO_DEVICE);
    if (aclRet != ACL_SUCCESS)
    {
        SAMPLE_PRT("aclrtMemcpy H2D failed\n");

    }


    printf("....,dma time %f\n", elapsed_time);

    // 内存拷贝D2H
    aclRet = aclrtMemcpy(host_buffer, g_memory_size, device_buffer,
                         g_memory_size, ACL_MEMCPY_DEVICE_TO_HOST);
    if (aclRet != ACL_SUCCESS)
    {
        SAMPLE_PRT("aclrtMemcpy D2H failed\n");

    }
    SAMPLE_PRT("\data  is %x\n",  *(unsigned char *)host_buffer);

    if (munmap(atu_addr.mapbase, atu_addr.reg_len) == -1)
    {
        printf("un ioremap fail\n");
        return -1;
    }

    // 释放al资源
    destroy_acl_device();
    SAMPLE_PRT("run success!\n");
    return SUCCESS;
}

上述测试代码,测试从mmap映射的内存到acl分配的内存,耗时70几个ms,这是不正确的。

耗时分析

待续。。。。

举报

相关推荐

0 条评论