diff --git a/c/coreslam_neon.c b/c/coreslam_neon.c deleted file mode 100644 index c5bed53..0000000 --- a/c/coreslam_neon.c +++ /dev/null @@ -1,117 +0,0 @@ -/* -coreslam_neon.c ARM Cortex Neon acceleration for CoreSLAM - -Copyright (C) 2014 by Simon D. Levy - -This code is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as -published by the Free Software Foundation, either version 3 of the -License, or (at your option) any later version. - -This code is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this code. If not, see . -*/ - - -#ifdef _MSC_VER -typedef __int64 int64_t; /* Define it from MSVC's internal type */ -#else -#include /* Use the C99 official header */ -#endif - -#include -#include - -#include - -#include "coreslam.h" -#include "coreslam_internals.h" - -/* Performs one rotation/translation */ -static void -neon_coord_4( - float32x4_t a_4, - float32x4_t b_4, - float32x4_t x_4, - float32x4_t y_4, - float32x4_t pos_4f, - float32x4_t point5_4, - int * result) -{ - float32x4_t tmp1 = vmulq_f32(a_4, x_4); - float32x4_t tmp2 = vmulq_f32(b_4, y_4); - tmp2 = vaddq_f32(tmp1, tmp2); - tmp2 = vaddq_f32(tmp2, pos_4f); - tmp2 = vaddq_f32(tmp2, point5_4); - int32x4_t c_4 = vcvtq_s32_f32(tmp2); - vst1q_s32(result, c_4); -} - - -int -distance_scan_to_map( - map_t * map, - scan_t * scan, - position_t position) -{ - /* Pre-compute sine and cosine of angle for rotation */ - double position_theta_radians = radians(position.theta_degrees); - double costheta = cos(position_theta_radians) * map->scale_pixels_per_mm; - double sintheta = sin(position_theta_radians) * map->scale_pixels_per_mm; - - /* Pre-compute pixel offset for translation */ - double pos_x_pix = position.x_mm * map->scale_pixels_per_mm; - double pos_y_pix = position.y_mm * map->scale_pixels_per_mm; - - - float32x4_t half_4 = vdupq_n_f32(0.5); - - float32x4_t costheta_4 = vdupq_n_f32(costheta); - float32x4_t sintheta_4 = vdupq_n_f32(sintheta); - float32x4_t nsintheta_4 = vdupq_n_f32(-sintheta); - - float32x4_t pos_x_4 = vdupq_n_f32(pos_x_pix); - float32x4_t pos_y_4 = vdupq_n_f32(pos_y_pix); - - int npoints = 0; /* number of points where scan matches map */ - int64_t sum = 0; - - /* Stride by 4 over obstacle points in scan */ - int i = 0; - for (i=0; iobst_npoints; i+=4) - { - /* Duplicate current obstacle point X and Y in 128-bit registers */ - float32x4_t scan_x_4 = vld1q_f32(&scan->obst_x_mm[i]); - float32x4_t scan_y_4 = vld1q_f32(&scan->obst_y_mm[i]); - - /* Compute X coordinate of 4 rotated / translated points at once */ - int xarr[4]; - neon_coord_4(costheta_4, nsintheta_4, scan_x_4, scan_y_4, pos_x_4, half_4, xarr); - - /* Compute Y coordinate of 4 rotated / translated points at once */ - int yarr[4]; - neon_coord_4(sintheta_4, costheta_4, scan_x_4, scan_y_4, pos_y_4, half_4, yarr); - - /* Handle rotated/translated points serially */ - int j; - for (j=0; j<4 && (i+j)obst_npoints; ++j) - { - int x = xarr[j]; - int y = yarr[j]; - - /* Add point if in map bounds */ - if (x >= 0 && x < map->size_pixels && y >= 0 && y < map->size_pixels) - { - sum += map->pixels[y * map->size_pixels + x]; - npoints++; - } - } - } - - return npoints ? (int)(sum * 1024 / npoints) : -1; -}