Create coreslam_armv7l.c
This commit is contained in:
117
c/coreslam_armv7l.c
Normal file
117
c/coreslam_armv7l.c
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
/*
|
||||||
|
coreslam_armv7l.c ARM Cortex Neon acceleration for CoreSLAM
|
||||||
|
|
||||||
|
Copyright (C) 2014 by Simon D. Levy
|
||||||
|
|
||||||
|
This code is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Lesser General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This code is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public License
|
||||||
|
along with this code. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
typedef __int64 int64_t; /* Define it from MSVC's internal type */
|
||||||
|
#else
|
||||||
|
#include <stdint.h> /* Use the C99 official header */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
#include "coreslam.h"
|
||||||
|
#include "coreslam_internals.h"
|
||||||
|
|
||||||
|
/*
 * Computes one output coordinate of a rotation/translation for four points
 * at once: result[k] = floor(a*x + b*y + pos + bias) for each lane k.
 *
 *   a_4, b_4   per-lane multipliers applied to x_4 and y_4
 *   x_4, y_4   per-lane input coordinates
 *   pos_4f     per-lane translation added to the rotated value
 *   point5_4   per-lane rounding bias added before the integer conversion
 *   result     receives the four integer coordinates (room for 4 ints)
 */
static void
neon_coord_4(
        float32x4_t a_4,
        float32x4_t b_4,
        float32x4_t x_4,
        float32x4_t y_4,
        float32x4_t pos_4f,
        float32x4_t point5_4,
        int * result)
{
    /* a*x + b*y, then translation, then rounding bias (same order per lane) */
    float32x4_t biased = vaddq_f32(vmulq_f32(a_4, x_4), vmulq_f32(b_4, y_4));
    biased = vaddq_f32(biased, pos_4f);
    biased = vaddq_f32(biased, point5_4);

    /*
     * vcvtq_s32_f32 rounds toward zero, so on its own it maps every value in
     * (-1, 0) to 0 and coordinates just below zero would collapse onto 0.
     * Turn the truncation into a floor: wherever the truncated value ended up
     * above the input, the comparison yields an all-ones lane (-1 as a signed
     * integer), which is added to step that lane down by one.  The saturating
     * add keeps an already-saturated INT32_MIN lane from wrapping around.
     */
    int32x4_t truncated = vcvtq_s32_f32(biased);
    uint32x4_t rounded_up = vcgtq_f32(vcvtq_f32_s32(truncated), biased);
    int32x4_t floored = vqaddq_s32(truncated, vreinterpretq_s32_u32(rounded_up));

    vst1q_s32(result, floored);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Scores how well the obstacle points of a scan match the map when the scan
 * is placed at the given position.
 *
 * Each obstacle point is rotated by position.theta_degrees, scaled by
 * map->scale_pixels_per_mm, translated by the position, and converted to
 * integer pixel coordinates.  Points that land inside the
 * size_pixels x size_pixels map contribute their map->pixels value to a sum.
 *
 * Returns (sum * 1024 / number of in-bounds points) as an int, or -1 when no
 * obstacle point falls inside the map.
 */
int
distance_scan_to_map(
        map_t * map,
        scan_t * scan,
        position_t position)
{
    /* Pre-compute sine and cosine of angle for rotation, scaled to pixels */
    double position_theta_radians = radians(position.theta_degrees);
    double costheta = cos(position_theta_radians) * map->scale_pixels_per_mm;
    double sintheta = sin(position_theta_radians) * map->scale_pixels_per_mm;

    /* Pre-compute pixel offset for translation */
    double pos_x_pix = position.x_mm * map->scale_pixels_per_mm;
    double pos_y_pix = position.y_mm * map->scale_pixels_per_mm;

    /* Broadcast the loop-invariant values to all four lanes */
    float32x4_t half_4 = vdupq_n_f32(0.5);

    float32x4_t costheta_4 = vdupq_n_f32(costheta);
    float32x4_t sintheta_4 = vdupq_n_f32(sintheta);
    float32x4_t nsintheta_4 = vdupq_n_f32(-sintheta);

    float32x4_t pos_x_4 = vdupq_n_f32(pos_x_pix);
    float32x4_t pos_y_4 = vdupq_n_f32(pos_y_pix);

    int npoints = 0; /* number of points where scan matches map */
    int64_t sum = 0;

    /* Stride by 4 over obstacle points in scan */
    int i = 0;
    for (i=0; i<scan->obst_npoints; i+=4)
    {
        /* Number of valid points in this stride (fewer than 4 only at the tail) */
        int lanes = scan->obst_npoints - i;
        if (lanes > 4)
        {
            lanes = 4;
        }

        /* Load four consecutive obstacle points' X and Y into 128-bit registers */
        float32x4_t scan_x_4;
        float32x4_t scan_y_4;
        if (lanes == 4)
        {
            scan_x_4 = vld1q_f32(&scan->obst_x_mm[i]);
            scan_y_4 = vld1q_f32(&scan->obst_y_mm[i]);
        }
        else
        {
            /* Partial final stride: stage the remaining points in zero-padded
               buffers so the 4-wide load never reads past obst_npoints */
            float tail_x[4] = {0.0f, 0.0f, 0.0f, 0.0f};
            float tail_y[4] = {0.0f, 0.0f, 0.0f, 0.0f};
            int k;
            for (k=0; k<lanes; ++k)
            {
                tail_x[k] = scan->obst_x_mm[i+k];
                tail_y[k] = scan->obst_y_mm[i+k];
            }
            scan_x_4 = vld1q_f32(tail_x);
            scan_y_4 = vld1q_f32(tail_y);
        }

        /* Compute X coordinate of 4 rotated / translated points at once */
        int xarr[4];
        neon_coord_4(costheta_4, nsintheta_4, scan_x_4, scan_y_4, pos_x_4, half_4, xarr);

        /* Compute Y coordinate of 4 rotated / translated points at once */
        int yarr[4];
        neon_coord_4(sintheta_4, costheta_4, scan_x_4, scan_y_4, pos_y_4, half_4, yarr);

        /* Handle rotated/translated points serially; padding lanes are skipped */
        int j;
        for (j=0; j<lanes; ++j)
        {
            int x = xarr[j];
            int y = yarr[j];

            /* Add point if in map bounds */
            if (x >= 0 && x < map->size_pixels && y >= 0 && y < map->size_pixels)
            {
                sum += map->pixels[y * map->size_pixels + x];
                npoints++;
            }
        }
    }

    /* Average map value over matched points, scaled by 1024; -1 if none matched */
    return npoints ? (int)(sum * 1024 / npoints) : -1;
}
|
||||||
Reference in New Issue
Block a user