Files
breezyslam/c/coreslam_i686.c
2014-09-07 20:48:10 -04:00

118 lines
3.5 KiB
C

/*
coreslam_i686.c Intel Streaming SIMD Extensions for CoreSLAM
Based on
@InProceedings{,
author={El Hamzaoui, O. and Steux, B.},
title={A fast scan matching for grid-based laser SLAM using streaming SIMD extensions},
booktitle={Control Automation Robotics Vision (ICARCV), 2010 11th International Conference on},
year={2010},
month={Dec},
pages={1986-1990}
}
Copyright (C) 2014 Simon D. Levy
This code is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this code. If not, see <http:#www.gnu.org/licenses/>.
*/
#ifdef _MSC_VER
typedef __int64 int64_t; /* Define it from MSVC's internal type */
#else
#include <stdint.h> /* Use the C99 official header */
#endif
#include <math.h>
#include "coreslam.h"
#include "coreslam_internals.h"
#include <pmmintrin.h>
#include <xmmintrin.h>
#include <mmintrin.h>
/* This structure supports extracting two 32-bit integer coordinates from a 64-bit register */
typedef union
{
struct
{
int y;
int x;
} pos;
__m64 mmx;
} cs_pos_mmx_t;
int
distance_scan_to_map(
map_t * map,
scan_t * scan,
position_t position)
{
int npoints = 0; /* number of points where scan matches map */
int64_t sum = 0; /* sum of map values at those points */
/* Pre-compute sine and cosine of angle for rotation */
double position_theta_radians = radians(position.theta_degrees);
double costheta = cos(position_theta_radians) * map->scale_pixels_per_mm;
double sintheta = sin(position_theta_radians) * map->scale_pixels_per_mm;
/* Pre-compute pixel offset for translation */
double pos_x_pix = position.x_mm * map->scale_pixels_per_mm;
double pos_y_pix = position.y_mm * map->scale_pixels_per_mm;
__m128 sincos128 = _mm_set_ps (costheta, -sintheta, sintheta, costheta);
__m128 posxy128 = _mm_set_ps (pos_x_pix, pos_y_pix, pos_x_pix, pos_y_pix);
int i = 0;
for (i=0; i<scan->npoints; i++)
{
/* Consider only scan points representing obstacles */
if (scan->value[i] == OBSTACLE)
{
/* Compute coordinate pair using SSE */
__m128 xy128 = _mm_set_ps (scan->x_mm[i], scan->y_mm[i], scan->x_mm[i], scan->y_mm[i]);
xy128 = _mm_mul_ps(sincos128, xy128);
xy128 = _mm_hadd_ps(xy128, xy128);
xy128 = _mm_add_ps(xy128, posxy128);
cs_pos_mmx_t pos;
pos.mmx = _mm_cvtps_pi32(xy128);
/* Extract coordinates */
int x = pos.pos.x;
int y = pos.pos.y;
/* Empty the multimedia state to avoid floating-point errors later */
_mm_empty();
/* Add point if in map bounds */
if (x >= 0 && x < map->size_pixels && y >= 0 && y < map->size_pixels)
{
sum += map->pixels[y * map->size_pixels + x];
npoints++;
}
}
}
/* Return sum scaled by number of points, or -1 if none */
return npoints ? (int)(sum * 1024 / npoints) : -1;
}