/* -*- C -*- */
/*
 * Copyright (c) 2007 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include "fftw-spu.h"
#include "../fftw-cell.h"

/* Edge length, in elements, of the square tiles used for 2D copies. */
#define BLOCKSZ 64

/*
 * Perform this worker's share of a strided copy from c->I to c->O.
 *
 * The iteration space is n elements (input stride c->is_bytes, output
 * stride c->os_bytes) by v vectors (input stride c->ivs_bytes, output
 * stride c->ovs_bytes).  It is cut into tiles of at most
 * BLOCKSZ x BLOCKSZ; when a chunk along v holds a single vector, the
 * tile is instead a 1D strip of up to BLOCKSZ * BLOCKSZ / 8 elements.
 *
 * Tiles are numbered in traversal order and dealt out round-robin:
 * this worker handles exactly the tiles whose index modulo c->nspe
 * equals c->my_id.  c->nspe is used as a modulus and so must be nonzero.
 *
 * Each owned tile is passed to X(spu_dma2d) twice through the same
 * staging buffer, first with MFC_GET_CMD on the input address and then
 * with MFC_PUT_CMD on the output address.
 * NOTE(review): presumably the first call fetches the tile into the
 * buffer and the second writes it back out, with any required
 * synchronization done inside X(spu_dma2d) -- confirm against its
 * definition.
 *
 * NOTE(review): the tile offsets are computed in int arithmetic before
 * being added to c->I / c->O; verify that the strides and extents used
 * by callers cannot overflow an int.
 */
void X(spu_do_copy)(const struct copy_context *c)
{
     int n = c->n, v = c->v, nspe = c->nspe, my_id = c->my_id;
     int is_bytes = c->is_bytes, os_bytes = c->os_bytes;
     int ivs_bytes = c->ivs_bytes, ovs_bytes = c->ovs_bytes;
     int nblock;
     int in, iv, nn, nv;
     R *A;

     /* Discard earlier allocations, then request one staging buffer of
        BLOCKSZ * BLOCKSZ * 2 values of type R plus ALIGNMENT bytes. */
     X(spu_alloc_reset)();
     A = X(spu_alloc)(BLOCKSZ * BLOCKSZ * 2 * sizeof(R) + ALIGNMENT);

     nblock = 0;
     nv = BLOCKSZ;
     for (iv = 0; iv < v; iv += nv) {
          /* Clamp the final chunk along v. */
          if (nv > v - iv)
               nv = v - iv;

          /* nn is re-derived for every chunk because the inner loop
             may shrink it on its last tile. */
          if (nv == 1)
               nn = BLOCKSZ * BLOCKSZ / 8; /* large 1D copy, heuristic */
          else
               nn = BLOCKSZ; /* 2D copy */

          for (in = 0; in < n; in += nn) {
               /* Clamp the final tile along n.  This must happen
                  before the ownership test so that `in += nn' advances
                  identically on every worker. */
               if (nn > n - in)
                    nn = n - in;

               /* The counter advances for every tile, owned or not,
                  so all workers agree on the tile numbering. */
               if ((nblock++ % nspe) != my_id)
                    continue; /* block is not ours */

               X(spu_dma2d)(A, c->I + in * is_bytes + iv * ivs_bytes,
                            nn, is_bytes, nv, ivs_bytes, MFC_GET_CMD);
               X(spu_dma2d)(A, c->O + in * os_bytes + iv * ovs_bytes,
                            nn, os_bytes, nv, ovs_bytes, MFC_PUT_CMD);
          }
     }
}