1#![cfg_attr(not(feature = "std"), no_std)]
83
84extern crate alloc;
85
86#[cfg(feature = "gpu")]
87use alloc::borrow::Cow;
88use alloc::vec;
89use alloc::vec::Vec;
90use core::ops::{Add, Mul, Sub};
91
92use grafos_std::error::FabricError;
93#[cfg(feature = "gpu")]
94use grafos_std::gpu::{GpuBuilder, GpuLease, GpuSession};
95use grafos_std::mem::{MemBuilder, MemLease};
96#[cfg(all(feature = "gpu", feature = "std"))]
97use std::{env, fs, path::PathBuf};
98
/// Crate-wide result alias: all fallible tensor operations report [`FabricError`].
pub type Result<T> = core::result::Result<T, FabricError>;
101
/// A dense `f32` tensor backed by a fabric memory lease, optionally mirrored
/// on a GPU lease when the `gpu` feature is enabled.
pub struct FabricTensor {
    // Where the backing allocation lives: a CPU lease, or (with the `gpu`
    // feature) a GPU lease plus a host staging lease.
    storage: TensorStorage,
    // Logical dimensions and row-major strides.
    shape: Shape,
    // Host-side element buffer. All CPU math in this file operates on this
    // Vec, even for GPU-resident tensors (GPU dispatch mirrors it).
    data: Vec<f32>,
}
133
/// Identifies where a tensor currently resides.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Device {
    /// Host memory only.
    Cpu,
    /// GPU-resident; the payload is the GPU lease id (see `gpu_lease.lease_id()`).
    Gpu(u128),
}
142
/// Backing storage for a tensor's bytes.
enum TensorStorage {
    /// CPU-only tensor: a single fabric memory lease.
    Cpu(MemLease),
    /// GPU tensor: a VRAM lease plus a host staging lease used for transfers.
    #[cfg(feature = "gpu")]
    Gpu {
        gpu_lease: GpuLease,
        staging_lease: MemLease,
    },
}
151
/// Tensor shape: logical dimensions plus row-major (C-order) strides.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Shape {
    dims: Vec<usize>,
    strides: Vec<usize>,
}

impl Shape {
    /// Builds a shape from `dims`, deriving contiguous row-major strides.
    pub fn new(dims: &[usize]) -> Self {
        Shape {
            dims: dims.to_vec(),
            strides: Self::compute_strides(dims),
        }
    }

    /// Row-major strides: the last axis has stride 1 and each earlier axis
    /// strides over the product of all later dimensions.
    fn compute_strides(dims: &[usize]) -> Vec<usize> {
        if dims.is_empty() {
            return Vec::new();
        }
        let mut strides = vec![1usize; dims.len()];
        let mut running = 1usize;
        // Walk axes from innermost to outermost, accumulating the product of
        // all dimensions to the right of each axis.
        for (slot, &dim) in strides.iter_mut().zip(dims.iter()).rev() {
            *slot = running;
            running *= dim;
        }
        strides
    }

    /// Total element count (1 for a rank-0 scalar shape).
    pub fn numel(&self) -> usize {
        self.dims.iter().copied().fold(1, |acc, d| acc * d)
    }

    /// Number of axes.
    pub fn ndim(&self) -> usize {
        self.dims.len()
    }

    /// Logical dimensions.
    pub fn dims(&self) -> &[usize] {
        self.dims.as_slice()
    }

    /// Row-major strides, one per axis.
    pub fn strides(&self) -> &[usize] {
        self.strides.as_slice()
    }

    /// Maps a multi-dimensional index to a flat offset, or `None` when the
    /// rank does not match or any coordinate is out of bounds.
    fn flat_index(&self, indices: &[usize]) -> Option<usize> {
        if indices.len() != self.dims.len() {
            return None;
        }
        indices
            .iter()
            .zip(self.dims.iter().zip(self.strides.iter()))
            .try_fold(0usize, |acc, (&idx, (&dim, &stride))| {
                (idx < dim).then(|| acc + idx * stride)
            })
    }
}
251
/// Launches a single-workgroup kernel on the given GPU lease and blocks until
/// it completes.
///
/// `binary` is the kernel module image (a mock tag in tests), `args` the
/// packed argument blob and `arg_sizes` its per-argument byte sizes. Grid and
/// workgroup dimensions are fixed at `[1, 1, 1]` — this is a signalling
/// dispatch, not a parallel compute launch.
#[cfg(feature = "gpu")]
fn submit_signal_kernel(
    gpu_lease: &GpuLease,
    op_name: &str,
    binary: &[u8],
    args: &[u8],
    arg_sizes: &[u32],
) -> Result<()> {
    let mut sess = GpuSession::new(gpu_lease);
    let module = sess.module_load(binary)?;
    sess.launch(&module, op_name, [1, 1, 1], [1, 1, 1], args, arg_sizes)?;
    // Synchronize so callers may assume the kernel's effects are visible.
    sess.sync()?;
    Ok(())
}
280
281impl FabricTensor {
    /// Returns the CPU-visible lease backing this tensor: the primary lease
    /// for CPU tensors, or the staging lease for GPU tensors.
    fn cpu_lease(&self) -> &MemLease {
        match &self.storage {
            TensorStorage::Cpu(lease) => lease,
            #[cfg(feature = "gpu")]
            TensorStorage::Gpu { staging_lease, .. } => staging_lease,
        }
    }

    /// Returns the GPU lease when this tensor is GPU-resident, else `None`.
    #[cfg(feature = "gpu")]
    fn gpu_lease(&self) -> Option<&GpuLease> {
        match &self.storage {
            TensorStorage::Cpu(_) => None,
            TensorStorage::Gpu { gpu_lease, .. } => Some(gpu_lease),
        }
    }

    /// Public accessor for the CPU-visible memory lease.
    pub fn lease(&self) -> &MemLease {
        self.cpu_lease()
    }
306
    /// Wraps an existing memory lease as a zero-initialized CPU tensor of
    /// `shape`.
    ///
    /// NOTE(review): the lease's byte capacity is not checked against
    /// `numel * size_of::<f32>()` here — callers are trusted to supply a
    /// large-enough lease.
    pub fn from_mem_lease(shape: &[usize], lease: MemLease) -> Self {
        let shape = Shape::new(shape);
        let numel = shape.numel();
        FabricTensor {
            storage: TensorStorage::Cpu(lease),
            shape,
            data: vec![0.0; numel],
        }
    }
320
    /// Allocates a new zero-filled CPU tensor of the given shape.
    ///
    /// # Errors
    /// Propagates the failure when a fabric lease of `numel * 4` bytes cannot
    /// be acquired.
    pub fn zeros(shape: &[usize]) -> Result<Self> {
        let s = Shape::new(shape);
        let numel = s.numel();
        let byte_size = numel * core::mem::size_of::<f32>();
        let lease = MemBuilder::new().min_bytes(byte_size as u64).acquire()?;
        Ok(FabricTensor {
            storage: TensorStorage::Cpu(lease),
            shape: s,
            data: vec![0.0f32; numel],
        })
    }
359
360 pub fn from_slice(shape: &[usize], data: &[f32]) -> Result<Self> {
383 let s = Shape::new(shape);
384 if data.len() != s.numel() {
385 return Err(FabricError::CapacityExceeded);
386 }
387 let byte_size = std::mem::size_of_val(data);
388 let lease = MemBuilder::new().min_bytes(byte_size as u64).acquire()?;
389 Ok(FabricTensor {
390 storage: TensorStorage::Cpu(lease),
391 shape: s,
392 data: data.to_vec(),
393 })
394 }
395
    /// Reports where this tensor currently resides.
    pub fn device(&self) -> Device {
        #[cfg(feature = "gpu")]
        {
            if let Some(gpu) = self.gpu_lease() {
                return Device::Gpu(gpu.lease_id());
            }
        }
        // Without the `gpu` feature (or for CPU storage) this is always CPU.
        Device::Cpu
    }

    /// True when the tensor lives only in host memory.
    pub fn is_cpu(&self) -> bool {
        matches!(self.device(), Device::Cpu)
    }

    /// True when the tensor has a GPU-resident allocation.
    pub fn is_gpu(&self) -> bool {
        matches!(self.device(), Device::Gpu(_))
    }
416
    /// Returns a GPU-resident copy of this tensor.
    ///
    /// Both branches below acquire fresh GPU + staging leases and submit a
    /// "tensor_upload" kernel carrying the element bytes; an already-GPU
    /// tensor therefore yields a *new* GPU copy rather than `self`.
    ///
    /// # Errors
    /// Without the `gpu` feature this always returns
    /// `FabricError::Unsupported`; otherwise lease/kernel failures propagate.
    pub fn to_gpu(&self) -> Result<FabricTensor> {
        #[cfg(feature = "gpu")]
        {
            if self.is_gpu() {
                // Clone path: rebuild a CPU tensor from the host mirror, then
                // attach freshly-acquired GPU storage to it.
                return FabricTensor::from_slice(self.shape(), self.as_slice()).and_then(
                    |mut t| {
                        let gpu = GpuBuilder::new()
                            .min_vram((self.numel() * core::mem::size_of::<f32>()) as u64)
                            .acquire()?;
                        let staging = MemBuilder::new()
                            .min_bytes((self.numel() * core::mem::size_of::<f32>()) as u64)
                            .acquire()?;
                        let args = self.encode_f32_le();
                        let arg_sizes = [args.len() as u32];
                        submit_signal_kernel(
                            &gpu,
                            "tensor_upload",
                            b"grafos.tensor.mock",
                            &args,
                            &arg_sizes,
                        )?;
                        t.storage = TensorStorage::Gpu {
                            gpu_lease: gpu,
                            staging_lease: staging,
                        };
                        Ok(t)
                    },
                );
            }

            // CPU -> GPU path: acquire VRAM plus a same-sized staging lease
            // and upload the host mirror.
            let bytes = (self.numel() * core::mem::size_of::<f32>()) as u64;
            let gpu_lease = GpuBuilder::new().min_vram(bytes).acquire()?;
            let staging_lease = MemBuilder::new().min_bytes(bytes).acquire()?;
            let args = self.encode_f32_le();
            let arg_sizes = [args.len() as u32];
            submit_signal_kernel(
                &gpu_lease,
                "tensor_upload",
                b"grafos.tensor.mock",
                &args,
                &arg_sizes,
            )?;

            return Ok(FabricTensor {
                storage: TensorStorage::Gpu {
                    gpu_lease,
                    staging_lease,
                },
                shape: self.shape.clone(),
                data: self.data.clone(),
            });
        }

        #[cfg(not(feature = "gpu"))]
        {
            Err(FabricError::Unsupported)
        }
    }
478
    /// Returns a CPU-resident copy of this tensor.
    ///
    /// For GPU tensors a "tensor_download" kernel is submitted first; the
    /// returned tensor is then rebuilt from the host mirror buffer.
    pub fn to_cpu(&self) -> Result<FabricTensor> {
        if self.is_cpu() {
            // Already on the CPU: return an independent copy.
            return FabricTensor::from_slice(self.shape(), self.as_slice());
        }

        #[cfg(feature = "gpu")]
        {
            if let Some(gpu) = self.gpu_lease() {
                let args = self.encode_f32_le();
                let arg_sizes = [args.len() as u32];
                submit_signal_kernel(
                    gpu,
                    "tensor_download",
                    b"grafos.tensor.mock",
                    &args,
                    &arg_sizes,
                )?;
            }
        }
        FabricTensor::from_slice(self.shape(), self.as_slice())
    }
501
502 pub fn get(&self, indices: &[usize]) -> Result<f32> {
525 let offset = self
526 .shape
527 .flat_index(indices)
528 .ok_or(FabricError::CapacityExceeded)?;
529 Ok(self.data[offset])
530 }
531
532 pub fn set(&mut self, indices: &[usize], value: f32) -> Result<()> {
551 let offset = self
552 .shape
553 .flat_index(indices)
554 .ok_or(FabricError::CapacityExceeded)?;
555 self.data[offset] = value;
556 Ok(())
557 }
558
    /// Logical dimensions of the tensor.
    pub fn shape(&self) -> &[usize] {
        self.shape.dims()
    }

    /// Number of axes.
    pub fn ndim(&self) -> usize {
        self.shape.ndim()
    }

    /// Total element count.
    pub fn numel(&self) -> usize {
        self.shape.numel()
    }

    /// Row-major strides, one per axis.
    pub fn strides(&self) -> &[usize] {
        self.shape.strides()
    }

    /// Borrow of the host-side element buffer (row-major order).
    pub fn as_slice(&self) -> &[f32] {
        &self.data
    }
583
584 #[cfg(feature = "gpu")]
585 fn encode_f32_le(&self) -> Vec<u8> {
586 let mut bytes = Vec::with_capacity(self.data.len() * core::mem::size_of::<f32>());
587 for value in &self.data {
588 bytes.extend_from_slice(&value.to_le_bytes());
589 }
590 bytes
591 }
592
    // Little-endian integer encoders used when packing kernel argument blobs.

    #[cfg(feature = "gpu")]
    fn push_u16_le(buf: &mut Vec<u8>, v: u16) {
        buf.extend_from_slice(&v.to_le_bytes());
    }

    #[cfg(feature = "gpu")]
    fn push_u32_le(buf: &mut Vec<u8>, v: u32) {
        buf.extend_from_slice(&v.to_le_bytes());
    }

    #[cfg(feature = "gpu")]
    fn push_u64_le(buf: &mut Vec<u8>, v: u64) {
        buf.extend_from_slice(&v.to_le_bytes());
    }
607
608 #[cfg(feature = "gpu")]
609 fn encode_tensor_descriptor(&self, out: &mut Vec<u8>) -> Result<()> {
610 let ndim: u16 = self
611 .ndim()
612 .try_into()
613 .map_err(|_| FabricError::CapacityExceeded)?;
614 Self::push_u16_le(out, ndim);
615
616 let numel: u64 = self
617 .numel()
618 .try_into()
619 .map_err(|_| FabricError::CapacityExceeded)?;
620 Self::push_u64_le(out, numel);
621 Self::push_u64_le(out, 0); for &dim in self.shape() {
624 let dim_u32: u32 = dim.try_into().map_err(|_| FabricError::CapacityExceeded)?;
625 Self::push_u32_le(out, dim_u32);
626 }
627 for &stride in self.strides() {
628 let stride_u32: u32 = stride
629 .try_into()
630 .map_err(|_| FabricError::CapacityExceeded)?;
631 Self::push_u32_le(out, stride_u32);
632 }
633 Ok(())
634 }
635
    /// Resolves the kernel binary to launch for `op_name`.
    ///
    /// With `std` available, two environment overrides are honoured in order:
    /// `GRAFOS_TENSOR_HSACO` (explicit file path) and
    /// `GRAFOS_TENSOR_KERNEL_DIR` (directory searched for per-op and generic
    /// images). Empty or unreadable files are skipped. Falls back to the mock
    /// tag understood by the test host.
    #[cfg(feature = "gpu")]
    fn kernel_binary_for(op_name: &str) -> Cow<'static, [u8]> {
        #[cfg(all(feature = "gpu", feature = "std"))]
        {
            if let Some(file) = env::var_os("GRAFOS_TENSOR_HSACO") {
                let path = PathBuf::from(file);
                if let Ok(binary) = fs::read(path) {
                    if !binary.is_empty() {
                        return Cow::Owned(binary);
                    }
                }
            }

            if let Some(dir) = env::var_os("GRAFOS_TENSOR_KERNEL_DIR") {
                let base = PathBuf::from(dir);
                // Most specific first: per-op image, then arch-specific, then
                // the generic bundle.
                let candidates = [
                    base.join(format!("{op_name}.hsaco")),
                    base.join("tensor_ops_gfx1032.hsaco"),
                    base.join("tensor_ops.hsaco"),
                ];
                for candidate in candidates {
                    if let Ok(binary) = fs::read(&candidate) {
                        if !binary.is_empty() {
                            return Cow::Owned(binary);
                        }
                    }
                }
            }
        }
        Cow::Borrowed(b"grafos.tensor.mock")
    }
667
668 #[cfg(feature = "gpu")]
669 fn pack_gpu_unary_args(&self, op_name: &str) -> Result<Vec<u8>> {
670 let mut args = Vec::new();
671 args.extend_from_slice(b"GTA0");
672 args.push(1); args.push(1); Self::push_u16_le(&mut args, op_name.len() as u16);
675 args.extend_from_slice(op_name.as_bytes());
676 self.encode_tensor_descriptor(&mut args)?;
677 let data = self.encode_f32_le();
678 Self::push_u64_le(&mut args, data.len() as u64);
679 args.extend_from_slice(&data);
680 Ok(args)
681 }
682
683 #[cfg(feature = "gpu")]
684 fn pack_gpu_binary_args(&self, other: &FabricTensor, op_name: &str) -> Result<Vec<u8>> {
685 let mut args = Vec::new();
686 args.extend_from_slice(b"GTA0");
687 args.push(1); args.push(2); Self::push_u16_le(&mut args, op_name.len() as u16);
690 args.extend_from_slice(op_name.as_bytes());
691
692 self.encode_tensor_descriptor(&mut args)?;
693 let lhs = self.encode_f32_le();
694 Self::push_u64_le(&mut args, lhs.len() as u64);
695 args.extend_from_slice(&lhs);
696
697 other.encode_tensor_descriptor(&mut args)?;
698 let rhs = other.encode_f32_le();
699 Self::push_u64_le(&mut args, rhs.len() as u64);
700 args.extend_from_slice(&rhs);
701 Ok(args)
702 }
703
    /// Submits a unary kernel for this tensor. Silently returns `Ok(())`
    /// when the tensor is not GPU-resident (CPU tensors dispatch nothing).
    #[cfg(feature = "gpu")]
    fn dispatch_gpu_unary(&self, op_name: &str) -> Result<()> {
        let Some(gpu) = self.gpu_lease() else {
            return Ok(());
        };
        let args = self.pack_gpu_unary_args(op_name)?;
        let binary = Self::kernel_binary_for(op_name);
        let arg_sizes = [args.len() as u32];
        submit_signal_kernel(gpu, op_name, binary.as_ref(), &args, &arg_sizes)?;
        Ok(())
    }

    /// Submits a binary kernel for `self` and `other`. Only `self`'s GPU
    /// lease is consulted; a CPU `self` makes this a no-op `Ok(())`.
    #[cfg(feature = "gpu")]
    fn dispatch_gpu_binary(&self, other: &FabricTensor, op_name: &str) -> Result<()> {
        let Some(gpu) = self.gpu_lease() else {
            return Ok(());
        };
        let args = self.pack_gpu_binary_args(other, op_name)?;
        let binary = Self::kernel_binary_for(op_name);
        let arg_sizes = [args.len() as u32];
        submit_signal_kernel(gpu, op_name, binary.as_ref(), &args, &arg_sizes)?;
        Ok(())
    }
727
728 fn cpu_matmul(&self, other: &FabricTensor) -> Result<FabricTensor> {
729 if self.ndim() != 2 || other.ndim() != 2 {
730 return Err(FabricError::CapacityExceeded);
731 }
732 let m = self.shape.dims[0];
733 let k = self.shape.dims[1];
734 let k2 = other.shape.dims[0];
735 let n = other.shape.dims[1];
736 if k != k2 {
737 return Err(FabricError::CapacityExceeded);
738 }
739 let mut result_data = vec![0.0f32; m * n];
740 for i in 0..m {
741 for j in 0..n {
742 let mut sum = 0.0f32;
743 for p in 0..k {
744 sum += self.data[i * k + p] * other.data[p * n + j];
745 }
746 result_data[i * n + j] = sum;
747 }
748 }
749 FabricTensor::from_slice(&[m, n], &result_data)
750 }
751
752 fn cpu_add(&self, other: &FabricTensor) -> Result<FabricTensor> {
753 if self.shape.dims != other.shape.dims {
754 return Err(FabricError::CapacityExceeded);
755 }
756 let data: Vec<f32> = self
757 .data
758 .iter()
759 .zip(other.data.iter())
760 .map(|(a, b)| a + b)
761 .collect();
762 FabricTensor::from_slice(self.shape.dims(), &data)
763 }
764
765 fn cpu_mul(&self, other: &FabricTensor) -> Result<FabricTensor> {
766 if self.shape.dims != other.shape.dims {
767 return Err(FabricError::CapacityExceeded);
768 }
769 let data: Vec<f32> = self
770 .data
771 .iter()
772 .zip(other.data.iter())
773 .map(|(a, b)| a * b)
774 .collect();
775 FabricTensor::from_slice(self.shape.dims(), &data)
776 }
777
    /// Matrix product of two rank-2 tensors.
    ///
    /// When both operands are GPU-resident the kernel is dispatched, but the
    /// result is still computed on the host mirror and re-uploaded — the GPU
    /// path mirrors the CPU math rather than reading device results back.
    pub fn matmul(&self, other: &FabricTensor) -> Result<FabricTensor> {
        #[cfg(feature = "gpu")]
        if self.is_gpu() && other.is_gpu() {
            self.dispatch_gpu_binary(other, "tensor_matmul")?;
            let cpu_result = self.cpu_matmul(other)?;
            return cpu_result.to_gpu();
        }
        self.cpu_matmul(other)
    }

    /// Element-wise sum; same GPU mirroring strategy as [`Self::matmul`].
    pub fn add(&self, other: &FabricTensor) -> Result<FabricTensor> {
        #[cfg(feature = "gpu")]
        if self.is_gpu() && other.is_gpu() {
            self.dispatch_gpu_binary(other, "tensor_add")?;
            let cpu_result = self.cpu_add(other)?;
            return cpu_result.to_gpu();
        }
        self.cpu_add(other)
    }

    /// Element-wise product; same GPU mirroring strategy as [`Self::matmul`].
    pub fn mul(&self, other: &FabricTensor) -> Result<FabricTensor> {
        #[cfg(feature = "gpu")]
        if self.is_gpu() && other.is_gpu() {
            self.dispatch_gpu_binary(other, "tensor_mul")?;
            let cpu_result = self.cpu_mul(other)?;
            return cpu_result.to_gpu();
        }
        self.cpu_mul(other)
    }
879
    /// Multiplies every element by `scalar`.
    ///
    /// On GPU tensors: dispatch the kernel, recompute on the host mirror,
    /// and re-upload the result.
    pub fn scale(&self, scalar: f32) -> Result<FabricTensor> {
        #[cfg(feature = "gpu")]
        if self.is_gpu() {
            self.dispatch_gpu_unary("tensor_scale")?;
            let data: Vec<f32> = self.data.iter().map(|&x| x * scalar).collect();
            let cpu_result = FabricTensor::from_slice(self.shape.dims(), &data)?;
            return cpu_result.to_gpu();
        }
        let data: Vec<f32> = self.data.iter().map(|&x| x * scalar).collect();
        FabricTensor::from_slice(self.shape.dims(), &data)
    }

    /// Rectified linear unit: clamps negative elements to zero.
    pub fn relu(&self) -> Result<FabricTensor> {
        #[cfg(feature = "gpu")]
        if self.is_gpu() {
            self.dispatch_gpu_unary("tensor_relu")?;
            let data: Vec<f32> = self
                .data
                .iter()
                .map(|&x| if x > 0.0 { x } else { 0.0 })
                .collect();
            let cpu_result = FabricTensor::from_slice(self.shape.dims(), &data)?;
            return cpu_result.to_gpu();
        }
        let data: Vec<f32> = self
            .data
            .iter()
            .map(|&x| if x > 0.0 { x } else { 0.0 })
            .collect();
        FabricTensor::from_slice(self.shape.dims(), &data)
    }
944
    /// Softmax along `axis`; see [`Self::softmax_cpu`] for the math.
    pub fn softmax(&self, axis: usize) -> Result<FabricTensor> {
        #[cfg(feature = "gpu")]
        if self.is_gpu() {
            self.dispatch_gpu_unary("tensor_softmax")?;
            let cpu_result = self.softmax_cpu(axis)?;
            return cpu_result.to_gpu();
        }
        self.softmax_cpu(axis)
    }

    /// Swaps the last two axes; see [`Self::transpose_cpu`].
    pub fn transpose(&self) -> Result<FabricTensor> {
        #[cfg(feature = "gpu")]
        if self.is_gpu() {
            self.dispatch_gpu_unary("tensor_transpose")?;
            let cpu_result = self.transpose_cpu()?;
            return cpu_result.to_gpu();
        }
        self.transpose_cpu()
    }

    /// Reinterprets the data under `new_shape`; see [`Self::reshape_cpu`].
    pub fn reshape(&self, new_shape: &[usize]) -> Result<FabricTensor> {
        #[cfg(feature = "gpu")]
        if self.is_gpu() {
            self.dispatch_gpu_unary("tensor_reshape")?;
            let cpu_result = self.reshape_cpu(new_shape)?;
            return cpu_result.to_gpu();
        }
        self.reshape_cpu(new_shape)
    }
1044
    /// Numerically-stable softmax along `axis` on the host mirror.
    ///
    /// # Errors
    /// `FabricError::CapacityExceeded` when `axis` is out of range.
    fn softmax_cpu(&self, axis: usize) -> Result<FabricTensor> {
        if axis >= self.ndim() {
            return Err(FabricError::CapacityExceeded);
        }
        let dims = &self.shape.dims;
        let strides = &self.shape.strides;
        let mut result = self.data.clone();

        let axis_len = dims[axis];
        let axis_stride = strides[axis];
        // View the tensor as [outer, axis, inner]; each (outer, inner) pair
        // selects one lane along `axis`.
        let outer_size: usize = dims[..axis].iter().product();
        let inner_size: usize = dims[axis + 1..].iter().product();

        for outer in 0..outer_size {
            for inner in 0..inner_size {
                // For the contiguous row-major layout built by Shape::new,
                // axis_stride == inner_size, so base + a * axis_stride walks
                // the lane.
                let base = outer * (axis_len * inner_size) + inner;
                // Pass 1: lane maximum, subtracted before exponentiating to
                // avoid overflow (standard stable-softmax trick).
                let mut max_val = f32::NEG_INFINITY;
                for a in 0..axis_len {
                    let idx = base + a * axis_stride;
                    if self.data[idx] > max_val {
                        max_val = self.data[idx];
                    }
                }

                // Pass 2: exponentiate and accumulate the normalizer.
                let mut sum = 0.0f32;
                for a in 0..axis_len {
                    let idx = base + a * axis_stride;
                    let exp_val = (self.data[idx] - max_val).exp();
                    result[idx] = exp_val;
                    sum += exp_val;
                }

                // Pass 3: normalize the lane to sum to one.
                for a in 0..axis_len {
                    let idx = base + a * axis_stride;
                    result[idx] /= sum;
                }
            }
        }

        FabricTensor::from_slice(dims, &result)
    }
1086
    /// Swaps the last two axes on the host mirror (batched 2-D transpose).
    ///
    /// # Errors
    /// `FabricError::CapacityExceeded` for tensors of rank < 2.
    fn transpose_cpu(&self) -> Result<FabricTensor> {
        if self.ndim() < 2 {
            return Err(FabricError::CapacityExceeded);
        }
        let dims = &self.shape.dims;
        let ndim = dims.len();

        let mut new_dims = dims.to_vec();
        new_dims.swap(ndim - 2, ndim - 1);

        let rows = dims[ndim - 2];
        let cols = dims[ndim - 1];
        // All leading axes collapse into a flat batch of row-major matrices.
        let batch_size: usize = dims[..ndim - 2].iter().product();
        let matrix_size = rows * cols;

        let mut result = vec![0.0f32; self.data.len()];
        for b in 0..batch_size {
            let src_base = b * matrix_size;
            let dst_base = b * matrix_size; // same offset — transpose is per-matrix
            for r in 0..rows {
                for c in 0..cols {
                    // dst[c][r] = src[r][c]
                    result[dst_base + c * rows + r] = self.data[src_base + r * cols + c];
                }
            }
        }

        FabricTensor::from_slice(&new_dims, &result)
    }
1115
1116 fn reshape_cpu(&self, new_shape: &[usize]) -> Result<FabricTensor> {
1117 let new_numel: usize = new_shape.iter().product();
1118 if new_numel != self.numel() {
1119 return Err(FabricError::CapacityExceeded);
1120 }
1121 FabricTensor::from_slice(new_shape, &self.data)
1122 }
1123
    /// Element-wise difference `self - other`; shapes must match exactly.
    ///
    /// On GPU tensors this is composed from the existing kernels as
    /// `self + (-1 * other)` instead of a dedicated subtract kernel.
    pub fn subtract(&self, other: &FabricTensor) -> Result<FabricTensor> {
        if self.shape() != other.shape() {
            return Err(FabricError::CapacityExceeded);
        }
        #[cfg(feature = "gpu")]
        if self.is_gpu() {
            let neg = other.scale(-1.0)?;
            return self.add(&neg);
        }
        let data: Vec<f32> = self
            .data
            .iter()
            .zip(other.data.iter())
            .map(|(&a, &b)| a - b)
            .collect();
        FabricTensor::from_slice(self.shape(), &data)
    }
1157
    /// Sums over `axis`, removing it from the shape (a rank-1 result of
    /// shape `[1]` is produced when the input was 1-D).
    ///
    /// # Errors
    /// `FabricError::CapacityExceeded` when `axis` is out of range.
    pub fn sum_axis(&self, axis: usize) -> Result<FabricTensor> {
        if axis >= self.ndim() {
            return Err(FabricError::CapacityExceeded);
        }
        let dims = self.shape();
        let axis_len = dims[axis];
        // Output shape: input dims with `axis` removed.
        let mut new_dims: Vec<usize> = dims
            .iter()
            .enumerate()
            .filter(|&(i, _)| i != axis)
            .map(|(_, &d)| d)
            .collect();
        if new_dims.is_empty() {
            new_dims.push(1);
        }
        let new_numel: usize = new_dims.iter().product();
        let mut result = vec![0.0f32; new_numel];

        // View the tensor as [outer, axis, inner] (contiguous row-major).
        let outer_size: usize = dims[..axis].iter().product();
        let inner_size: usize = dims[axis + 1..].iter().product();

        for outer in 0..outer_size {
            for inner in 0..inner_size {
                let mut sum = 0.0f32;
                for a in 0..axis_len {
                    let src_idx = outer * (axis_len * inner_size) + a * inner_size + inner;
                    sum += self.data[src_idx];
                }
                let dst_idx = outer * inner_size + inner;
                result[dst_idx] = sum;
            }
        }

        FabricTensor::from_slice(&new_dims, &result)
    }
1217
    /// Logistic sigmoid `1 / (1 + e^-x)` applied element-wise.
    pub fn sigmoid(&self) -> Result<FabricTensor> {
        #[cfg(feature = "gpu")]
        if self.is_gpu() {
            self.dispatch_gpu_unary("tensor_sigmoid")?;
            let cpu_result = self.sigmoid_cpu()?;
            return cpu_result.to_gpu();
        }
        self.sigmoid_cpu()
    }

    // NOTE(review): f32::exp/ln below come from std's float math; they are
    // not available under a pure no_std build — confirm how the no_std
    // configuration is expected to provide them.
    fn sigmoid_cpu(&self) -> Result<FabricTensor> {
        let data: Vec<f32> = self
            .data
            .iter()
            .map(|&x| 1.0 / (1.0 + (-x).exp()))
            .collect();
        FabricTensor::from_slice(self.shape(), &data)
    }

    /// Natural logarithm applied element-wise (non-positive inputs yield
    /// NaN/-inf per IEEE semantics).
    pub fn ln(&self) -> Result<FabricTensor> {
        #[cfg(feature = "gpu")]
        if self.is_gpu() {
            self.dispatch_gpu_unary("tensor_ln")?;
            let cpu_result = self.ln_cpu()?;
            return cpu_result.to_gpu();
        }
        self.ln_cpu()
    }

    fn ln_cpu(&self) -> Result<FabricTensor> {
        let data: Vec<f32> = self.data.iter().map(|&x| x.ln()).collect();
        FabricTensor::from_slice(self.shape(), &data)
    }

    /// Clamps every element into `[min, max]`. CPU-only: no GPU dispatch,
    /// unlike the other element-wise operations above.
    pub fn clip(&self, min: f32, max: f32) -> Result<FabricTensor> {
        let data: Vec<f32> = self.data.iter().map(|&x| x.clamp(min, max)).collect();
        FabricTensor::from_slice(self.shape(), &data)
    }
1298
    /// Forward FFT of a 1-D tensor whose length is a nonzero power of two.
    ///
    /// Returns a tensor of length `2 * n` holding interleaved
    /// (real, imaginary) pairs; see [`Self::fft_cpu`].
    ///
    /// # Errors
    /// `FabricError::CapacityExceeded` when the tensor is not 1-D or its
    /// length is not a power of two.
    pub fn fft(&self) -> Result<FabricTensor> {
        if self.ndim() != 1 {
            return Err(FabricError::CapacityExceeded);
        }
        let n = self.shape.dims[0];
        if n == 0 || !n.is_power_of_two() {
            return Err(FabricError::CapacityExceeded);
        }

        #[cfg(feature = "gpu")]
        if self.is_gpu() {
            self.dispatch_gpu_unary("tensor_fft")?;
            let cpu_result = self.fft_cpu()?;
            return cpu_result.to_gpu();
        }

        self.fft_cpu()
    }
1325
    /// Inverse FFT over interleaved (re, im) pairs as produced by
    /// [`Self::fft`]; only the real parts of the result are returned.
    ///
    /// # Errors
    /// `FabricError::CapacityExceeded` when the tensor is not 1-D, its length
    /// is odd or zero, or the pair count `len / 2` is not a power of two.
    pub fn ifft(&self) -> Result<FabricTensor> {
        if self.ndim() != 1 {
            return Err(FabricError::CapacityExceeded);
        }
        let len = self.shape.dims[0];
        if len == 0 || !len.is_multiple_of(2) {
            return Err(FabricError::CapacityExceeded);
        }
        // Each complex sample occupies two f32 slots.
        let n = len / 2;
        if !n.is_power_of_two() {
            return Err(FabricError::CapacityExceeded);
        }

        #[cfg(feature = "gpu")]
        if self.is_gpu() {
            self.dispatch_gpu_unary("tensor_ifft")?;
            let cpu_result = self.ifft_cpu()?;
            return cpu_result.to_gpu();
        }

        self.ifft_cpu()
    }
1356
1357 fn fft_cpu(&self) -> Result<FabricTensor> {
1358 let n = self.shape.dims[0];
1359 let mut complex = vec![(0.0f32, 0.0f32); n];
1361 for (i, &val) in self.data.iter().enumerate() {
1362 complex[i] = (val, 0.0);
1363 }
1364
1365 let bits = n.trailing_zeros();
1367 for i in 0..n {
1368 let j = i.reverse_bits() >> (usize::BITS - bits);
1369 if i < j {
1370 complex.swap(i, j);
1371 }
1372 }
1373
1374 let mut len = 2;
1376 while len <= n {
1377 let half = len / 2;
1378 let angle = -2.0 * core::f32::consts::PI / len as f32;
1379 for start in (0..n).step_by(len) {
1380 let mut w_re = 1.0f32;
1381 let mut w_im = 0.0f32;
1382 let step_re = angle.cos();
1383 let step_im = angle.sin();
1384 for k in 0..half {
1385 let (e_re, e_im) = complex[start + k];
1386 let (o_re, o_im) = complex[start + k + half];
1387 let tw_re = o_re * w_re - o_im * w_im;
1388 let tw_im = o_re * w_im + o_im * w_re;
1389 complex[start + k] = (e_re + tw_re, e_im + tw_im);
1390 complex[start + k + half] = (e_re - tw_re, e_im - tw_im);
1391 let new_w_re = w_re * step_re - w_im * step_im;
1392 let new_w_im = w_re * step_im + w_im * step_re;
1393 w_re = new_w_re;
1394 w_im = new_w_im;
1395 }
1396 }
1397 len *= 2;
1398 }
1399
1400 let mut out = Vec::with_capacity(n * 2);
1402 for &(re, im) in &complex {
1403 out.push(re);
1404 out.push(im);
1405 }
1406 FabricTensor::from_slice(&[n * 2], &out)
1407 }
1408
1409 fn ifft_cpu(&self) -> Result<FabricTensor> {
1410 let len = self.shape.dims[0];
1411 let n = len / 2;
1412
1413 let mut complex: Vec<(f32, f32)> = (0..n)
1415 .map(|i| (self.data[i * 2], self.data[i * 2 + 1]))
1416 .collect();
1417
1418 let bits = n.trailing_zeros();
1420 for i in 0..n {
1421 let j = i.reverse_bits() >> (usize::BITS - bits);
1422 if i < j {
1423 complex.swap(i, j);
1424 }
1425 }
1426
1427 let mut blen = 2;
1429 while blen <= n {
1430 let half = blen / 2;
1431 let angle = 2.0 * core::f32::consts::PI / blen as f32;
1432 for start in (0..n).step_by(blen) {
1433 let mut w_re = 1.0f32;
1434 let mut w_im = 0.0f32;
1435 let step_re = angle.cos();
1436 let step_im = angle.sin();
1437 for k in 0..half {
1438 let (e_re, e_im) = complex[start + k];
1439 let (o_re, o_im) = complex[start + k + half];
1440 let tw_re = o_re * w_re - o_im * w_im;
1441 let tw_im = o_re * w_im + o_im * w_re;
1442 complex[start + k] = (e_re + tw_re, e_im + tw_im);
1443 complex[start + k + half] = (e_re - tw_re, e_im - tw_im);
1444 let new_w_re = w_re * step_re - w_im * step_im;
1445 let new_w_im = w_re * step_im + w_im * step_re;
1446 w_re = new_w_re;
1447 w_im = new_w_im;
1448 }
1449 }
1450 blen *= 2;
1451 }
1452
1453 let scale = 1.0 / n as f32;
1455 let out: Vec<f32> = complex.iter().map(|&(re, _)| re * scale).collect();
1456 FabricTensor::from_slice(&[n], &out)
1457 }
1458}
1459
1460impl<'a> Add for &'a FabricTensor {
1466 type Output = Result<FabricTensor>;
1467
1468 fn add(self, rhs: &'a FabricTensor) -> Self::Output {
1469 self.add(rhs)
1470 }
1471}
1472
/// `&a * &b` — element-wise multiplication.
impl<'a> Mul for &'a FabricTensor {
    type Output = Result<FabricTensor>;

    fn mul(self, rhs: &'a FabricTensor) -> Self::Output {
        // Fully qualified so the inherent method is named explicitly rather
        // than re-entering this trait method.
        FabricTensor::mul(self, rhs)
    }
}

/// `&a * 2.0` — scalar scaling.
impl Mul<f32> for &FabricTensor {
    type Output = Result<FabricTensor>;

    fn mul(self, rhs: f32) -> Self::Output {
        self.scale(rhs)
    }
}

/// `&a - &b` — element-wise subtraction.
impl<'a> Sub for &'a FabricTensor {
    type Output = Result<FabricTensor>;

    fn sub(self, rhs: &'a FabricTensor) -> Self::Output {
        self.subtract(rhs)
    }
}
1511
1512#[cfg(test)]
1513mod tests {
1514 use super::*;
1515 use grafos_std::host;
1516
    /// Resets the mock host and gives the fabric allocator a 1 MiB arena so
    /// each test starts from a clean slate.
    fn setup() {
        host::reset_mock();
        host::mock_set_fbmu_arena_size(1 << 20);
    }
    #[test]
    fn zeros_creates_correct_shape() {
        setup();
        let t = FabricTensor::zeros(&[3, 4]).unwrap();
        assert_eq!(t.shape(), &[3, 4]);
        assert_eq!(t.ndim(), 2);
        assert_eq!(t.numel(), 12);
        assert_eq!(t.strides(), &[4, 1]);
        // Every element must start zeroed.
        for i in 0..3 {
            for j in 0..4 {
                assert_eq!(t.get(&[i, j]).unwrap(), 0.0);
            }
        }
    }

    #[test]
    fn from_slice_roundtrip() {
        setup();
        let data: Vec<f32> = (1..=6).map(|x| x as f32).collect();
        let t = FabricTensor::from_slice(&[2, 3], &data).unwrap();
        assert_eq!(t.shape(), &[2, 3]);
        // Row-major layout: element [i, j] is data[i * 3 + j].
        assert_eq!(t.get(&[0, 0]).unwrap(), 1.0);
        assert_eq!(t.get(&[0, 2]).unwrap(), 3.0);
        assert_eq!(t.get(&[1, 0]).unwrap(), 4.0);
        assert_eq!(t.get(&[1, 2]).unwrap(), 6.0);
    }

    #[test]
    fn from_slice_wrong_size() {
        setup();
        // Two elements cannot fill a 2x3 tensor.
        let result = FabricTensor::from_slice(&[2, 3], &[1.0, 2.0]);
        assert!(result.is_err());
    }

    #[test]
    fn get_out_of_bounds() {
        setup();
        let t = FabricTensor::zeros(&[2, 3]).unwrap();
        assert!(t.get(&[2, 0]).is_err());
        assert!(t.get(&[0, 3]).is_err());
        // Wrong rank is rejected too.
        assert!(t.get(&[0]).is_err());
    }

    #[test]
    fn set_element() {
        setup();
        let mut t = FabricTensor::zeros(&[2, 2]).unwrap();
        t.set(&[1, 0], 42.0).unwrap();
        assert_eq!(t.get(&[1, 0]).unwrap(), 42.0);
        // Neighbouring elements stay untouched.
        assert_eq!(t.get(&[0, 0]).unwrap(), 0.0);
    }
1575
    // Matrix product of 2x3 and 3x2 operands, checked against hand-computed
    // values (e.g. c[0][0] = 1*7 + 2*9 + 3*11 = 58).
    #[test]
    fn matmul_2x3_times_3x2() {
        setup();
        let a = FabricTensor::from_slice(&[2, 3], &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let b = FabricTensor::from_slice(&[3, 2], &[7.0, 8.0, 9.0, 10.0, 11.0, 12.0]).unwrap();
        let c = a.matmul(&b).unwrap();
        assert_eq!(c.shape(), &[2, 2]);
        assert_eq!(c.get(&[0, 0]).unwrap(), 58.0);
        assert_eq!(c.get(&[0, 1]).unwrap(), 64.0);
        assert_eq!(c.get(&[1, 0]).unwrap(), 139.0);
        assert_eq!(c.get(&[1, 1]).unwrap(), 154.0);
    }

    #[test]
    fn matmul_incompatible_dims() {
        setup();
        // Inner dimensions (3 vs 2) do not match.
        let a = FabricTensor::zeros(&[2, 3]).unwrap();
        let b = FabricTensor::zeros(&[2, 3]).unwrap();
        assert!(a.matmul(&b).is_err());
    }

    #[test]
    fn matmul_not_2d() {
        setup();
        // matmul only supports rank-2 operands.
        let a = FabricTensor::zeros(&[2, 3, 4]).unwrap();
        let b = FabricTensor::zeros(&[4, 2]).unwrap();
        assert!(a.matmul(&b).is_err());
    }

    #[test]
    fn add_elementwise() {
        setup();
        let a = FabricTensor::from_slice(&[2, 2], &[1.0, 2.0, 3.0, 4.0]).unwrap();
        let b = FabricTensor::from_slice(&[2, 2], &[10.0, 20.0, 30.0, 40.0]).unwrap();
        let c = a.add(&b).unwrap();
        assert_eq!(c.as_slice(), &[11.0, 22.0, 33.0, 44.0]);
    }

    #[test]
    fn add_shape_mismatch() {
        setup();
        let a = FabricTensor::zeros(&[2, 3]).unwrap();
        let b = FabricTensor::zeros(&[3, 2]).unwrap();
        assert!(a.add(&b).is_err());
    }

    #[test]
    fn mul_elementwise() {
        setup();
        let a = FabricTensor::from_slice(&[3], &[2.0, 3.0, 4.0]).unwrap();
        let b = FabricTensor::from_slice(&[3], &[5.0, 6.0, 7.0]).unwrap();
        let c = FabricTensor::mul(&a, &b).unwrap();
        assert_eq!(c.as_slice(), &[10.0, 18.0, 28.0]);
    }

    #[test]
    fn scale_scalar() {
        setup();
        let a = FabricTensor::from_slice(&[2, 2], &[1.0, 2.0, 3.0, 4.0]).unwrap();
        let b = a.scale(3.0).unwrap();
        assert_eq!(b.as_slice(), &[3.0, 6.0, 9.0, 12.0]);
    }

    #[test]
    fn relu_clamps_negatives() {
        setup();
        let a = FabricTensor::from_slice(&[4], &[-2.0, -0.5, 0.0, 3.0]).unwrap();
        let b = a.relu().unwrap();
        assert_eq!(b.as_slice(), &[0.0, 0.0, 0.0, 3.0]);
    }
1662
    #[test]
    fn softmax_sums_to_one() {
        setup();
        let a = FabricTensor::from_slice(&[1, 4], &[1.0, 2.0, 3.0, 4.0]).unwrap();
        let b = a.softmax(1).unwrap();
        let sum: f32 = b.as_slice().iter().sum();
        assert!((sum - 1.0).abs() < 1e-6, "softmax sum = {sum}");
        // Softmax is monotone: larger inputs map to larger probabilities.
        let s = b.as_slice();
        assert!(s[0] < s[1]);
        assert!(s[1] < s[2]);
        assert!(s[2] < s[3]);
    }

    #[test]
    fn softmax_2d_along_axis0() {
        setup();
        let a = FabricTensor::from_slice(&[2, 3], &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let b = a.softmax(0).unwrap();
        let s = b.as_slice();
        // Each column (fixed index along axis 1) must sum to one.
        for col in 0..3 {
            let col_sum = s[col] + s[3 + col];
            assert!((col_sum - 1.0).abs() < 1e-6, "column {col} sum = {col_sum}");
        }
    }

    #[test]
    fn softmax_invalid_axis() {
        setup();
        let a = FabricTensor::zeros(&[2, 3]).unwrap();
        assert!(a.softmax(2).is_err());
    }

    #[test]
    fn transpose_2d() {
        setup();
        let a = FabricTensor::from_slice(&[2, 3], &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let b = a.transpose().unwrap();
        assert_eq!(b.shape(), &[3, 2]);
        // b[j][i] == a[i][j] for every element.
        assert_eq!(b.get(&[0, 0]).unwrap(), 1.0);
        assert_eq!(b.get(&[0, 1]).unwrap(), 4.0);
        assert_eq!(b.get(&[1, 0]).unwrap(), 2.0);
        assert_eq!(b.get(&[1, 1]).unwrap(), 5.0);
        assert_eq!(b.get(&[2, 0]).unwrap(), 3.0);
        assert_eq!(b.get(&[2, 1]).unwrap(), 6.0);
    }

    #[test]
    fn transpose_1d_fails() {
        setup();
        let a = FabricTensor::zeros(&[5]).unwrap();
        assert!(a.transpose().is_err());
    }

    #[test]
    fn reshape_preserves_data() {
        setup();
        let a = FabricTensor::from_slice(&[2, 3], &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let b = a.reshape(&[3, 2]).unwrap();
        assert_eq!(b.shape(), &[3, 2]);
        // Reshape only reinterprets the layout; flat data is unchanged.
        assert_eq!(b.as_slice(), a.as_slice());
    }

    #[test]
    fn reshape_wrong_numel() {
        setup();
        let a = FabricTensor::zeros(&[2, 3]).unwrap();
        assert!(a.reshape(&[2, 2]).is_err());
    }
1741
    // Operator-overload forms delegate to the inherent add/mul/scale methods.
    #[test]
    fn op_add() {
        setup();
        let a = FabricTensor::from_slice(&[3], &[1.0, 2.0, 3.0]).unwrap();
        let b = FabricTensor::from_slice(&[3], &[4.0, 5.0, 6.0]).unwrap();
        let c = (&a + &b).unwrap();
        assert_eq!(c.as_slice(), &[5.0, 7.0, 9.0]);
    }

    #[test]
    fn op_mul_elementwise() {
        setup();
        let a = FabricTensor::from_slice(&[3], &[2.0, 3.0, 4.0]).unwrap();
        let b = FabricTensor::from_slice(&[3], &[5.0, 6.0, 7.0]).unwrap();
        let c = (&a * &b).unwrap();
        assert_eq!(c.as_slice(), &[10.0, 18.0, 28.0]);
    }

    #[test]
    fn op_mul_scalar() {
        setup();
        let a = FabricTensor::from_slice(&[2], &[3.0, 4.0]).unwrap();
        let b = (&a * 2.0).unwrap();
        assert_eq!(b.as_slice(), &[6.0, 8.0]);
    }

    #[test]
    fn tensor_3d_access() {
        setup();
        let data: Vec<f32> = (0..24).map(|x| x as f32).collect();
        let t = FabricTensor::from_slice(&[2, 3, 4], &data).unwrap();
        assert_eq!(t.ndim(), 3);
        assert_eq!(t.numel(), 24);
        assert_eq!(t.strides(), &[12, 4, 1]);
        // Flat offset 1*12 + 2*4 + 3 = 23.
        assert_eq!(t.get(&[1, 2, 3]).unwrap(), 23.0);
    }

    #[test]
    fn scalar_tensor() {
        setup();
        // A rank-0 tensor still holds exactly one element.
        let t = FabricTensor::from_slice(&[], &[42.0]).unwrap();
        assert_eq!(t.ndim(), 0);
        assert_eq!(t.numel(), 1);
        assert_eq!(t.get(&[]).unwrap(), 42.0);
    }

    #[test]
    fn from_mem_lease_wraps_existing() {
        setup();
        let lease = MemBuilder::new().min_bytes(16).acquire().unwrap();
        let t = FabricTensor::from_mem_lease(&[2, 2], lease);
        assert_eq!(t.shape(), &[2, 2]);
        assert_eq!(t.numel(), 4);
        assert!(t.is_cpu());
    }

    #[test]
    fn transpose_3d_swaps_last_two() {
        setup();
        let data: Vec<f32> = (0..24).map(|x| x as f32).collect();
        let t = FabricTensor::from_slice(&[2, 3, 4], &data).unwrap();
        let t2 = t.transpose().unwrap();
        assert_eq!(t2.shape(), &[2, 4, 3]);
        // t2[b][c][r] == t[b][r][c].
        assert_eq!(t2.get(&[0, 2, 1]).unwrap(), 6.0);
        assert_eq!(t2.get(&[1, 3, 0]).unwrap(), 15.0);
    }

    #[test]
    fn default_device_is_cpu() {
        setup();
        let t = FabricTensor::zeros(&[2, 2]).unwrap();
        assert_eq!(t.device(), Device::Cpu);
        assert!(t.is_cpu());
        assert!(!t.is_gpu());
    }

    #[test]
    #[cfg(not(feature = "gpu"))]
    fn to_gpu_without_feature_is_unsupported() {
        setup();
        let t = FabricTensor::zeros(&[2, 2]).unwrap();
        assert!(matches!(t.to_gpu(), Err(FabricError::Unsupported)));
    }
1838
1839 #[test]
1840 #[cfg(feature = "gpu")]
1841 fn to_gpu_to_cpu_roundtrip_preserves_data() {
1842 setup();
1843 let t = FabricTensor::from_slice(&[2, 2], &[1.0, 2.0, 3.0, 4.0]).unwrap();
1844 let gpu_t = t.to_gpu().unwrap();
1845 assert!(gpu_t.is_gpu());
1846 assert!(!gpu_t.is_cpu());
1847
1848 let cpu_t = gpu_t.to_cpu().unwrap();
1849 assert!(cpu_t.is_cpu());
1850 assert_eq!(cpu_t.shape(), &[2, 2]);
1851 assert_eq!(cpu_t.as_slice(), &[1.0, 2.0, 3.0, 4.0]);
1852 }
1853
1854 #[test]
1855 #[cfg(feature = "gpu")]
1856 fn gpu_matmul_dispatches_when_both_operands_are_gpu() {
1857 setup();
1858 let a = FabricTensor::from_slice(&[2, 2], &[1.0, 2.0, 3.0, 4.0])
1859 .unwrap()
1860 .to_gpu()
1861 .unwrap();
1862 let b = FabricTensor::from_slice(&[2, 2], &[5.0, 6.0, 7.0, 8.0])
1863 .unwrap()
1864 .to_gpu()
1865 .unwrap();
1866 host::test_mock::_set_gpu_session_error(Some(-1));
1867 assert!(matches!(a.matmul(&b), Err(FabricError::Disconnected)));
1868 host::test_mock::_set_gpu_session_error(None);
1869 }
1870
1871 #[test]
1872 #[cfg(feature = "gpu")]
1873 fn gpu_binary_ops_return_gpu_placed_result() {
1874 setup();
1875 let a = FabricTensor::from_slice(&[2, 2], &[1.0, 2.0, 3.0, 4.0])
1876 .unwrap()
1877 .to_gpu()
1878 .unwrap();
1879 let b = FabricTensor::from_slice(&[2, 2], &[5.0, 6.0, 7.0, 8.0])
1880 .unwrap()
1881 .to_gpu()
1882 .unwrap();
1883 let result = a.add(&b).unwrap();
1884 assert!(result.is_gpu());
1885 assert_eq!(result.shape(), &[2, 2]);
1886 assert_eq!(result.as_slice(), &[6.0, 8.0, 10.0, 12.0]);
1887 }
1888
1889 #[test]
1890 #[cfg(feature = "gpu")]
1891 fn gpu_unary_ops_return_gpu_placed_result() {
1892 setup();
1893 let a = FabricTensor::from_slice(&[4], &[-2.0, -0.5, 0.0, 3.0])
1894 .unwrap()
1895 .to_gpu()
1896 .unwrap();
1897 let result = a.relu().unwrap();
1898 assert!(result.is_gpu());
1899 assert_eq!(result.shape(), &[4]);
1900 assert_eq!(result.as_slice(), &[0.0, 0.0, 0.0, 3.0]);
1901 }
1902
1903 #[test]
1906 fn fft_ifft_roundtrip() {
1907 setup();
1908 let input = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
1909 let t = FabricTensor::from_slice(&[8], &input).unwrap();
1910 let freq = t.fft().unwrap();
1911 assert_eq!(freq.shape(), &[16]); let recovered = freq.ifft().unwrap();
1913 assert_eq!(recovered.shape(), &[8]);
1914 for (i, &val) in recovered.as_slice().iter().enumerate() {
1915 assert!(
1916 (val - input[i]).abs() < 1e-4,
1917 "sample {i}: expected {}, got {val}",
1918 input[i]
1919 );
1920 }
1921 }
1922
1923 #[test]
1924 fn fft_not_1d_fails() {
1925 setup();
1926 let t = FabricTensor::zeros(&[2, 4]).unwrap();
1927 assert!(t.fft().is_err());
1928 }
1929
1930 #[test]
1931 fn fft_non_power_of_two_fails() {
1932 setup();
1933 let t = FabricTensor::from_slice(&[3], &[1.0, 2.0, 3.0]).unwrap();
1934 assert!(t.fft().is_err());
1935 }
1936
1937 #[test]
1938 #[cfg(feature = "gpu")]
1939 fn gpu_fft_dispatches_and_produces_correct_result() {
1940 setup();
1941 let input = vec![1.0, 0.0, -1.0, 0.0];
1942 let t = FabricTensor::from_slice(&[4], &input)
1943 .unwrap()
1944 .to_gpu()
1945 .unwrap();
1946 let freq = t.fft().unwrap();
1947 assert!(freq.is_gpu());
1948 assert_eq!(freq.shape(), &[8]); let recovered = freq.ifft().unwrap();
1952 assert!(recovered.is_gpu());
1953 assert_eq!(recovered.shape(), &[4]);
1954 for (i, &val) in recovered.as_slice().iter().enumerate() {
1955 assert!(
1956 (val - input[i]).abs() < 1e-4,
1957 "sample {i}: expected {}, got {val}",
1958 input[i]
1959 );
1960 }
1961 }
1962
1963 #[test]
1966 fn subtract_elementwise() {
1967 setup();
1968 let a = FabricTensor::from_slice(&[3], &[5.0, 3.0, 1.0]).unwrap();
1969 let b = FabricTensor::from_slice(&[3], &[1.0, 2.0, 3.0]).unwrap();
1970 let c = a.subtract(&b).unwrap();
1971 assert_eq!(c.as_slice(), &[4.0, 1.0, -2.0]);
1972 }
1973
1974 #[test]
1975 fn subtract_operator() {
1976 setup();
1977 let a = FabricTensor::from_slice(&[2], &[10.0, 5.0]).unwrap();
1978 let b = FabricTensor::from_slice(&[2], &[3.0, 7.0]).unwrap();
1979 let c = (&a - &b).unwrap();
1980 assert_eq!(c.as_slice(), &[7.0, -2.0]);
1981 }
1982
1983 #[test]
1984 fn subtract_shape_mismatch() {
1985 setup();
1986 let a = FabricTensor::from_slice(&[2], &[1.0, 2.0]).unwrap();
1987 let b = FabricTensor::from_slice(&[3], &[1.0, 2.0, 3.0]).unwrap();
1988 assert!(a.subtract(&b).is_err());
1989 }
1990
1991 #[test]
1994 fn sum_axis_row_sums() {
1995 setup();
1996 let a = FabricTensor::from_slice(&[2, 3], &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
1997 let s = a.sum_axis(1).unwrap();
1998 assert_eq!(s.shape(), &[2]);
1999 assert_eq!(s.as_slice(), &[6.0, 15.0]);
2000 }
2001
2002 #[test]
2003 fn sum_axis_col_sums() {
2004 setup();
2005 let a = FabricTensor::from_slice(&[2, 3], &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
2006 let s = a.sum_axis(0).unwrap();
2007 assert_eq!(s.shape(), &[3]);
2008 assert_eq!(s.as_slice(), &[5.0, 7.0, 9.0]);
2009 }
2010
2011 #[test]
2012 fn sum_axis_1d() {
2013 setup();
2014 let a = FabricTensor::from_slice(&[4], &[1.0, 2.0, 3.0, 4.0]).unwrap();
2015 let s = a.sum_axis(0).unwrap();
2016 assert_eq!(s.shape(), &[1]);
2017 assert_eq!(s.as_slice(), &[10.0]);
2018 }
2019
2020 #[test]
2021 fn sum_axis_out_of_bounds() {
2022 setup();
2023 let a = FabricTensor::from_slice(&[2, 3], &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
2024 assert!(a.sum_axis(2).is_err());
2025 }
2026
2027 #[test]
2030 fn sigmoid_values() {
2031 setup();
2032 let a = FabricTensor::from_slice(&[3], &[0.0, 100.0, -100.0]).unwrap();
2033 let b = a.sigmoid().unwrap();
2034 assert!((b.as_slice()[0] - 0.5).abs() < 1e-6);
2035 assert!((b.as_slice()[1] - 1.0).abs() < 1e-4);
2036 assert!(b.as_slice()[2] < 1e-4);
2037 }
2038
2039 #[test]
2042 fn ln_values() {
2043 setup();
2044 let a = FabricTensor::from_slice(&[3], &[1.0, core::f32::consts::E, 10.0]).unwrap();
2045 let b = a.ln().unwrap();
2046 assert!((b.as_slice()[0] - 0.0).abs() < 1e-6);
2047 assert!((b.as_slice()[1] - 1.0).abs() < 1e-5);
2048 assert!((b.as_slice()[2] - 10.0f32.ln()).abs() < 1e-5);
2049 }
2050
2051 #[test]
2054 fn clip_values() {
2055 setup();
2056 let a = FabricTensor::from_slice(&[4], &[-1.0, 0.5, 1.5, 3.0]).unwrap();
2057 let b = a.clip(0.0, 1.0).unwrap();
2058 assert_eq!(b.as_slice(), &[0.0, 0.5, 1.0, 1.0]);
2059 }
2060}