diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5b686fb..5f88caa 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -14,7 +14,6 @@ jobs:
     steps:
       - name: checkout code
         uses: actions/checkout@v4
-
       - name: run test script
         run: bash ./ci/script.sh
diff --git a/.gitignore b/.gitignore
index 16117da..7fe8987 100644
--- a/.gitignore
+++ b/.gitignore
@@ -260,5 +260,10 @@ paket-files/
 # Python Tools for Visual Studio (PTVS)
 __pycache__/
 *.pyc
+*.egg-info/
+*/venv/
+*/dist/
+*/build/
+
 cmake-*
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 447a7f7..536b867 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,24 @@
 All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## v1.7.0 - 3-12-2024
+
+### Added
+
+- The project now uses the GNU General Public License (GPL) v3; license file added
+- Introduces a new Python package for storing and loading numpy arrays; it can be installed with `pip install gputils-api`;
+  includes unit tests and documentation
+
+### Fixed
+
+- When compiling with `cmake`, the unit tests are not compiled by default unless the flag `GPUTILS_BUILD_TEST` is set
+- Clang-Tidy recommendations applied
+- Proper error handling when a binary tensor file is not found
+
+
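As a quick illustration of the new `gputils-api` package announced in the changelog entry above, here is a minimal usage sketch, mirroring the examples in `python/README.md` further down in this diff (the file name `my_data.bt` is illustrative):

```python
import numpy as np
import gputils_api as g

a = np.eye(3)                                             # any numpy array with up to 3 dimensions
g.write_array_to_gputils_binary_file(a, 'my_data.bt')     # store it as a .bt (binary tensor) file
x = g.read_array_from_gputils_binary_file('my_data.bt')   # load it back as a (rows, cols, mats) array
```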
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4b87d7b..9b20dbc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,11 +2,10 @@
 # GPUtils
 # ====================================================================
 cmake_minimum_required(VERSION 3.20 FATAL_ERROR)
-
 if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.29")
     cmake_policy(SET CMP0135 NEW)
 endif()
-
+# ----
 # Set C++ version and SM architecture
 if (NOT DEFINED CPPVERSION)
     set(CPPVERSION 20) # A40: 20, Orin: 17
@@ -14,8 +13,7 @@ endif()
 if (NOT DEFINED SM_ARCH)
     set(SM_ARCH 86)# A40: 86, Orin: 87
 endif()
-
-
+# ----
 project(GPUtils
         DESCRIPTION "Easy use of vectors and matrices on GPGPU devices."
         HOMEPAGE_URL "https://github.com/GPUEngineering/GPUtils"
@@ -31,8 +29,8 @@ set(CMAKE_CUDA_FLAGS "-std=c++${CPPVERSION}")
 set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; "-std=c++${CPPVERSION}")
 enable_language(CUDA)
 # ----
-add_library(device_compiler_flags INTERFACE)
-target_compile_features(device_compiler_flags INTERFACE cxx_std_${CPPVERSION})
+add_library(gputils_compiler_flags INTERFACE)
+target_compile_features(gputils_compiler_flags INTERFACE cxx_std_${CPPVERSION})
 set(CMAKE_CXX_EXTENSIONS OFF)
 # ----
 add_library(developer_flags INTERFACE)
@@ -45,30 +43,31 @@ target_compile_options(developer_flags
         # flags for CUDA builds
         $<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>
 )
-target_link_libraries(device_compiler_flags INTERFACE $<BUILD_INTERFACE:developer_flags>)
+target_link_libraries(gputils_compiler_flags INTERFACE $<BUILD_INTERFACE:developer_flags>)
 # ----
-
-
-# ====================================================================
-# comment out for release
-# ====================================================================
-add_executable(main)
-target_sources(main
+add_executable(gputils_main)
+target_sources(gputils_main
     PRIVATE
         main.cu
 )
-target_link_libraries(main
+target_link_libraries(gputils_main
     PRIVATE
-        device_compiler_flags
+        gputils_compiler_flags
         cublas
         cusolver
         cudadevrt
 )
-target_include_directories(main
+target_include_directories(gputils_main
     PRIVATE
         "${PROJECT_BINARY_DIR}"
         "${PROJECT_SOURCE_DIR}/include"
 )
 # ----
-add_subdirectory(test)
+if(NOT GPUTILS_BUILD_TEST)
+    set(GPUTILS_BUILD_TEST OFF) # Set to ON for local testing (or add `-DGPUTILS_BUILD_TEST=ON` to your CMake profile)
+endif()
+if (GPUTILS_BUILD_TEST)
+    add_subdirectory(test)
+endif()
+unset(GPUTILS_BUILD_TEST CACHE)
 # ----
diff --git a/ci/script.sh b/ci/script.sh
index 6cf351d..9b887fc 100644
--- a/ci/script.sh
+++ b/ci/script.sh
@@ -7,7 +7,7 @@ tests() {
     cpp_version=17  # default
     sm_arch=86  # default
    hwInfoOrin=`lshw | grep Orin` ||
-    if [ ! -z "${hwInfoOrin}" ]; then
+    if [ -n "${hwInfoOrin}" ]; then
        echo "Running on Orin";
        sm_arch=87
        cpp_version=17
@@ -17,12 +17,26 @@ tests() {
        cpp_version=20
    fi
 
+
+    # ------------------------------------
+    # Run Python tests first
+    # ------------------------------------
+    pushd python
+    export PYTHONPATH=.
+    python -m venv venv
+    source venv/bin/activate
+    pip install --upgrade pip
+    pip install .
+    python -W ignore test/test.py -v
+    deactivate
+    popd
+
     # ------------------------------------
     # Run tensor gtests
     # ------------------------------------
     # -- create build files
-    cmake -DCPPVERSION=${cpp_version} -DSM_ARCH=${sm_arch} -S . -B ./build -Wno-dev
+    cmake -DCPPVERSION=${cpp_version} -DSM_ARCH=${sm_arch} -DGPUTILS_BUILD_TEST=ON -S . -B ./build -Wno-dev
 
     # -- build files in build folder
     cmake --build ./build
@@ -34,7 +48,7 @@ tests() {
 
     # -- run compute sanitizer
     pushd ./build/test
-    mem=$(/usr/local/cuda/bin/compute-sanitizer --tool memcheck --leak-check=full ./device_test)
+    mem=$(/usr/local/cuda/bin/compute-sanitizer --tool memcheck --leak-check=full ./gputils_test)
     grep "0 errors" <<< "$mem"
     popd
 
@@ -44,7 +58,7 @@ tests() {
 
     # -- create build files
     cd example
-    cmake -DCPPVERSION=${cpp_version} -DSM_ARCH=${sm_arch} -S . -B ./build -Wno-dev
+    cmake -DCPPVERSION=${cpp_version} -DSM_ARCH=${sm_arch} -S . -B ./build -Wno-dev
 
     # -- build files in build folder
     cmake --build ./build
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
index 4f84a78..1140d95 100644
--- a/example/CMakeLists.txt
+++ b/example/CMakeLists.txt
@@ -28,10 +28,8 @@ target_compile_options(example_developer_flags
         # flags for CUDA builds
         $<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>
 )
-target_link_libraries(example_compiler_flags INTERFACE $<BUILD_INTERFACE:example_developer_flags>
-)
+target_link_libraries(example_compiler_flags INTERFACE $<BUILD_INTERFACE:example_developer_flags>)
 # ----
-set(GPUTILS_BUILD_TESTING OFF)
 include(FetchContent)
 FetchContent_Declare(
     gputils
diff --git a/include/tensor.cuh b/include/tensor.cuh
index cee9dff..bca5706 100644
--- a/include/tensor.cuh
+++ b/include/tensor.cuh
@@ -608,7 +608,7 @@ data_t vectorFromTextFile(std::string path_to_file) {
     data_t dataStruct;
     std::ifstream file;
     file.open(path_to_file, std::ios::in);
-    if (!file.is_open()) { throw std::invalid_argument("the file you provided does not exist"); };
+    if (!file.is_open()) { throw std::invalid_argument("[vectorFromTextFile] the file does not exist"); }
     std::string line;
     getline(file, line);
     dataStruct.numRows = atoi(line.c_str());
@@ -655,6 +655,7 @@ data_t vectorFromBinaryFile(std::string path_to_file) {
     /* Read from binary file */
     std::ifstream inFile;
     inFile.open(path_to_file, std::ios::binary);
+    if (!inFile.is_open()) { throw std::invalid_argument("[vectorFromBinaryFile] the file does not exist"); }
     inFile.read(reinterpret_cast<char *>(&(dataStruct.numRows)), sizeof(uint64_t));
     inFile.read(reinterpret_cast<char *>(&(dataStruct.numCols)), sizeof(uint64_t));
     inFile.read(reinterpret_cast<char *>(&(dataStruct.numMats)), sizeof(uint64_t));
@@ -723,7 +724,7 @@ void DTensor<T>::reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats
         char errMessage[256];
         sprintf(errMessage,
                 "DTensor[%lu x %lu x %lu] with %lu elements cannot be reshaped into DTensor[%lu x %lu x %lu] (%lu elements)",
-                numRows(), numRows(), numMats(), numEl(), newNumRows, newNumCols, newNumMats, newNumElements);
+                numRows(), numCols(), numMats(), numEl(), newNumRows, newNumCols, newNumMats, newNumElements);
         throw std::invalid_argument(errMessage);
     }
diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000..ebde65a
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,28 @@
+## GPUtils API
+
+### Installation
+
+As simple as...
+```bash
+pip install gputils-api
+```
+Preferably, install from within a virtual environment.
+
+### Write to file
+
+```python
+import numpy as np
+import gputils_api as g
+a = np.eye(3)
+g.write_array_to_gputils_binary_file(a, 'my_data.bt')
+```
+
+### Read from file
+
+```python
+import numpy as np
+import gputils_api as g
+x = g.read_array_from_gputils_binary_file('my_data.bt')
+```
+
+
diff --git a/python/VERSION b/python/VERSION
new file mode 100644
index 0000000..9dbb0c0
--- /dev/null
+++ b/python/VERSION
@@ -0,0 +1 @@
+1.7.0
\ No newline at end of file
diff --git a/python/gputils_api/__init__.py b/python/gputils_api/__init__.py
new file mode 100644
index 0000000..04bdfa9
--- /dev/null
+++ b/python/gputils_api/__init__.py
@@ -0,0 +1 @@
+from .gputils_api import *
\ No newline at end of file
diff --git a/python/gputils_api/gputils_api.py b/python/gputils_api/gputils_api.py
new file mode 100644
index 0000000..e4cc1a1
--- /dev/null
+++ b/python/gputils_api/gputils_api.py
@@ -0,0 +1,43 @@
+import numpy as np
+
+def read_array_from_gputils_binary_file(path, dt=np.dtype('d')):
+    """
+    Reads an array from a bt file
+    :param path: path to file
+    :param dt: numpy-compatible data type
+    :raises ValueError: if the file name specified by `path` does not have the .bt extension
+    """
+    if not path.endswith(".bt"):
+        raise ValueError("The file must have the .bt extension")
+    with open(path, 'rb') as f:
+        nr = int.from_bytes(f.read(8), byteorder='little', signed=False)  # read number of rows
+        nc = int.from_bytes(f.read(8), byteorder='little', signed=False)  # read number of columns
+        nm = int.from_bytes(f.read(8), byteorder='little', signed=False)  # read number of matrices
+        dat = np.fromfile(f, dtype=np.dtype(dt))  # read data
+        dat = dat.reshape((nr, nc, nm))  # reshape
+    return dat
+
+
+def write_array_to_gputils_binary_file(x, path):
+    """
+    Writes a numpy array into a bt file
+
+    :param x: numpy array to save to file
+    :param path: path to file
+    :raises ValueError: if `x` has more than 3 dimensions
+    :raises ValueError: if the file name specified by `path` does not have the .bt extension
+    """
+    if not path.endswith(".bt"):
+        raise ValueError("The file must have the .bt extension")
+    x_shape = x.shape
+    x_dims = len(x_shape)
+    if x_dims >= 4:
+        raise ValueError("given array cannot have more than 3 dimensions")
+    nr = x_shape[0]
+    nc = x_shape[1] if x_dims >= 2 else 1
+    nm = x_shape[2] if x_dims == 3 else 1
+    with open(path, 'wb') as f:
+        f.write(nr.to_bytes(8, 'little'))  # write number of rows
+        f.write(nc.to_bytes(8, 'little'))  # write number of columns
+        f.write(nm.to_bytes(8, 'little'))  # write number of matrices
+        x.reshape(nr*nc*nm, 1).tofile(f)  # write data
\ No newline at end of file
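The two functions above fix the on-disk `.bt` layout: a header of three little-endian unsigned 64-bit integers (rows, columns, matrices) followed by the raw array values, with arrays always read back with shape `(nr, nc, nm)`, so a 2-D array comes back with a trailing singleton dimension. A minimal sketch that checks this, assuming the package is installed as shown in the README (the file name `header_demo.bt` is illustrative):

```python
import struct

import numpy as np
import gputils_api as g

a = np.arange(12, dtype='d').reshape(3, 4)                 # a 2-D array, so nm defaults to 1
g.write_array_to_gputils_binary_file(a, 'header_demo.bt')

with open('header_demo.bt', 'rb') as f:
    nr, nc, nm = struct.unpack('<3Q', f.read(24))          # three little-endian uint64 header fields
    values = np.fromfile(f, dtype='d')                     # remaining bytes: the array values
print(nr, nc, nm)    # 3 4 1
print(values.size)   # 12

b = g.read_array_from_gputils_binary_file('header_demo.bt')
print(b.shape)       # (3, 4, 1) -- note the trailing singleton dimension
```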
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 0000000..0495aab
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+import io
+import os
+
+# To publish to pypi, run:
+# rm -rf ./build ./dist opengen.egg-info ; pip install . ; python setup.py sdist bdist_wheel; twine upload dist/*
+
+here = os.path.abspath(os.path.dirname(__file__))
+
+NAME = 'gputils_api'
+
+# Import version from file
+version_file = open(os.path.join(here, 'VERSION'))
+VERSION = version_file.read().strip()
+
+DESCRIPTION = 'Python API for GPUtils'
+
+
+# Import the README and use it as the long-description.
+# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
+try:
+    with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
+        long_description = '\n' + f.read()
+except FileNotFoundError:
+    long_description = DESCRIPTION
+
+setup(name=NAME,
+      version=VERSION,
+      description=DESCRIPTION,
+      long_description=long_description,
+      long_description_content_type='text/markdown',
+      author=['Pantelis Sopasakis', 'Ruairi Moran'],
+      author_email='p.sopasakis@gmail.com',
+      license='GNU General Public License v3 (GPLv3)',
+      packages=find_packages(
+          exclude=["private"]),
+      include_package_data=True,
+      install_requires=[
+          'numpy', 'setuptools'
+      ],
+      classifiers=[
+          'Development Status :: 2 - Pre-Alpha',
+          'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
+          'Programming Language :: Python',
+          'Environment :: GPU :: NVIDIA CUDA',
+          'Intended Audience :: Developers',
+          'Topic :: Software Development :: Libraries'
+      ],
+      keywords=['api', 'GPU'],
+      url=(
+          'https://github.com/GPUEngineering/GPUtils'
+      ),
+      zip_safe=False)
diff --git a/python/test/test.py b/python/test/test.py
new file mode 100644
index 0000000..8c3b6b6
--- /dev/null
+++ b/python/test/test.py
@@ -0,0 +1,83 @@
+import os
+import unittest
+import numpy as np
+import gputils_api as gpuapi
+
+
+class GputilApiTestCase(unittest.TestCase):
+
+    _B = np.array([
+        [[1, 2], [3, 4], [5, 6]],
+        [[7, 8], [9, 10], [11, 12]],
+        [[13, 14], [15, 16], [17, 18]]
+    ], dtype=np.dtype('d'))
+
+    @staticmethod
+    def local_abs_path():
+        cwd = os.getcwd()
+        return cwd.split('open-codegen')[0]
+
+    @classmethod
+    def setUpClass(cls):
+        n = 5
+        base_dir = GputilApiTestCase.local_abs_path()
+        eye_d = np.eye(n, dtype=np.dtype('d'))
+        gpuapi.write_array_to_gputils_binary_file(eye_d, os.path.join(base_dir, 'eye_d.bt'))
+
+        eye_f = np.eye(n, dtype=np.dtype('f'))
+        gpuapi.write_array_to_gputils_binary_file(eye_f, os.path.join(base_dir, 'eye_f.bt'))
+
+        xd = np.random.randn(2, 4, 6).astype('d')
+        xd[1, 2, 3] = -12.3
+        gpuapi.write_array_to_gputils_binary_file(xd, os.path.join(base_dir, 'rand_246_d.bt'))
+
+        xf = np.random.randn(2, 4, 6).astype('f')
+        xf[1, 2, 3] = float(-12.3)
+        gpuapi.write_array_to_gputils_binary_file(xf, os.path.join(base_dir, 'rand_246_f.bt'))
+
+        a = np.linspace(-100, 100, 4 * 5).reshape((4, 5)).astype('d')
+        gpuapi.write_array_to_gputils_binary_file(a, os.path.join(base_dir, 'a_d.bt'))
+        gpuapi.write_array_to_gputils_binary_file(cls._B, os.path.join(base_dir, 'b_d.bt'))
+
+    def __test_read_eye(self, dt):
+        base_dir = GputilApiTestCase.local_abs_path()
+        path = os.path.join(base_dir, f'eye_{dt}.bt')
+        r = gpuapi.read_array_from_gputils_binary_file(path, dt=np.dtype(dt))
+        err = r[:, :, 0] - np.eye(5)
+        err_norm = np.linalg.norm(err, np.inf)
+        self.assertTrue(err_norm < 1e-12)
+
+    def test_read_eye_d(self):
+        self.__test_read_eye('d')
+
+    def test_read_eye_f(self):
+        self.__test_read_eye('f')
+
+    def __test_read_rand(self, dt):
+        base_dir = GputilApiTestCase.local_abs_path()
+        path = os.path.join(base_dir, f'rand_246_{dt}.bt')
+        r = gpuapi.read_array_from_gputils_binary_file(path, dt=np.dtype(dt))
+        r_shape = r.shape
+        self.assertEqual(2, r_shape[0])
+        self.assertEqual(4, r_shape[1])
+        self.assertEqual(6, r_shape[2])
+        e = np.abs(r[1, 2, 3] + 12.3)
+        self.assertTrue(e < 1e-6)
+
+    def test_read_rand_d(self):
+        self.__test_read_rand('d')
+
+    def test_read_rand_f(self):
+        self.__test_read_rand('f')
+
+    def test_read_a_tensor_3d(self):
+        self.__test_read_rand('f')
+        base_dir = GputilApiTestCase.local_abs_path()
+        path = os.path.join(base_dir, 'b_d.bt')
+        b = gpuapi.read_array_from_gputils_binary_file(path)
+        self.assertEqual(GputilApiTestCase._B.shape, b.shape)
+        self.assertLess(np.linalg.norm((b - GputilApiTestCase._B).reshape(-1, 1)), 1e-6)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 9259aa9..4abfa1c 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -9,24 +9,23 @@ set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
 FetchContent_MakeAvailable(googletest)
 # ----
 enable_testing()
-add_executable(device_test)
-
-target_sources(device_test # add files
+add_executable(gputils_test)
+target_sources(gputils_test # add files
     PRIVATE
         testTensor.cu
 )
-target_link_libraries(device_test
+target_link_libraries(gputils_test
     PRIVATE
-        device_compiler_flags
+        gputils_compiler_flags
         cublas
         cusolver
         cudadevrt
         GTest::gtest_main)
-target_include_directories(device_test
+target_include_directories(gputils_test
     PRIVATE
         "${PROJECT_BINARY_DIR}"
         "${PROJECT_SOURCE_DIR}/include"
 )
 include(GoogleTest)
-gtest_discover_tests(device_test)
+gtest_discover_tests(gputils_test)
 # ----
diff --git a/test/testTensor.cu b/test/testTensor.cu
index 0e63c2c..fc2e5b7 100644
--- a/test/testTensor.cu
+++ b/test/testTensor.cu
@@ -175,6 +175,20 @@ TEST_F(TensorTest, parseTensorFromFileBinary) {
     parseTensorFromFileBinary();
 }
 
+
+/* ---------------------------------------
+ * Parse files generated by Python
+ * --------------------------------------- */
+
+TEST_F(TensorTest, parseTensorFromBinaryPython) {
+    std::string fName = "../../python/b_d.bt";
+    DTensor<double> b = DTensor<double>::parseFromFile(fName);
+    std::vector<double> vb(12);
+    b.download(vb);
+    for (size_t i = 0; i < 12; i++) EXPECT_NEAR(i + 1., vb[i], PRECISION_HIGH);
+}
+
+
 /* ---------------------------------------
  * Move constructor
  * --------------------------------------- */
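For reference, the `b_d.bt` fixture consumed by the new `parseTensorFromBinaryPython` test is written by `setUpClass` in `python/test/test.py`; a standalone sketch that produces an equivalent file (output path illustrative) would be:

```python
import numpy as np
import gputils_api as gpuapi

# Same 3 x 3 x 2 tensor as GputilApiTestCase._B in python/test/test.py
B = np.array([
    [[1, 2], [3, 4], [5, 6]],
    [[7, 8], [9, 10], [11, 12]],
    [[13, 14], [15, 16], [17, 18]]
], dtype=np.dtype('d'))

gpuapi.write_array_to_gputils_binary_file(B, 'b_d.bt')  # fixture read by the gtest above
```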