diff --git a/README.md b/README.md
index 798c0e951..ee9babde6 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,40 @@ This project is based on the [llama.cpp](https://github.com/ggerganov/llama.cpp)
 
 ## Installation
 
+### Ubuntu quick start
+
+```
+sudo apt install ccache clang libomp-dev
+sudo swapoff -a
+
+git clone --recursive https://github.com/microsoft/BitNet.git
+cd BitNet/
+
+gguf="ggml-model-i2_s.gguf"
+mdir="models/BitNet-b1.58-2B-4T"
+url="https://huggingface.co/microsoft/bitnet-b1.58-2B-4T-gguf"
+link="$url/resolve/main/$gguf"
+modprm="$mdir/$gguf"
+
+mkdir -p "$mdir" && wget -c "$link" -O "$modprm"
+{ python3 -m pip install --upgrade pip; pip install -r requirements.txt; }\
+| grep -ve "^Requirement already satisfied:"
+python3 setup_env.py -md "$mdir" -q i2_s
+cmake --build build --config Release
+
+export PATH="$PATH:$PWD/build/bin/"
+sysprompt="You are a helpful assistant"
+python3 run_inference.py -m "$modprm" -p "$sysprompt" -cnv --temp 0.3 -t $(nproc)
+
+# Alternative with a file prompt and specific parameters
+tempr="--temp 0.3 --dynatemp-range 0.1 --no-warmup"
+if [ -n "${file_prompt:-}" ]; then promptargs=(-f "$file_prompt"); else promptargs=(-p "$sysprompt"); fi
+pretkns="--override-kv tokenizer.ggml.pre=str:llama3 --mlock"
+intcnv="-i --multiline-input -cnv -c 8192 -b 4096 -co --keep -1 -n -1"
+llama-cli -m "$modprm" "${promptargs[@]}" -t $(nproc) $pretkns $tempr $intcnv
+
+```
+
 ### Requirements
 - python>=3.9
 - cmake>=3.22