Llama on Windows

Building llama.cpp (run on an Amazon Linux 2023 EC2 instance)
             
sudo yum install make gcc gcc-c++
sudo yum install git
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
make


This will create a binary called main; you can test the model with the following command. Set -t to the number of CPU threads available on your system (physical cores × threads per core).
             
./main -t 8 -m ./models/llama-2-13b-chat.ggmlv3.q4_1.bin --color -c 4096 --temp 0.7 --repeat_penalty 1.1 -n -1 -p "### Instruction: Write a story about llamas\n### Response:"

Copy the quantized model's .bin file from the models folder to the Windows machine (/home/ec2-user/llama.cpp/models/llama-2-13b-chat.ggmlv3.q4_1.bin). Note that the build itself does not produce this file; the GGML model is downloaded or converted into the models folder separately.

NuGet Packages

LLamaSharp
LLamaSharp.Backend.Cpu
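
Add both packages to the WinForms project through NuGet. A sketch of the resulting project-file entries (the version number is an assumption; the ggmlv3 .bin model used in this walkthrough predates LLamaSharp's switch to GGUF, so a release from that era is needed):

<ItemGroup>
  <!-- Version is an assumption: GGML (.bin) models need a pre-GGUF LLamaSharp release. -->
  <PackageReference Include="LLamaSharp" Version="0.4.0" />
  <PackageReference Include="LLamaSharp.Backend.Cpu" Version="0.4.0" />
</ItemGroup>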

C# Code
            

using LLama.Common;
using LLama;

namespace WinFormsApp1
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        private void button1_Click(object sender, EventArgs e)
        {

            // Prefix the user's input so it matches the chat transcript format.
            var prompt = "User: " + textBox2.Text;
            foreach (var text in Globals.session.Chat(prompt, Globals.AllaInferenceParams))
            {
                // Crude filter: skip the role-label token fragments ("AI", "Ass" + "istant",
                // "User", ":") that the model streams back along with the reply.
                if (text != "AI" && text != "?" && text != " Ass" && text != "istant" && text != "User" && text != ":")
                {
                    textBox1.Text += text;
                }
            }
            textBox1.Text = textBox1.Text + Environment.NewLine;
        }
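
        // An alternative sketch (hypothetical helper, not part of the original app):
        // buffer the whole response and strip the role labels once at the end,
        // instead of filtering individual token fragments as above.
        private void AppendResponse(string prompt)
        {
            var sb = new System.Text.StringBuilder();
            foreach (var text in Globals.session.Chat(prompt, Globals.AllaInferenceParams))
                sb.Append(text);
            textBox1.Text += sb.ToString().Replace("Assistant:", "").Replace("User:", "") + Environment.NewLine;
        }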

        private void textBox1_TextChanged(object sender, EventArgs e)
        {

        }

        private void Form1_Load(object sender, EventArgs e)
        {

        }

        private void button2_Click(object sender, EventArgs e)
        {
            textBox1.Text = "";
            textBox2.Text = "";
        }

        // Shared state so the model, executor, and chat session created in
        // button3_Click are visible to the other event handlers.
        public class Globals
        {
            public static string modelPath;
            public static InteractiveExecutor ex;
            public static ChatSession session;
            public static InferenceParams AllaInferenceParams;
        }

        public void button3_Click(object sender, EventArgs e)
        {
            MessageBox.Show("Model loading may take 2-3 Minutes.");
            Globals.modelPath = ".\\Model\\llama-2-13b-chat.ggmlv3.q4_1.bin";
            Globals.ex = new InteractiveExecutor(new LLamaModel(new ModelParams(Globals.modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: -1)));
            Globals.session = new ChatSession(Globals.ex);
            Globals.AllaInferenceParams = new InferenceParams()
            {
                Temperature = 0.9f,
                AntiPrompts = new List>string> { "User:" },
                //MaxTokens = 128,
                MirostatTau = 10,
            };
            MessageBox.Show("Model loading complete!");
            button3.Enabled = false;
        }

        private void label1_Click(object sender, EventArgs e)
        {

        }
    }
}
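
To verify the model loads correctly before wiring up the UI, the same flow can be exercised from a console app. A minimal sketch, assuming the same 0.4.x-era LLamaSharp API used above (later releases renamed these types and expect GGUF models):

using LLama;
using LLama.Common;

class Program
{
    static void Main()
    {
        // Same model path layout as the WinForms app above.
        var modelPath = ".\\Model\\llama-2-13b-chat.ggmlv3.q4_1.bin";
        var ex = new InteractiveExecutor(new LLamaModel(
            new ModelParams(modelPath, contextSize: 1024, seed: 1337)));
        var session = new ChatSession(ex);
        var inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "User:" },
        };

        Console.Write("User: ");
        var prompt = "User: " + Console.ReadLine();
        // Tokens stream back one at a time until an anti-prompt is hit.
        foreach (var text in session.Chat(prompt, inferenceParams))
        {
            Console.Write(text);
        }
    }
}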


Links
https://github.com/SciSharp/LLamaSharp
https://github.com/ggerganov/llama.cpp