Llama on Windows
sudo yum install make
sudo yum install git
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
make
This will create a binary called main. You can test the model with the following command; update -t to match the number of CPU threads on your system.
./main -t 8 -m ./models/llama-2-13b-chat.ggmlv3.q4_1.bin --color -c 4096 --temp 0.7 --repeat_penalty 1.1 -n -1 -p "### Instruction: Write a story about llamas\n### Response:"
Copy the model .bin file from the models folder to the Windows machine (e.g. /home/ec2-user/llama.cpp/models/llama-2-13b-chat.ggmlv3.q4_1.bin).
Install the following NuGet packages in the Windows project:
LLamaSharp
LLamaSharp.Backend.Cpu
C# Code
using LLama.Common;
using LLama;
using static System.Net.Mime.MediaTypeNames;
using System.Security.Cryptography.X509Certificates;
namespace WinFormsApp1
{
/// <summary>
/// Minimal chat UI over a local llama.cpp model driven through LLamaSharp.
/// button3 loads the model, button1 sends the prompt in textBox2 and streams
/// the reply into textBox1, button2 clears both text boxes.
/// </summary>
public partial class Form1 : Form
{
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Sends the text in textBox2 to the chat session and streams the
    /// model's reply token-by-token into textBox1.
    /// Requires button3_Click to have been run first (Globals.session must be initialized).
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        // Prefix with "User: " so the model treats the input as the user's chat turn.
        var prompt = "User: " + textBox2.Text;

        // Stream tokens as they are generated. The filter drops role-marker
        // tokens the model echoes back ("AI", "User", ":", "?", and the
        // word "Assistant" split across the two tokens " Ass" + "istant").
        foreach (var text in Globals.session.Chat(prompt, Globals.AllaInferenceParams))
        {
            if (text != "AI" && text != "?" && text != " Ass" && text != "istant" && text != "User" && text != ":")
            {
                // AppendText avoids rebuilding the entire string on every token
                // (the original Text = Text + text concatenation is O(n^2)).
                textBox1.AppendText(text);
            }
        }

        textBox1.AppendText(Environment.NewLine);
    }

    private void textBox1_TextChanged(object sender, EventArgs e)
    {
        // Wired up by the designer; intentionally empty.
    }

    private void Form1_Load(object sender, EventArgs e)
    {
        // Wired up by the designer; intentionally empty.
    }

    /// <summary>Clears both the conversation view and the input box.</summary>
    private void button2_Click(object sender, EventArgs e)
    {
        textBox1.Text = "";
        textBox2.Text = "";
    }

    /// <summary>
    /// Shared model/session state, populated once by button3_Click and
    /// read by button1_Click on every message.
    /// </summary>
    public class Globals
    {
        // Path to the GGML model file on disk (set in button3_Click).
        public static string modelPath;
        // Executor that drives token generation for the chat session.
        public static InteractiveExecutor ex;
        // Conversation state shared across button clicks.
        public static ChatSession session;
        // Sampling parameters applied to every inference call.
        public static InferenceParams AllaInferenceParams;
    }

    /// <summary>
    /// Loads the llama-2 model from the local Model folder and creates the
    /// chat session. Disables itself afterwards so the model is only loaded once.
    /// </summary>
    public void button3_Click(object sender, EventArgs e)
    {
        MessageBox.Show("Model loading may take 2-3 Minutes.");
        Globals.modelPath = ".\\Model\\llama-2-13b-chat.ggmlv3.q4_1.bin";
        // NOTE(review): gpuLayerCount: -1 — presumably means "no GPU offload"
        // for the CPU backend; confirm against the LLamaSharp ModelParams docs.
        Globals.ex = new InteractiveExecutor(new LLamaModel(new ModelParams(Globals.modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: -1)));
        Globals.session = new ChatSession(Globals.ex);
        Globals.AllaInferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            // Stop generating as soon as the model starts writing the user's next turn.
            // (Original had a typo: "new List>string>" — invalid C#.)
            AntiPrompts = new List<string> { "User:" },
            //MaxTokens = 128,
            MirostatTau = 10,
        };
        MessageBox.Show("Model loading complete!");
        // Prevent a second click from reloading the model into memory.
        button3.Enabled = false;
    }

    private void label1_Click(object sender, EventArgs e)
    {
        // Wired up by the designer; intentionally empty.
    }
}
}