Merge pull request #58 from BBuf/patch-1

Avoid computing model initialization time
2 years ago · 95041530a0
parent b24058bbfb 2d485f0610
commit 95041530a0
1 changed file with 1 addition and 0 deletions
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -137,6 +137,7 @@ def main():
    if args.quantize:
        model = quantize(model, weight_bit_width=8, backend="torch")
    model.cuda()
+    torch.cuda.synchronize()
    
    with open(args.prompt_file, "r") as f:
        prompt = f.readlines()