huggingface - pulling large model files - China mirrors (AI)



https://huggingface.co/models


---Authenticated pull (clone with an access token)

git lfs install

git clone https://hdongquane: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@huggingface.co/openbmb/MiniCPM-V-2_6
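The same authenticated download can also be done with huggingface_hub instead of git; a minimal sketch, where the token value is a placeholder for your own access token (created under your Hugging Face account settings):

from huggingface_hub import snapshot_download

# token is a placeholder; substitute your own Hugging Face access token
snapshot_download(
    repo_id="openbmb/MiniCPM-V-2_6",
    local_dir="MiniCPM-V-2_6",
    token="hf_xxxxxxxxxxxxxxxx",
)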


---If the large-file pull gets killed, split it into 2 steps

git lfs install

-1. If you don't want to clone the large files yet, fetch only their LFS pointers

GIT_LFS_SKIP_SMUDGE=1 git clone https://hdongquane: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@huggingface.co/stabilityai/stable-diffusion-3-medium

-2. Pull the large files back, downloading only files with the .safetensors suffix; run this inside the cloned repo (a huggingface_hub alternative is sketched after these commands)

git lfs pull --include="*.safetensors"  

git lfs pull --include="*.safetensors"   https://hdongquane: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@huggingface.co/black-forest-labs/FLUX.1-dev

git lfs pull 
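If git-lfs keeps getting killed, a huggingface_hub-based alternative can fetch just the weight files directly; a minimal sketch, with the repo id and pattern list as illustrative assumptions:

from huggingface_hub import snapshot_download

# Download only the .safetensors weights plus the small config/tokenizer files
snapshot_download(
    repo_id="black-forest-labs/FLUX.1-dev",
    local_dir="FLUX.1-dev",
    allow_patterns=["*.safetensors", "*.json", "*.txt"],
)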


---China mirror site

https://hf-mirror.com
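huggingface_hub can be pointed at this mirror through the HF_ENDPOINT environment variable; a minimal sketch — note the variable has to be set before huggingface_hub is imported, because the endpoint is read at import time:

import os
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"  # set before importing huggingface_hub

from huggingface_hub import snapshot_download
snapshot_download(repo_id="openbmb/MiniCPM-V-2_6", local_dir="MiniCPM-V-2_6")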


---Converting a Hugging Face model to GGUF

https://hub.docker.com/r/continuumio/anaconda3

docker run -itd --name gguf-convert  -v /data/site/htmltoo.opt/huggingface:/opt/huggingface continuumio/anaconda3:main

docker exec -it gguf-convert /bin/bash

cd  /opt/huggingface

-Download the ollama/ollama source code

git clone https://github.com/ollama/ollama.git ollama

cd ollama

-Fetch its llama.cpp submodule

git submodule init

git submodule update llm/llama.cpp

-Create a Python environment

conda create -n llama-env python=3.12

conda activate llama-env

pip3 install -r  llm/llama.cpp/requirements.txt

pip3 install tiktoken

-Install make

make --version

apt-get update

apt-get install build-essential

-Build the quantize tool

make -C llm/llama.cpp quantize

cd llm/llama.cpp

-Switch to a specific git revision

---As of 2024-05-22 the ollama open-source code here is broken; roll back to the specified revision---

cd /opt/huggingface/ollama/llm/llama.cpp

git reset --hard 46e12c4692a37bdd31a0432fc5153d7d22bc7f72

-Convert the model (convert-hf-to-gguf.py converts the language model; the convert_image_gguf.py script below extracts the vision encoder into a separate GGUF)

cd  /opt/huggingface

python  ollama/llm/llama.cpp/convert-hf-to-gguf.py ./galactica-6.7b

python  ollama/llm/llama.cpp/convert_image_gguf.py --model  ./galactica-6.7b --output outfile/galactica-6.7b.gguf

vim   convert_image_gguf.py

import os
import json
import typing
import argparse
import numpy as np
import numpy.typing as npt
import torch
from gguf import *  # GGUFWriter and the KEY_* constants
from safetensors import safe_open
def k(raw_key: str, arch: str) -> str:
    return raw_key.format(arch=arch)
class Args:
    def __init__(self, model, output):
        self.model = model
        self.output = output
class SafetensorsIndexFile(typing.TypedDict):
    weight_map: typing.Dict[str, str]
class SafetensorsIndex:
    def __init__(self, index_file_path: str):
        directory = os.path.dirname(index_file_path)
        self.index = typing.cast(SafetensorsIndexFile, json.load(open(index_file_path)))
        self.weight_map = self.index["weight_map"]
        files = set(self.weight_map.values())
        self.tensors = {file: safe_open(os.path.join(directory, file), framework="pt") for file in files}
    def get_tensor(self, key: str) -> npt.NDArray[np.float32]:
        # convert to float32 and cast to np array
        return typing.cast(npt.NDArray[np.float32], self.tensors[self.weight_map[key]].get_tensor(key).to(torch.float32).numpy())
def main():
    parser = argparse.ArgumentParser(description="Extract vision model from safetensors to GGUF")
    parser.add_argument("--model", type=str, required=True, help="Input safetensors file")
    parser.add_argument("--output", type=str, required=True, help="Output GGUF file")
    args = parser.parse_args()
    import pathlib
    dir_model = pathlib.Path(args.model)
    config = json.load(open(dir_model / "config.json"))
    # tensors = safe_open(args.model, framework="np", device="cpu")
    tensors = SafetensorsIndex((dir_model / "model.safetensors.index.json").as_posix())
    ftype = 1  # fp16
    # source https://github.com/huggingface/transformers/blob/87134662f73d5e89bb015531ddd1d4662371d317/src/transformers/models/clip/configuration_clip.py#L209
    # hidden_size=768,
    # intermediate_size=3072,
    # projection_dim=512,
    # num_hidden_layers=12,
    # num_attention_heads=12,
    # num_channels=3,
    # image_size=224,
    # patch_size=32,
    # hidden_act="quick_gelu",
    # layer_norm_eps=1e-5,
    # attention_dropout=0.0,
    # initializer_range=0.02,
    # initializer_factor=1.0,
    clip_vision_config = {
        "hidden_size": 1024,
        "intermediate_size": 4096,
        "projection_dim": 1024,
        "num_hidden_layers": 24,
        "num_attention_heads": 16,
        "num_channels": 3,
        "image_size": 224,
        "patch_size": 14,
        "hidden_act": "quick_gelu",
        "layer_norm_eps": 1e-5,
        "attention_dropout": 0.0,
        "initializer_range": 0.02,
        "initializer_factor": 1.0,
    }
    # CLIP_VIT_LARGE_PATCH14_336_CONFIG = CLIPVisionConfig(
    #   attention_dropout=0.0,
    #   dropout=0.0,
    #   hidden_act="quick_gelu",
    #   hidden_size=1024,
    #   image_size=336,
    #   initializer_factor=1.0,
    #   initializer_range=0.02,
    #   intermediate_size=4096,
    #   layer_norm_eps=1e-05,
    #   num_attention_heads=16,
    #   num_channels=3,
    #   num_hidden_layers=24,
    #   patch_size=14,
    #   projection_dim=768
    # )
    clip_vision_config.update(dict(
        attention_dropout=0.0,
        dropout=0.0,
        hidden_act="quick_gelu",
        hidden_size=1024,
        image_size=224,
        initializer_factor=1.0,
        initializer_range=0.02,
        intermediate_size=4096,
        layer_norm_eps=1e-05,
        num_attention_heads=16,
        num_channels=3,
        num_hidden_layers=24,
        patch_size=14,
        projection_dim=1024
    ))
    fout = GGUFWriter(args.output, arch="clip")
    fout.add_bool("clip.has_text_encoder", False)
    fout.add_bool("clip.has_vision_encoder", True)
    fout.add_bool("clip.has_llava_projector", True)
    fout.add_file_type(ftype)
    model_name = "microsoft/phi-3.5-vision-instruct"
    fout.add_name(model_name)
    fout.add_description("image encoder for " + model_name)
    fout.add_string("clip.projector_type", "mlp")
    # Vision model hparams
    VISION = "clip.vision"
    fout.add_uint32("clip.vision.image_size", clip_vision_config["image_size"])
    fout.add_uint32("clip.vision.patch_size", clip_vision_config["patch_size"])
    fout.add_uint32(k(KEY_EMBEDDING_LENGTH, VISION), clip_vision_config["hidden_size"])
    fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, VISION), clip_vision_config["intermediate_size"])
    fout.add_uint32("clip.vision.projection_dim", clip_vision_config["projection_dim"])
    fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, VISION), clip_vision_config["num_attention_heads"])
    fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, VISION), clip_vision_config["layer_norm_eps"])
    fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), clip_vision_config["num_hidden_layers"])
    fout.add_array("clip.vision.image_mean", [0.48145466, 0.4578275, 0.40821073])
    fout.add_array("clip.vision.image_std", [0.26862954, 0.26130258, 0.27577711])
    fout.add_bool("clip.use_gelu", clip_vision_config["hidden_act"] != "quick_gelu")
    # Vision model tensors
    prefix = "model.vision_embed_tokens.img_processor.vision_model."
    fout.add_tensor(
        "v.class_embd",
        tensors.get_tensor(f"{prefix}embeddings.class_embedding").astype(np.float32),
    )
    fout.add_tensor(
        "v.patch_embd.weight",
        tensors.get_tensor(f"{prefix}embeddings.patch_embedding.weight")
            .reshape(clip_vision_config["hidden_size"], 3, clip_vision_config["patch_size"], clip_vision_config["patch_size"])
            .astype(np.float16),
    )
    fout.add_tensor(
        "v.position_embd.weight",
        tensors.get_tensor(f"{prefix}embeddings.position_embedding.weight").astype(np.float16),
    )
    fout.add_tensor(
        "v.sub_gn",
        tensors.get_tensor("model.vision_embed_tokens.sub_GN").astype(np.float32),
    )
    fout.add_tensor(
        "v.glb_gn",
        tensors.get_tensor("model.vision_embed_tokens.glb_GN").astype(np.float32),
    )
    fout.add_tensor(
        "mm.0.weight",
        tensors.get_tensor("model.vision_embed_tokens.img_projection.0.weight").astype(np.float16),
    )
    fout.add_tensor(
        "mm.0.bias",
        tensors.get_tensor("model.vision_embed_tokens.img_projection.0.bias").astype(np.float32),
    )
    fout.add_tensor(
        "mm.2.weight",
        tensors.get_tensor("model.vision_embed_tokens.img_projection.2.weight").astype(np.float16),
    )
    fout.add_tensor(
        "mm.2.bias",
        tensors.get_tensor("model.vision_embed_tokens.img_projection.2.bias").astype(np.float32),
    )
    for i in range(clip_vision_config["num_hidden_layers"]):
        # attention norm
        fout.add_tensor(
            f"v.blk.{i}.attn_norm.weight",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm1.weight").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{i}.attn_norm.bias",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm1.bias").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{i}.ffn_norm.weight",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm2.weight").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{i}.ffn_norm.bias",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm2.bias").astype(np.float32),
        )
        # feed forward
        fout.add_tensor(
            f"v.blk.{i}.ffn_down.weight",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.mlp.fc1.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{i}.ffn_down.bias",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.mlp.fc1.bias").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{i}.ffn_up.weight",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.mlp.fc2.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{i}.ffn_up.bias",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.mlp.fc2.bias").astype(np.float32),
        )
        # attention
        fout.add_tensor(
            f"v.blk.{i}.attn_k.weight",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.k_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{i}.attn_k.bias",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.k_proj.bias").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{i}.attn_out.weight",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.out_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{i}.attn_out.bias",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.out_proj.bias").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{i}.attn_q.weight",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.q_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{i}.attn_q.bias",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.q_proj.bias").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{i}.attn_v.weight",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.v_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{i}.attn_v.bias",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.v_proj.bias").astype(np.float32),
        )
        # layer norm
        fout.add_tensor(
            f"v.blk.{i}.ln1.weight",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm1.weight").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{i}.ln1.bias",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm1.bias").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{i}.ln2.weight",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm2.weight").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{i}.ln2.bias",
            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm2.bias").astype(np.float32),
        )
    fout.add_tensor(
        "v.post_ln.weight",
        tensors.get_tensor(f"{prefix}post_layernorm.weight").astype(np.float32),
    )
    fout.add_tensor(
        "v.post_ln.bias",
        tensors.get_tensor(f"{prefix}post_layernorm.bias").astype(np.float32),
    )
    fout.add_tensor(
        "v.pre_ln.weight",
        tensors.get_tensor(f"{prefix}pre_layrnorm.weight").astype(np.float32),
    )
    fout.add_tensor(
        "v.pre_ln.bias",
        tensors.get_tensor(f"{prefix}pre_layrnorm.bias").astype(np.float32),
    )
    fout.write_header_to_file()
    fout.write_kv_data_to_file()
    fout.write_tensors_to_file()
    fout.close()
if __name__ == "__main__":
    main()



-Download the model from Hugging Face

cd  /data/site/htmltoo.opt/huggingface

pip3  install -U huggingface_hub  --break-system-packages

-Hugging Face download script

vim  download.py

from huggingface_hub import snapshot_download

# Download a full snapshot of the repo into ./galactica-6.7b (real files, no symlinks)
model_id = "facebook/galactica-6.7b"
snapshot_download(repo_id=model_id, local_dir="galactica-6.7b",
                  local_dir_use_symlinks=False, revision="main")

python3 download.py


--Convert the HF model to GGUF

git clone https://github.com/Rayrtfr/llama.cpp

---In case of an exception---

git clone https://github.com/ggerganov/llama.cpp.git

pip3 install -r  llama.cpp/requirements.txt

python3  llama.cpp/convert_hf_to_gguf.py -h

python3  llama.cpp/convert_hf_to_gguf.py  ./galactica-6.7b --outtype q8_0 --verbose --outfile  galactica-6.7b_q8_0.gguf
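To sanity-check the converted file, the gguf Python package (the same one the conversion scripts import) can read it back; a quick sketch, assuming the output file name from the command above:

from gguf import GGUFReader

reader = GGUFReader("galactica-6.7b_q8_0.gguf")
# List a few tensors with their names, quantization types, and shapes
for t in reader.tensors[:5]:
    print(t.name, t.tensor_type, t.shape)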

------

make

--Run the conversion

python3  llama.cpp/convert.py -h

-Convert to GGUF FP16 format, then quantize

python3  llama.cpp/convert.py  ./galactica-6.7b  --outfile  outfile/galactica-6.7b_f16.gguf

./llama.cpp/quantize  outfile/galactica-6.7b_f16.gguf   outfile/galactica-6.7b_q8_0.gguf q8_0

python3  llama.cpp/convert.py  --outfile  galactica-6.7b_q8_0.gguf   ./galactica-6.7b --vocab-type hfft


-Here --outtype is the output type; the values mean the following (a rough size sketch follows the list):

q2_k: uses higher precision for certain tensors while keeping the rest at a base level.

q3_k_l, q3_k_m, q3_k_s: these variants apply different precision levels to different tensors, trading quality against size and speed.

q4_0: the original quantization scheme, using 4-bit precision.

q4_1, q4_k_m, q4_k_s: offer different balances of accuracy and inference speed, suited to scenarios where resource usage must be balanced.

q5_0, q5_1, q5_k_m, q5_k_s: higher accuracy, at the cost of more memory and slower inference.

q6_k and q8_0: the highest precision among the quantized types, but the resource consumption and slower speed may not suit every user.

fp16 and f32: no quantization; original precision is preserved.
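For a rough sense of the size/precision trade-off, file size is roughly parameters × bits per weight; the sketch below uses approximate bits-per-weight figures (my assumptions, not exact llama.cpp block sizes) for a 6.7B-parameter model:

# Rough size estimate: params * bits_per_weight / 8 bytes
params = 6.7e9
approx_bits_per_weight = {"q4_0": 4.5, "q5_0": 5.5, "q8_0": 8.5, "fp16": 16.0, "f32": 32.0}
for name, bpw in approx_bits_per_weight.items():
    print(f"{name}: ~{params * bpw / 8 / 1e9:.1f} GB")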

