# Start an interactive bash shell in the ROS Noetic / CUDA 11.4.2 image with
# host networking and all GPUs exposed. DISPLAY and ~/.Xauthority are passed
# through so GUI programs in the container can reach the host X server.
# QT_X11_NO_MITSHM=1 presumably stops Qt from using the MIT-SHM extension.
# NOTE(review): the host path ~/tmp/.X11-unix looks like a typo for
# /tmp/.X11-unix (the usual X11 socket directory) - confirm before reuse.
js展开代码docker run \
-e QT_X11_NO_MITSHM=1 \
-e DISPLAY \
-v ~/.Xauthority:/root/.Xauthority:rw \
-v ~/tmp/.X11-unix:/tmp/.X11-unix:ro \
--net host \
--gpus all \
-v ~/out_home:/out_home \
-v /data/xiedong/xd_ws:/xd_ws \
-it kevinchina/deeplearning:ros-noetic-cuda11.4.2-v5 bash
训练数据存储在JSONL文件中,每行是一个JSON对象,包含以下关键字段:
json展开代码{
"image": {
"<image_00>": "/path/to/step_0.png",
"<image_01>": "/path/to/step_1.png"
},
"conversations": [
{
"role": "system",
"content": "系统提示词..."
},
{
"role": "user",
"content": "<Question>用户问题</Question>\n当前屏幕截图:<image_00>"
},
{
"role": "assistant",
"content": "{\"POINT\":[723,536],\"to\":\"up\"}"
}
],
"bbox": [[x1,y1,x2,y2], ...], // 可选:边界框信息
"bbox2": [...], // 可选:第二个边界框
"id": 0
}
在beginner_tutorials包中创建src/add_two_ints_server.cpp文件并粘贴以下内容进去:
bash展开代码#include "ros/ros.h"
#include "beginner_tutorials/AddTwoInts.h"
// Service handler: writes req.a + req.b into res.sum, logs the two operands
// and the computed sum, and always reports success.
bool add(beginner_tutorials::AddTwoInts::Request &req,
         beginner_tutorials::AddTwoInts::Response &res)
{
  res.sum = req.a + req.b;

  // Cast explicitly so the values match the %ld format specifiers.
  const long int lhs = static_cast<long int>(req.a);
  const long int rhs = static_cast<long int>(req.b);
  ROS_INFO("request: x=%ld, y=%ld", lhs, rhs);
  ROS_INFO("sending back response: [%ld]", static_cast<long int>(res.sum));

  return true;
}
// Entry point: initializes the "add_two_ints_server" node, advertises the
// "add_two_ints" service with add() as its handler, then calls ros::spin().
int main(int argc, char **argv)
{
  ros::init(argc, argv, "add_two_ints_server");

  ros::NodeHandle node_handle;
  ros::ServiceServer add_service =
      node_handle.advertiseService("add_two_ints", add);
  ROS_INFO("Ready to add two ints.");

  ros::spin();
  return 0;
}
https://github.com/OpenBMB/AgentCPM-GUI/blob/main/README_zh.md
AgentCPM-GUI是由清华大学THUNLP实验室、中国人民大学与面壁智能团队联合开发的开源端侧智能体大模型,基于MiniCPM-V构建,总参数量8B,接受手机屏幕图像作为输入,自动执行用户提出的任务。AgentCPM-GUI的主要特性包括:
LLaMA-Factory使用HuggingFace的datasets库来管理数据。
# save_to_disk is a method of Dataset / DatasetDict (e.g. ds.save_to_disk(path)),
# not a module-level function, so importing it from `datasets` raises
# ImportError. Only load_from_disk is exported at package level.
from datasets import Dataset, DatasetDict, load_from_disk
ROS 软件源的签名密钥已过期,导致系统拒绝信任该软件源,更新软件源时会出现如下报错:
The following signatures were invalid: EXPKEYSIG F42ED6FBAB17C654
运行以下命令修复 GPG 密钥:
# Replace the expired ROS apt signing key F42ED6FBAB17C654 with the current one.
bash展开代码sudo apt-key del F42ED6FBAB17C654 # remove the old key
sudo apt update # at this point apt reports the key as missing
sudo apt install curl -y # make sure curl is installed
# Download ros.asc from the rosdistro repository and register it with apt-key.
curl -s https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc | sudo apt-key add -
# Refresh the package index, clearing the local apt cache in between.
bash展开代码sudo apt update --fix-missing
sudo apt clean # clear out the corrupted cache
sudo apt update
https://huggingface.co/docs/tokenizers/api/added-tokens
python展开代码from rich import print
from transformers import AutoTokenizer
def add_new_tokens(load_path: str, save_path: str, new_tokens: list[str]):
    """Load a tokenizer, register extra tokens on it, and save the result.

    Args:
        load_path: Location passed to ``AutoTokenizer.from_pretrained``.
        save_path: Directory passed to ``save_pretrained``; may equal
            ``load_path`` to update the tokenizer in place.
        new_tokens: Token strings handed to ``add_tokens``.
    """
    # NOTE(review): only the tokenizer files are touched here; resizing the
    # model's embedding matrix is presumably handled elsewhere - confirm.
    tok = AutoTokenizer.from_pretrained(load_path)
    tok.add_tokens(new_tokens)
    tok.save_pretrained(save_path)
def update_tokenizer(model_path: str):
    """Add the call / tool-call marker tokens to the tokenizer at *model_path*.

    The tokenizer is loaded from and saved back to the same directory.
    """
    marker_tokens = [
        "<|call_start|>",
        "<|call_end|>",
        "<|toolcall_start|>",
        "<|toolcall_end|>",
    ]
    add_new_tokens(load_path=model_path, save_path=model_path, new_tokens=marker_tokens)
def test_tokenizer(model_path: str):
    """Print how the tokenizer at *model_path* splits and encodes sample text.

    For each sample containing the marker tokens, prints the raw text, its
    token pieces, and its encoded ids so the result can be inspected by eye.
    """
    tok = AutoTokenizer.from_pretrained(model_path)
    samples = (
        "hello <|call_start|> world <|call_end|>",
        "hello <|toolcall_start|> world <|toolcall_end|>",
    )
    for sample in samples:
        pieces = tok.tokenize(sample)
        ids = tok.encode(sample)
        print(sample, pieces, ids)
if __name__ == "__main__":
    # Patch the checkpoint's tokenizer in place, then reload it and print
    # sample tokenizations for a manual check.
    checkpoint_dir = "/mnt/jfs6/model_ok/qwen2vl-0811-1/checkpoint-6400"
    update_tokenizer(checkpoint_dir)
    test_tokenizer(checkpoint_dir)
绘制文本时使用字体文件 /usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc(Noto Sans CJK,可显示中文):
bash展开代码
def draw_bbox_on_pil_image(pil_image, bbox, text, color, thickness=2, font_size=12):
    """Draw a bounding box and a text label on a PIL image, in place.

    Args:
        pil_image: PIL image to draw on; it is modified in place.
        bbox: Four numbers ``[x_br, y_br, x_tl, y_tl]`` giving two opposite
            corners in 0-1000 normalized coordinates. The corners are
            re-ordered internally, so either order is accepted.
        text: Label drawn as black text on a white box at the top-left corner.
        color: Outline color of the rectangle.
        thickness: Outline width of the rectangle.
        font_size: Size requested for the TrueType label font.

    Returns:
        The same ``pil_image`` object, after drawing.
    """
    draw = ImageDraw.Draw(pil_image)
    img_width, img_height = pil_image.size

    # Scale the 0-1000 normalized coordinates to pixel coordinates.
    x_br, y_br, x_tl, y_tl = bbox
    x_br = int(x_br * img_width / 1000)
    y_br = int(y_br * img_height / 1000)
    x_tl = int(x_tl * img_width / 1000)
    y_tl = int(y_tl * img_height / 1000)

    # Normalize the corner order so (x1, y1) is top-left and (x2, y2) is
    # bottom-right.
    x1, x2 = min(x_tl, x_br), max(x_tl, x_br)
    y1, y2 = min(y_tl, y_br), max(y_tl, y_br)

    draw.rectangle([x1, y1, x2, y2], outline=color, width=thickness)

    try:
        font = ImageFont.truetype(
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", font_size
        )
    except (OSError, ImportError):
        # Font file missing/unreadable, or FreeType support unavailable: fall
        # back to the built-in font. Catching only these (rather than a bare
        # ``except``) lets KeyboardInterrupt / SystemExit propagate.
        font = ImageFont.load_default()

    # Measure the label so its background box can be sized to fit.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top

    # The label normally sits directly above the box. When the box touches
    # the top of the image there is no room there, so place the label just
    # inside the box's top edge instead of drawing it off-image.
    label_top = y1 - text_height - 5
    if label_top < 0:
        label_top = y1
    label_bottom = label_top + text_height + 5

    draw.rectangle(
        [x1, label_top, x1 + text_width + 10, label_bottom],
        fill=(255, 255, 255),
        outline=(0, 0, 0),
    )
    draw.text((x1 + 5, label_top + 3), text, fill=(0, 0, 0), font=font)
    return pil_image