1 Star 5 Fork 5

州的先生 / 觅思文档导入工具箱

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
yuque2mrdoc.py 9.07 KB
一键复制 编辑 原始数据 按行查看 历史
zmister 提交于 2023-03-15 23:07 . 新增语雀知识库导入脚本
# coding:utf-8
import requests
import re
import yaml
import os
from configparser import ConfigParser
class YuqueExport():
    """Copy every Yuque knowledge base of the token's owner into MrDoc.

    Each Yuque repository becomes one MrDoc project; each Yuque document
    becomes one MrDoc document.  Images referenced by a document are
    re-uploaded to MrDoc and their URLs rewritten.

    All constructor arguments are optional.  Any argument left as ``None``
    falls back to the module-level setting of the same purpose
    (``YUQUE_URL``, ``YUQUE_TOKEN``, ``MRDOC_URL``, ``MRDOC_TOKEN``,
    ``YUQUE_TOC``, ``editor_mode``), which is how the command-line entry
    point of this file configures the exporter.
    """

    def __init__(self, yuque_url=None, yuque_token=None, mrdoc_url=None,
                 mrdoc_token=None, import_by_toc=None, doc_editor_mode=None):
        """Store connection settings and prepare the request headers.

        Args:
            yuque_url: Base URL of the Yuque API.
            yuque_token: Yuque user token, sent as ``X-Auth-Token``.
            mrdoc_url: Base URL of the MrDoc site (no trailing slash).
            mrdoc_token: MrDoc user token, sent as the ``token`` query
                parameter.
            import_by_toc: When true, documents are imported following the
                repository's table of contents so hierarchy is preserved.
            doc_editor_mode: Value sent to MrDoc as ``editor_mode``; ``1``
                selects the Markdown body, anything else the HTML body.
        """
        self.base_url = YUQUE_URL if yuque_url is None else yuque_url
        self.yuque_token = YUQUE_TOKEN if yuque_token is None else yuque_token
        self.mrdoc_url = MRDOC_URL if mrdoc_url is None else mrdoc_url
        self.mrdoc_token = MRDOC_TOKEN if mrdoc_token is None else mrdoc_token
        self.import_by_toc = YUQUE_TOC if import_by_toc is None else import_by_toc
        self.editor_mode = editor_mode if doc_editor_mode is None else doc_editor_mode
        self.headers = {
            "User-Agent": "yuque_export",
            "X-Auth-Token": self.yuque_token,
        }
        # Maps a Yuque TOC node uuid to the MrDoc document id that children
        # of that node should use as their parent.
        self.doc_uuid_map = {}

    def get_user_info(self):
        """Fetch the token owner's profile and cache login, id and name.

        Must be called before ``get_repos``, which reads ``self.login_id``.

        Raises:
            ValueError: If the Yuque API does not answer with HTTP 200.
        """
        res_obj = requests.get(url=self.base_url + '/user', headers=self.headers)
        if res_obj.status_code != 200:
            raise ValueError("Token 信息错误")
        user_data = res_obj.json()['data']
        self.login_id = user_data['login']
        self.uid = user_data['id']
        self.username = user_data['name']
        print("=========== 用户信息初始化成功 ==========")

    # List the user's knowledge bases and import each one
    def get_repos(self):
        """Create a MrDoc project for every Yuque repository and import it.

        Raises:
            RuntimeError: If MrDoc refuses to create a project.
        """
        repos_json = requests.get(
            self.base_url + '/users/' + self.login_id + '/repos',
            headers=self.headers,
        ).json()
        for repo in repos_json['data']:
            print(">>>发现知识库:", repo['id'], repo['name'])
            rid = repo['id']
            name = repo['name']
            project_info = {
                "name": name,
                "desc": repo['description'],
                "role": 1,
            }
            project_created = self.create_project(info=project_info)
            if not project_created['status']:
                # Raising a bare string is a TypeError in Python 3; use a
                # real exception so the MrDoc response reaches the user.
                raise RuntimeError("[x]新建文集异常:{}".format(project_created))
            print("+已新建文集:", name)
            project_id = project_created['data']

            docs = self.get_repo_docs(rid=rid)
            if self.import_by_toc:
                self._import_docs_by_toc(rid, project_id, docs)
            else:
                self._import_docs_flat(rid, project_id, docs)

    def _import_docs_by_toc(self, rid, project_id, docs):
        """Import documents in table-of-contents order, keeping hierarchy."""
        docs_by_id = {}
        for doc in docs:
            # setdefault keeps the first entry for an id, like a linear scan.
            docs_by_id.setdefault(doc['id'], doc)

        for toc in self.get_repo_toc(rid=rid) or []:
            if toc['type'] == 'META':
                continue
            node_uuid = toc.get('uuid')
            parent_uuid = toc.get('parent_uuid')
            # 0 means "top level"; also used when the parent is unknown so a
            # missing ancestor never aborts the whole import.
            parent_doc = self.doc_uuid_map.get(parent_uuid, 0) if parent_uuid else 0

            doc = docs_by_id.get(toc.get('id')) if toc['type'] == 'DOC' else None
            if doc is None:
                # Not a document node (or its document is not in the list):
                # nothing to import, but let its children attach to the
                # nearest ancestor that does exist in MrDoc.
                if node_uuid is not None:
                    self.doc_uuid_map[node_uuid] = parent_doc
                continue

            print(">>>发现文档信息:", doc['id'], doc['title'], doc['slug'], doc['format'])
            # get_repo_doc already rewrites image URLs; converting again
            # would upload every image a second time.
            doc_content = self.get_repo_doc(rid=rid, slug=doc['slug'])
            data = {
                'pid': project_id,
                'title': doc['title'],
                'doc': doc_content,
                'editor_mode': self.editor_mode,
                'parent_doc': parent_doc,
            }
            mrdoc_doc = self.create_doc(info=data)
            if mrdoc_doc['status']:
                self.doc_uuid_map[node_uuid] = mrdoc_doc['data']
                print("+已新建文档:", doc['title'], "上级文档设为:", parent_doc)
            else:
                if node_uuid is not None:
                    self.doc_uuid_map[node_uuid] = parent_doc
                print("[x]新建文档异常:", doc['title'], mrdoc_doc)

    def _import_docs_flat(self, rid, project_id, docs):
        """Import every document of a repository as a top-level document."""
        for doc in docs:
            print(">>>发现文档信息:", doc['id'], doc['title'], doc['slug'], doc['format'])
            doc_content = self.get_repo_doc(rid=rid, slug=doc['slug'])
            data = {
                'pid': project_id,
                'title': doc['title'],
                'doc': doc_content,
                'editor_mode': self.editor_mode,
            }
            mrdoc_doc = self.create_doc(info=data)
            if mrdoc_doc['status']:
                print("+已新建文档:", doc['title'])
            else:
                print("[x]新建文档异常:", doc['title'], mrdoc_doc)

    # Fetch a knowledge base's table of contents
    def get_repo_toc(self, rid):
        """Return the repository's ``toc_yml`` field parsed with ``yaml.safe_load``."""
        repo_json = requests.get(self.base_url + '/repos/' + str(rid), headers=self.headers).json()
        return yaml.safe_load(repo_json['data']['toc_yml'])

    # Fetch a knowledge base's document list
    def get_repo_docs(self, rid):
        """Return the ``data`` list of the repository's document listing."""
        docs_json = requests.get(self.base_url + '/repos/' + str(rid) + '/docs', headers=self.headers).json()
        return docs_json['data']

    # Fetch one document's detail
    def get_repo_doc(self, rid, slug):
        """Return a document's body with its images moved to MrDoc.

        The Markdown ``body`` is used when the editor mode is ``1``,
        otherwise ``body_html``.
        """
        doc_json = requests.get(
            self.base_url + '/repos/' + str(rid) + '/docs/' + slug,
            headers=self.headers,
        ).json()
        body_key = 'body' if self.editor_mode == 1 else 'body_html'
        return self.convert_img(doc_json['data'][body_key])

    # Move the images referenced by a document to MrDoc
    def convert_img(self, doc):
        """Upload every image referenced in *doc* to MrDoc and rewrite its URL.

        Both Markdown images (``![alt](url)``) and self-closing ``<img ... />``
        tags with a double-quoted ``src`` are handled.  Images whose upload
        fails keep their original URL.

        Args:
            doc: Document text; ``None`` or an empty string is accepted.

        Returns:
            The document text with rewritten image URLs (``""`` for empty
            input).
        """
        if not doc:
            return ""

        handled = set()  # source URLs already tried and MrDoc URLs already produced

        # Markdown-style images
        md_urls = [
            media.split("(")[-1].split(")")[0]
            for media in re.findall(r"\!\[.*?\]\(.*?\)", doc)
        ]
        for img_url in md_urls:
            doc = self._transfer_image(doc, img_url, handled)

        # <img> tags; scanned after the Markdown pass so URLs that were
        # already rewritten are seen (and skipped) in their new form.
        for img in re.findall(r'<img[^>]*/>', doc):
            sources = re.findall('src="([^"]+)"', img)
            if not sources:
                continue  # tag without a double-quoted src: nothing to move
            doc = self._transfer_image(doc, sources[0], handled)
        return doc

    def _transfer_image(self, doc, img_url, handled):
        """Upload one image and replace its URL in *doc*; return the new text."""
        # An empty URL must never reach str.replace: replacing '' inserts the
        # new text between every character of the document.
        if not img_url or img_url in handled:
            return doc
        handled.add(img_url)
        print(">>>发现图片:", img_url)
        uploaded = self.upload_img(url=img_url)
        if uploaded['code'] == 0:
            print("+转存图片成功:", img_url)
            new_url = uploaded['data']['url']
            handled.add(new_url)
            doc = doc.replace(img_url, new_url)
        return doc

    def _mrdoc_post(self, path, data):
        """POST form *data* to a MrDoc API path and return the decoded JSON."""
        # params= lets requests URL-encode the token instead of gluing it
        # into the query string by hand.
        resp = requests.post(self.mrdoc_url + path, params={"token": self.mrdoc_token}, data=data)
        return resp.json()

    # Create a MrDoc project
    def create_project(self, info):
        """Create a MrDoc project from *info* and return the API response."""
        return self._mrdoc_post("/api/create_project/", info)

    # Create a MrDoc document
    def create_doc(self, info):
        """Create a MrDoc document from *info* and return the API response."""
        return self._mrdoc_post("/api/create_doc/", info)

    # Upload an image by URL
    def upload_img(self, url):
        """Ask MrDoc to fetch and store the image at *url*; return the API response."""
        return self._mrdoc_post("/api/upload_img_url/", {"url": url})
if __name__ == '__main__':
    # Interactive entry point: collect settings from ./config.ini (section
    # "mrdoc": url, token; section "yuque": url, token), prompt for anything
    # missing, then run the import.  The upper-case names and editor_mode
    # are module globals that YuqueExport reads.

    def _mask_token(token):
        """Return *token* with all but its last four characters hidden."""
        if len(token) <= 8:
            return "*" * len(token)
        return "*" * (len(token) - 4) + token[-4:]

    def _ask_int(prompt):
        """Prompt until the user enters something int() accepts."""
        while True:
            try:
                return int(input(prompt))
            except ValueError:
                print("请仅输入整数数字")

    # Raw string: the banner is full of backslashes that would otherwise be
    # read as (invalid) escape sequences or line continuations.
    print(r"""
__ __ ___ __ __ _____
\ \ / / |__ \ | \/ | | __ \
\ \_/ / _ __ _ _ _ ___ ) | | \ / |_ __| | | | ___ ___
\ / | | |/ _` | | | |/ _ \ / / | |\/| | '__| | | |/ _ \ / __|
| || |_| | (_| | |_| | __/ / /_ | | | | | | |__| | (_) | (__
|_| \__,_|\__, |\__,_|\___| |____| |_| |_|_| |_____/ \___/ \___|
| |
|_|
【语雀知识库导入到觅思文档】 v20230316
* 遍历用户所有知识库;
* 支持按知识库目录导入;
* 支持文档内的图片转存至觅思文档;
""")
    CONFIG = ConfigParser()
    # A missing config file is fine: read() skips it and every option then
    # falls back to "" and is prompted for.
    CONFIG.read('./config.ini', encoding='utf-8')

    MRDOC_URL = CONFIG.get("mrdoc", "url", fallback="")
    if MRDOC_URL == "":
        MRDOC_URL = input("1)请输入你的觅思文档地址:")
    else:
        print("1)你配置的觅思文档地址为:", MRDOC_URL)
    # API paths are appended with a leading slash, so drop trailing ones.
    MRDOC_URL = MRDOC_URL.strip().rstrip("/")

    MRDOC_TOKEN = CONFIG.get("mrdoc", "token", fallback="")
    if MRDOC_TOKEN == "":
        MRDOC_TOKEN = input("2)请输入你的觅思文档用户 Token:").strip()
    else:
        # Do not echo the full secret to the console.
        print("2)你配置的觅思文档用户Token为:", _mask_token(MRDOC_TOKEN))

    YUQUE_URL = CONFIG.get('yuque', 'url', fallback="").strip().rstrip("/")
    if YUQUE_URL == '':
        YUQUE_URL = "https://www.yuque.com/api/v2"
    print("3)你配置的语雀域名为:", YUQUE_URL)

    YUQUE_TOKEN = CONFIG.get('yuque', 'token', fallback="")
    if YUQUE_TOKEN == "":
        YUQUE_TOKEN = input("4)请输入你的语雀用户 Token:").strip()
    else:
        print("4)你配置的语雀用户Token为:", _mask_token(YUQUE_TOKEN))

    # Only an answer of 1 enables table-of-contents import.
    YUQUE_TOC = _ask_int("5)是否按知识库目录进行导入,1-是 2否:") == 1

    editor_mode = _ask_int("6)请输入文档编辑模式,1-markdown 2-富文本:")
    if editor_mode != 1:
        # Any non-markdown answer is sent to MrDoc as 3 — presumably MrDoc's
        # code for the rich-text editor; confirm against the MrDoc API.
        editor_mode = 3

    op = YuqueExport()
    op.get_user_info()
    op.get_repos()

    # "pause" is a Windows shell built-in that keeps a double-clicked console
    # window open; elsewhere it only produces a "command not found" error.
    if os.name == 'nt':
        os.system('pause')
Python
1
https://gitee.com/zmister/mrdoc-import-toolbox.git
git@gitee.com:zmister/mrdoc-import-toolbox.git
zmister
mrdoc-import-toolbox
觅思文档导入工具箱
master

搜索帮助