1 Star 0 Fork 0

iamdsy / nginx-blog

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
lib_markdown_parse.cpp 19.11 KB
一键复制 编辑 原始数据 按行查看 历史
iamdsy 提交于 2017-12-28 22:29 . add-readme-and-fix-markdown-parse-bug
// Empty C-linkage block: no declarations are currently given C linkage here.
extern "C"{
}
// Requires C++11 or later (this file uses std::to_string and std::stoi).
#include "lib_markdown_parse.h"
// Constructor: performs no explicit work; members are default-initialised.
markdown_parse::markdown_parse() = default;
// Destructor: nothing to release explicitly; members clean up after themselves.
markdown_parse::~markdown_parse() = default;
/**
 * Converts a whole Markdown document to HTML, one line at a time.
 *
 * The generated HTML is appended to the member `parsed_all_string`, and a
 * table-of-contents entry for every heading is appended to the member
 * `catalog` (both are presumably std::string members declared in
 * lib_markdown_parse.h).
 *
 * Fenced code blocks (a line that is exactly "```" once surrounding spaces are
 * stripped) are copied verbatim between "<code><pre><br>" and "</pre></code>".
 * Every other line is converted by parse_string().
 *
 * Headings of level 1..7 are wrapped in a named anchor "h<level>_<index>",
 * where <index> counts the headings of that level seen so far (starting at 0).
 * Lines reported as a heading with a level above 7 are dropped, as before.
 *
 * @param md_str_stream  stream holding the Markdown text; read until it is no
 *                       longer good().
 * @return true when the stream was processed, false when md_str_stream is null.
 */
bool markdown_parse::parse_file(stringstream* md_str_stream){
    if(md_str_stream == nullptr){
        return false;
    }

    // Scratch outputs of parse_string(). They live on the stack so that they
    // cannot fail to allocate separately and cannot leak if parsing throws.
    string line_html;
    int title_level = 0;

    // heading_counts[k] is the number of level-k headings emitted so far;
    // slot 0 is unused so the level can be used directly as the index.
    const int max_heading_level = 7;
    int heading_counts[max_heading_level + 1] = {0};

    string md_str_line;
    do{
        getline(*md_str_stream, md_str_line);

        if(strim_string(md_str_line) == "```"){
            parsed_all_string += "<code><pre><br>";
            bool fence_closed = false;
            do{
                getline(*md_str_stream, md_str_line);
                if(strim_string(md_str_line) == "```"){
                    parsed_all_string += "</pre></code>";
                    fence_closed = true;
                    // Step past the closing fence; the following line is
                    // handled by parse_string() below.
                    getline(*md_str_stream, md_str_line);
                    break;
                }
                parsed_all_string += md_str_line;
                parsed_all_string += "\n";
            }while(md_str_stream->good());
            if(!fence_closed){
                // The input ended inside the code block: close the tags so the
                // rest of the page is not swallowed by an open <pre>.
                parsed_all_string += "</pre></code>";
            }
        }

        if(!this->parse_string(md_str_line, &line_html, &title_level)){
            continue;
        }
        if(title_level == 0){
            // Ordinary (non-heading) line.
            parsed_all_string += line_html;
            continue;
        }
        if(title_level < 0 || title_level > max_heading_level){
            // No handling for deeper headings; they are skipped.
            continue;
        }

        // NOTE: level 7 yields <h7>, which is not a standard HTML element; it
        // is kept because existing output already uses it.
        const string level_text = to_string(title_level);
        const string anchor_id = string("h") + level_text + string("_") +
                                 to_string(heading_counts[title_level]);

        catalog += string("<h4>\n") + "<a href = \"#" + anchor_id + "\">" +
                   line_html + "</a></h4>";
        parsed_all_string += string("<a name=\"") + anchor_id + "\"><h" + level_text + ">" +
                             line_html + "</h" + level_text + "></a>";
        ++heading_counts[title_level];
    }while(md_str_stream->good());

    return true;
}
bool markdown_parse::parse_string(string md_str, string *parsed_string,int* int_title_level){
// string* parsed_string = new(nothrow) string();
// if(parsed_string == NULL){
// return NULL;
// }
*int_title_level = 0;
//parse space,tab,empty line
string empty_str = md_str;
//erase begin space
empty_str.erase(0,empty_str.find_first_not_of(" "));
//erase end space
empty_str.erase(empty_str.find_last_not_of(" ")+1);
//erase begin tab
empty_str.erase(0,empty_str.find_first_not_of(" "));
//erase end tab
empty_str.erase(empty_str.find_last_not_of(" ")+1);
if(empty_str.empty() ){
*parsed_string = string("<br>");
return true;
}
//parse html
if(md_str[0] == '<'){
*parsed_string = md_str;
return true;
}
//parse title
if(md_str[0] == '#'){
size_t space_pos = md_str.find_first_of(' ');
size_t no_pound_pos = md_str.find_first_not_of('#');
if((space_pos != string::npos) && (no_pound_pos == space_pos)){
//string title_pre = "<h";
//string title_pos = ">";
//string title_close_pre = "</";
//string title_level = title_pre + to_string(space_pos) + title_pos;
//string title_level_close = title_close_pre + &title_level[1];
//string title = &md_str[space_pos + 1];
//*parsed_string = title_level + title + title_level_close;
*parsed_string = &md_str[space_pos + 1];
*int_title_level = space_pos;
return true;
}
}
//parse unordered list,parting line
if(md_str[0] == '-'){
//parse parting line
if((md_str[1] == '-') && (md_str[2] == '-') && (md_str.find_first_of('-',3) == string::npos)){
*parsed_string = "<p><hr></p>";
return true;
}
//parse unordered list
if(md_str[1] == ' '){
*parsed_string = string("<li>") + string(&md_str[2]) + string("</li><br>");
md_str = *parsed_string;
*parsed_string = "";
}
}
//parse code guide
//do nothing,should parse when parse the whole article
if((md_str[0] == '`') && (md_str[1] == '`') && (md_str[2] == '`') && (md_str.find_first_of('`',3) == string::npos)){
*parsed_string = md_str;
return true;
}
string before_hyper_link_text = "";
string after_hyper_link_text = "";
//parse hyper link and photo
if(md_str.find_first_of('!') != string::npos){
size_t first_exclamation_pos = md_str.find_first_of('!');
if(md_str[first_exclamation_pos+1] == '['){
size_t first_brakets_pos = md_str.find_first_of('[',first_exclamation_pos);
size_t back_brakets_pos = md_str.find_first_of("]",first_brakets_pos);
size_t paren_pos = md_str.find_first_of("(",back_brakets_pos);
size_t back_paren_pos = md_str.find_first_of(")",paren_pos);
if((back_brakets_pos == string::npos) ||
(paren_pos == string::npos) ||
(back_brakets_pos == string::npos)){
*parsed_string = md_str;
}
else{
if(first_exclamation_pos != 0){
before_hyper_link_text = md_str.substr(0,first_exclamation_pos);
}
string hyper_link_text = md_str.substr(first_brakets_pos+1,back_brakets_pos-first_brakets_pos-1);
string hyper_link = md_str.substr(paren_pos+1,back_paren_pos-paren_pos-1);
*parsed_string = before_hyper_link_text + string("<img src=") + string("\"") + hyper_link
+ string("\" alt=\"") + hyper_link_text + string("\"/><br>");
after_hyper_link_text = md_str.substr(back_paren_pos+1);
while(after_hyper_link_text.find_first_of('!') != string::npos){
first_exclamation_pos = after_hyper_link_text.find_first_of('!');
if(after_hyper_link_text[first_exclamation_pos+1] == '['){
first_brakets_pos = after_hyper_link_text.find_first_of('[',first_exclamation_pos);
back_brakets_pos = after_hyper_link_text.find_first_of("]",first_brakets_pos);
paren_pos = after_hyper_link_text.find_first_of("(",back_brakets_pos);
back_paren_pos = after_hyper_link_text.find_first_of(")",paren_pos);
if((back_brakets_pos == string::npos) ||
(paren_pos == string::npos) ||
(back_brakets_pos == string::npos)){
break;
}
else{
before_hyper_link_text = "";
if(first_brakets_pos != 0){
before_hyper_link_text = after_hyper_link_text.substr(0,first_exclamation_pos);
}
hyper_link_text = after_hyper_link_text.substr(first_brakets_pos+1,back_brakets_pos-first_brakets_pos-1);
hyper_link = after_hyper_link_text.substr(paren_pos+1,back_paren_pos-paren_pos-1);
*parsed_string = *parsed_string + before_hyper_link_text + string("<img src=\"") + hyper_link
+ string("\" alt=\"") + hyper_link_text + string("\"/><br>");
after_hyper_link_text = after_hyper_link_text.substr(back_paren_pos+1);
}
}
}
}
*parsed_string = *parsed_string + after_hyper_link_text;
md_str = *parsed_string;
*parsed_string = "";
}
}
if(md_str.find_first_of('[') != string::npos){
size_t first_brakets_pos = md_str.find_first_of('[');
size_t back_brakets_pos = md_str.find_first_of("]",first_brakets_pos);
size_t paren_pos = md_str.find_first_of("(",back_brakets_pos);
size_t back_paren_pos = md_str.find_first_of(")",paren_pos);
if((back_brakets_pos == string::npos) ||
(paren_pos == string::npos) ||
(back_brakets_pos == string::npos)){
*parsed_string = md_str;
}
else{
before_hyper_link_text = "";
if (first_brakets_pos != 0){
before_hyper_link_text = md_str.substr(0,first_brakets_pos);
}
string hyper_link_text = md_str.substr(first_brakets_pos+1,back_brakets_pos-first_brakets_pos-1);
string hyper_link = md_str.substr(paren_pos+1,back_paren_pos-paren_pos-1);
*parsed_string = before_hyper_link_text + string("<a href=\"") + hyper_link
+ string("\">") + hyper_link_text + string("</a>");
after_hyper_link_text = md_str.substr(back_paren_pos+1);
while(after_hyper_link_text.find_first_of('[') != string::npos){
first_brakets_pos = after_hyper_link_text.find_first_of('[');
back_brakets_pos = after_hyper_link_text.find_first_of("]",first_brakets_pos);
paren_pos = after_hyper_link_text.find_first_of("(",back_brakets_pos);
back_paren_pos = after_hyper_link_text.find_first_of(")",paren_pos);
if((back_brakets_pos == string::npos) ||
(paren_pos == string::npos) ||
(back_brakets_pos == string::npos)){
break;
}
else{
before_hyper_link_text = "";
if(first_brakets_pos != 0){
before_hyper_link_text = after_hyper_link_text.substr(0,first_brakets_pos);
}
hyper_link_text = after_hyper_link_text.substr(first_brakets_pos+1,back_brakets_pos-first_brakets_pos-1);
hyper_link = after_hyper_link_text.substr(paren_pos+1,back_paren_pos-paren_pos-1);
*parsed_string = *parsed_string + before_hyper_link_text + string("<a href=\"") + hyper_link
+ string("\">") + hyper_link_text + string("</a>");
after_hyper_link_text = after_hyper_link_text.substr(back_paren_pos+1);
}
}
}
*parsed_string = *parsed_string + after_hyper_link_text;
md_str = *parsed_string;
*parsed_string = "";
}
//parse em and strong
string before_star_str;
string after_laststar_str = "";
if (md_str.find_first_of('*') != string::npos ){
//get first *
size_t first_star_pos = md_str.find_first_of('*');
size_t back_star_pos_1;
string em_str;
string strong_str;
//get second *,means it is strong
if(md_str[first_star_pos+1] == '*'){
//before first * substring
before_star_str = "";
if(first_star_pos != 0){
before_star_str = md_str.substr(0,first_star_pos);
}
back_star_pos_1 = md_str.find_first_of('*',first_star_pos+2);
//no back * means it is normal text
if ((back_star_pos_1 == string::npos)){
*parsed_string = md_str;
}
//there back * and after back * means it is strong text
else if(md_str[back_star_pos_1+1] == '*'){
strong_str = md_str.substr(first_star_pos+2,back_star_pos_1-first_star_pos-2);
*parsed_string = before_star_str + string("<strong>") + strong_str + string("</strong>");
after_laststar_str = md_str.substr(back_star_pos_1+2);
}
else {
em_str = md_str.substr(first_star_pos+2,back_star_pos_1-first_star_pos-2);
*parsed_string = before_star_str + string("<em>") + em_str + string("</em>");
after_laststar_str = md_str.substr(back_star_pos_1+1);
}
}
else{
back_star_pos_1 = md_str.find_first_of('*',first_star_pos + 2);
if(back_star_pos_1 == string::npos){
*parsed_string = md_str;
}
else{
before_star_str = "";
if(first_star_pos != 0){
before_star_str = md_str.substr(0,first_star_pos);
}
em_str = md_str.substr(first_star_pos+1,back_star_pos_1-first_star_pos-1);
*parsed_string = before_star_str + string("<em>") + em_str + string("</em>");
after_laststar_str = md_str.substr(back_star_pos_1 + 1);
}
}
while(after_laststar_str.find_first_of('*') != string::npos){
first_star_pos = after_laststar_str.find_first_of('*');
if(after_laststar_str[first_star_pos+1] == '*'){
//before first * substring
before_star_str = "";
if(first_star_pos != 0){
before_star_str = after_laststar_str.substr(0,first_star_pos);
}
back_star_pos_1 = after_laststar_str.find_first_of('*',first_star_pos+3);
if ((back_star_pos_1 == string::npos)){
break;
}
else if(after_laststar_str[back_star_pos_1+1] == '*'){
strong_str = after_laststar_str.substr(first_star_pos+2,back_star_pos_1-first_star_pos-2);
*parsed_string = *parsed_string + before_star_str + string("<strong>") + strong_str + string("</strong>");
after_laststar_str = after_laststar_str.substr(back_star_pos_1+2);
}
else {
em_str = after_laststar_str.substr(first_star_pos+2,back_star_pos_1-first_star_pos-1);
*parsed_string = *parsed_string + before_star_str + string("<em>") + em_str + string("</em>");
after_laststar_str = after_laststar_str.substr(back_star_pos_1+1);
}
}
else{
back_star_pos_1 = after_laststar_str.find_first_of('*',first_star_pos + 2);
if(back_star_pos_1 == string::npos){
break;
}
else{
before_star_str = "";
if(first_star_pos != 0){
before_star_str = after_laststar_str.substr(0,first_star_pos);
}
em_str = after_laststar_str.substr(first_star_pos+1,back_star_pos_1-first_star_pos-1);
*parsed_string = *parsed_string + before_star_str + string("<em>") + em_str + string("</em>");
after_laststar_str = after_laststar_str.substr(back_star_pos_1 + 1);
}
}
}
*parsed_string = *parsed_string + after_laststar_str;
md_str = *parsed_string;
*parsed_string = "";
}
//parse ordered list
try{
string::size_type sz;
if(stoi(md_str,&sz) >= 0){
if(md_str.substr(sz).find_first_of('.') == 0){
md_str = md_str + string("<br>");
}
}
}
catch(...){
}
*parsed_string = md_str;
return true;
}
/**
 * Returns `before` with leading and trailing space characters (' ') removed.
 * Only the space character is stripped; a string made up solely of spaces
 * yields an empty string.
 */
string markdown_parse::strim_string(string before){
    const string::size_type first_kept = before.find_first_not_of(' ');
    if(first_kept == string::npos){
        return string();
    }
    const string::size_type last_kept = before.find_last_not_of(' ');
    return before.substr(first_kept, last_kept - first_kept + 1);
}
C++
1
https://gitee.com/iamdsy/nginx-blog.git
git@gitee.com:iamdsy/nginx-blog.git
iamdsy
nginx-blog
nginx-blog
master

搜索帮助