貝葉斯實現文字分類C++實現

kkkkkkkkq發表於2017-08-05

原文網址 : https://blog.csdn.net/kkkkkkkkq/article/details/76721739

//NaiveBayes.h
#ifndef NAIVEBAYES_H_
#define NAIVEBAYES_H_
#include<iostream>
#include<map>
#include<set>
#include<cmath>
#include<vector>
#include<algorithm>
#include<numeric>
#include<cstring>
#include<stdio.h>
#include<cstdlib>
using namespace std;


// Two-class naive Bayes text classifier trained on the word lists and labels
// that the constructor copies from the tables defined in NaiveBayes.cpp.
// Expected call order (as used by main): create_vocab_list(),
// get_train_matrix(), train_NB0(), then classify_NB().
class NaiveBayes
{
private:
	// Training documents; each inner vector holds the words of one document.
	vector< vector<string> > list_of_posts;
	// Label of each training document, in document order (1 = abusive, 0 = not).
	vector<int> list_classes;
	// Word -> vocabulary index. Indices start at 1; 0 means "no index assigned".
	map<string, int>  my_vocab_list;
	// Scratch 0/1 word-presence array allocated with new[] by
	// set_of_words_to_vec() and classify_NB(). get_train_matrix() delete[]s it
	// after copying; classify_NB() does not release it.
	int *return_vec;
	// One row per training document; each row has vocabulary size + 1 entries
	// and entry 0 is never set because vocabulary indices start at 1.
	vector< vector<int> > train_mat;
	// After train_NB0(): log of the per-word frequency ratio for class 0.
	vector<float> p0vect;
	// After train_NB0(): log of the per-word frequency ratio for class 1.
	vector<float> p1vect;
	// After train_NB0(): sum of the labels divided by the number of training rows.
	float p_abusive;


public:
	// Copies the training documents and their labels into the object.
	NaiveBayes();
	// Assigns a 1-based index to every distinct training word.
	void create_vocab_list();
	// Allocates return_vec and sets the slots of the words found in training
	// document idx; also echoes progress to cout.
	void set_of_words_to_vec(int idx);
	// Rebuilds train_mat with one word-presence row per training document.
	void get_train_matrix();
	// Writes train_mat to cout, one row per line.
	void print();
	// Computes p_abusive, p0vect and p1vect from train_mat and list_classes.
	void train_NB0();
	// Classifies a word array terminated by the string "null".
	// Returns 1 when the class-1 score is greater than the class-0 score, else 0.
	int classify_NB(string *doc_to_classify);
	
};
#endif // !NAIVEBAYES_H_

//NaiveBayes.cpp
#include"stdafx.h"
#include"NaiveBayes.h"

// Training documents, one row per document. Every row is a list of words that
// ends with the sentinel string "null"; readers of this table stop at the
// sentinel. The second dimension (10) has to hold the longest row including
// its sentinel -- the fifth row uses all ten slots.
string posting_list[6][10] = 
{
	{ "my", "dog", "has", "flea", "problems", "help", "please", "null" },
	{ "maybe", "not", "take", "him", "to", "dog", "park", "stupid", "null" },
	{ "my", "dalmation", "is", "so", "cute", "I", "love", "him", "null" },
	{ "stop", "posting", "stupid", "worthless", "garbage", "null" },
	{ "mr", "licks", "ate", "my", "steak", "how", "to", "stop", "him", "null" },
	{ "quit", "buying", "worthless", "dog", "food", "stupid", "null" }
};
// Label of each posting_list row, in row order: 1 means abusive, 0 means not abusive.
int class_vec[6] = { 0, 1, 0, 1, 0, 1 };


/**
 * Builds the training corpus from the file-level tables.
 *
 * Every row of posting_list is copied (up to, but not including, its "null"
 * sentinel) into list_of_posts, and every entry of class_vec is copied into
 * list_classes. The scratch pointer and the class prior receive defined
 * start values so that no member holds an indeterminate value after
 * construction.
 */
NaiveBayes::NaiveBayes()
	: return_vec(NULL), p_abusive(0.0f)
{
	// Derive the table dimensions from the arrays themselves instead of
	// repeating the literal sizes here.
	const std::size_t row_count = sizeof(posting_list) / sizeof(posting_list[0]);
	const std::size_t col_count = sizeof(posting_list[0]) / sizeof(posting_list[0][0]);

	for (std::size_t row = 0; row < row_count; row++)
	{
		std::vector<std::string> words;
		// Stop at the sentinel, but never walk past the end of a row that
		// was declared without one.
		for (std::size_t col = 0; col < col_count && posting_list[row][col] != "null"; col++)
		{
			words.push_back(posting_list[row][col]);
		}
		list_of_posts.push_back(words);
	}

	const std::size_t label_count = sizeof(class_vec) / sizeof(class_vec[0]);
	for (std::size_t i = 0; i < label_count; i++)
	{
		list_classes.push_back(class_vec[i]);
	}
}

/**
 * Assigns a 1-based index to every distinct word in list_of_posts.
 *
 * Words are numbered in order of first appearance. A stored value of 0 means
 * "not numbered yet", which is why numbering starts at 1.
 */
void NaiveBayes::create_vocab_list()
{
	int next_index = 1;
	for (std::size_t doc = 0; doc < list_of_posts.size(); ++doc)
	{
		const std::vector<std::string> &words = list_of_posts[doc];
		for (std::size_t w = 0; w < words.size(); ++w)
		{
			// operator[] creates a zero-valued entry for a word seen for the
			// first time, so a zero slot is exactly "needs an index".
			int &slot = my_vocab_list[words[w]];
			if (slot == 0)
			{
				slot = next_index++;
			}
		}
	}
}//create_vocab_list

/**
 * Builds the 0/1 word-presence vector for training document `idx`.
 *
 * On return, return_vec points to a freshly new[]-allocated array of
 * my_vocab_list.size() + 1 ints. Slot k is 1 when the word whose vocabulary
 * index is k occurs in posting_list[idx]; slot 0 is never set because
 * vocabulary indices start at 1. The caller owns the array and must
 * delete[] it.
 *
 * @param idx row of posting_list to encode; an out-of-range value yields an
 *            all-zero vector.
 */
void NaiveBayes::set_of_words_to_vec(int idx)
{
	std::cout << "set of words to vec begin the document id is : " << idx << std::endl;
	const std::size_t len = my_vocab_list.size() + 1;
	return_vec = new int[len](); // the trailing () value-initialises every element to zero
	for (std::size_t i = 0; i < len; i++)
		std::cout << return_vec[i] << " ";

	const std::size_t row_count = sizeof(posting_list) / sizeof(posting_list[0]);
	const std::size_t col_count = sizeof(posting_list[0]) / sizeof(posting_list[0][0]);
	if (idx >= 0 && static_cast<std::size_t>(idx) < row_count)
	{
		for (std::size_t col = 0; col < col_count && posting_list[idx][col] != "null"; col++)
		{
			// find() instead of operator[]: a lookup must not add words to
			// the vocabulary, otherwise its size would no longer match the
			// length of the array allocated above.
			std::map<std::string, int>::const_iterator hit = my_vocab_list.find(posting_list[idx][col]);
			if (hit != my_vocab_list.end() && hit->second != 0)
			{
				return_vec[hit->second] = 1;
			}
		}
	}
	std::cout << std::endl;
}//set_of_words_to_vec

void NaiveBayes::get_train_matrix()
{
	cout << "get train matrix begin : " << endl;
	train_mat.clear();
	for (int i = 0; i<6; i++)
	{
		set_of_words_to_vec(i);
		vector<int> vec(return_vec, return_vec + my_vocab_list.size() + 1);
		train_mat.push_back(vec);
		delete[]return_vec;
	}
}//get train matrix

/**
 * Writes the training matrix to cout: a heading line, then one line per
 * row with every entry followed by a single space.
 */
void NaiveBayes::print()
{
	std::cout << "print the train matrix begin : " << std::endl;
	for (std::size_t row = 0; row < train_mat.size(); ++row)
	{
		const std::vector<int> &cells = train_mat[row];
		for (std::size_t col = 0; col < cells.size(); ++col)
		{
			std::cout << cells[col] << " ";
		}
		std::cout << std::endl;
	}
}//print()

/**
 * Estimates the model parameters from train_mat and list_classes.
 *
 * Sets p_abusive to (sum of labels) / (number of training rows) and fills
 * p0vect / p1vect with log(count / denominator) per vocabulary slot, where
 * the counts are accumulated over the rows labelled 0 / 1 respectively.
 * Counts start at 1 and denominators at 2 so that a word never seen in a
 * class does not produce log(0).
 *
 * Does nothing except report on cerr when train_mat is empty. Calling the
 * function again recomputes everything from scratch.
 */
void NaiveBayes::train_NB0()
{
	const std::size_t num_train_docs = train_mat.size();
	std::cout << "num_train_docs = " << num_train_docs << std::endl;
	if (train_mat.empty())
	{
		// Without rows there is no vector length to size the model with and
		// the prior below would divide by zero.
		std::cerr << "train_NB0: training matrix is empty, call get_train_matrix() first" << std::endl;
		return;
	}
	const std::size_t vec_len = train_mat[0].size();

	/* number of documents labelled 1 (abusive) */
	const int sum = std::accumulate(list_classes.begin(), list_classes.end(), 0);
	std::cout << "sum = " << sum << std::endl;
	p_abusive = static_cast<float>(sum) / static_cast<float>(num_train_docs);
	std::cout << "p_abusive = " << p_abusive << std::endl;

	// assign() rather than resize(): resize() keeps the values of a previous
	// training run, which would then be accumulated onto.
	p0vect.assign(vec_len, 1.0f); // per-word counts in non-abusive docs
	p1vect.assign(vec_len, 1.0f); // per-word counts in abusive docs
	// %d expects int, so convert the sizes explicitly.
	printf("p0num.size() = %d , p1num.size() = %d\n",
		static_cast<int>(p0vect.size()), static_cast<int>(p1vect.size()));
	float p0Denom = 2.0f; // running word total for non-abusive docs
	float p1Denom = 2.0f; // running word total for abusive docs

	/* accumulate the per-class word counts and word totals */
	const std::size_t labelled = std::min(list_classes.size(), train_mat.size());
	for (std::size_t i = 0; i < labelled; i++)
	{
		const bool abusive = (list_classes[i] == 1);
		std::vector<float> &counts = abusive ? p1vect : p0vect;
		float &denom = abusive ? p1Denom : p0Denom;
		const std::vector<int> &row = train_mat[i];
		const std::size_t limit = std::min(row.size(), vec_len);
		for (std::size_t j = 0; j < limit; j++)
		{
			counts[j] += row[j];
			if (row[j] == 1)
				denom += 1.0f;
		}
	}

	/* turn the counts into log frequency ratios */
	for (std::size_t i = 0; i < vec_len; i++)
	{
		p0vect[i] = std::log(p0vect[i] / p0Denom);
		p1vect[i] = std::log(p1vect[i] / p1Denom);
	}

	std::cout << "print the p0vect values : " << std::endl;
	for (std::size_t i = 0; i < p0vect.size(); i++)
		std::cout << p0vect[i] << " ";
	std::cout << "\nprint the p1vect values : " << std::endl;
	for (std::size_t i = 0; i < p1vect.size(); i++)
		std::cout << p1vect[i] << " ";
	std::cout << std::endl;
}

int NaiveBayes::classify_NB(string *doc_to_classify)
{
	return_vec = new int[my_vocab_list.size() + 1]();
	for (int i = 0; doc_to_classify[i] != "null"; i++)
	{
		int pos = my_vocab_list[doc_to_classify[i]];
		if (pos != 0)
		{
			return_vec[pos] = 1;
		}
	}//for

	for (int i = 0; i<my_vocab_list.size() + 1; i++)
		cout << return_vec[i] << " ";
	cout << endl;
	float p1 = inner_product(p1vect.begin() + 1, p1vect.end(), return_vec + 1, 0) + log(p_abusive);
	float p0 = inner_product(p0vect.begin() + 1, p0vect.end(), return_vec + 1, 0) + log(1 - p_abusive);

	cout << "p1 = " << p1 << endl;
	cout << "p0 = " << p0 << endl;

	if (p1>p0)
	{
		return 1;
	}
	else
	{
		return 0;
	}
}

//main.cpp
#include"stdafx.h"
#include"stdlib.h "
#include<iostream>
#include"NaiveBayes.h"
using namespace std;

int main()
{
	NaiveBayes nb;
	nb.create_vocab_list();
	nb.get_train_matrix();
	nb.print();
	nb.train_NB0();

	string doc1_to_classify[] = { "love", "my", "dalmation", "null" };
	string doc2_to_classify[] = { "stupid", "garbage", "null" };
	cout << "doc1 classified as : " << nb.classify_NB(doc1_to_classify) << endl;
	cout << "doc2 classified as : " << nb.classify_NB(doc2_to_classify) << endl;
	system("pause");
	return 0;
}

樸素貝葉斯實現文件分類
2019-10-07
（實戰）樸素貝葉斯實現垃圾分類_201121
2020-11-21
樸素貝葉斯和半樸素貝葉斯（AODE）分類器Python實現
2019-12-30
Python
樸素貝葉斯分類-實戰篇-如何進行文字分類
2020-11-25
文字分類
概率分類之樸素貝葉斯分類（垃圾郵件分類python實現）
2020-10-05
Python
樸素貝葉斯/SVM文字分類
2018-10-25
文字分類
貝葉斯推斷架構實現
2024-05-18
架構
樸素貝葉斯分類和預測演算法的原理及實現
2018-03-28
演算法
樸素貝葉斯分類
2023-01-27
Python實現利用樸素貝葉斯模型（NBC）進行問句意圖分類
2021-07-15
Python模型
樸素貝葉斯演算法的實現與推理
2022-05-06
演算法
分類演算法-樸素貝葉斯
2020-01-17
演算法
使用貝葉斯進行新聞分類
2020-09-29
《統計學習方法》——樸素貝葉斯程式碼實現
2021-02-28
樸素貝葉斯分類流程圖介紹
2018-05-09
流程圖
Sklearn中的樸素貝葉斯分類器
2020-10-20
CNN+pytorch實現文字二分類
2021-07-07
CNNPyTorch
Bert文字分類實踐（一）：實現一個簡單的分類模型
2021-10-10
文字分類模型
貝葉斯分類器詳解從零開始從理論到實踐
2020-12-23
樸素貝葉斯--新浪新聞分類例項
2019-03-02
機器學習之樸素貝葉斯分類
2019-02-28
機器學習
[譯] Sklearn 中的樸素貝葉斯分類器
2018-08-28
HanLP-樸素貝葉斯分類預測缺陷
2019-07-22
HanLP
教程 | 用Scikit-Learn實現多類別文字分類
2018-05-14
文字分類
如何透過Scikit-Learn實現多類別文字分類？
2018-03-05
文字分類
如何通過Scikit-Learn實現多類別文字分類？
2018-03-05
文字分類
簡單易懂的樸素貝葉斯分類演算法
2018-09-21
演算法
機器學習讀書筆記：貝葉斯分類器
2020-10-05
機器學習筆記
高階人工智慧系列（一）——貝葉斯網路、機率推理和樸素貝葉斯網路分類器
2022-11-20
人工智慧
變分貝葉斯自編碼器
2020-04-06
04貝葉斯演算法-貝葉斯網路
2018-12-19
演算法
Pytorch實現分類器
2023-04-17
PyTorch
02貝葉斯演算法-案例一-鳶尾花資料分類
2018-12-18
演算法
有監督學習——支援向量機、樸素貝葉斯分類
2023-03-15
貝葉斯定理
2024-11-29
貝葉斯公式
2024-09-07
公式
C++ 大整數類（BigInteger類）實現
2020-10-17
C++
通用mapper和分類實現
2018-03-15
APP
貝葉斯分類演算法例項 --根據姓名推測男女
2018-11-29
演算法

貝葉斯實現文字分類C++實現

相關文章