Hadoop 2.6: Implementing Matrix Multiplication with MapReduce, Part 1: Matrix Transpose

Posted by 土豆拍死馬鈴薯 on 2017-10-10

Project repository: https://github.com/tudoupaisimalingshu/hadoop_matrix

Matrix multiplication

I. Theoretical background
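For an M×N matrix A and an N×P matrix B, the product C = A·B is an M×P matrix whose entry in row i, column j is the dot product of row i of A with column j of B:

C[i][j] = A[i][1]*B[1][j] + A[i][2]*B[2][j] + ... + A[i][N]*B[N][j]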



II. How to implement it in a program? A[M][N] * B[N][P]

import java.util.Arrays;

public class Matrix {

	public static void main(String[] args) {
		int[][] matrix1 = {
				{1,2,-2,0},
				{3,3,4,-3},
				{-2,0,2,3},
				{5,3,-1,2},
				{-4,2,0,2}};//left matrix, 5*4
		int[][] matrix2 = {
				{0,3,-1,2,-3},
				{1,3,5,-2,-1},
				{0,1,4,-1,2},
				{-2,2,-1,1,2}};//right matrix, 4*5
		int[][] matrix3 = new int[5][5];//result matrix, 5*5
		for(int i=0;i<5;i++)//compute each row of the result matrix
		{
			int[] row = matrix1[i];//row i of the left matrix
			System.out.println("row=" + Arrays.toString(row));
			for(int j=0;j<5;j++)//compute each column of the result matrix
			{
				int[] line = new int[4];//column j of the right matrix
				for(int k=0;k<4;k++)
				{
					line[k] = matrix2[k][j];
				}//the column vector is vertical, so its elements must be gathered in a loop
				System.out.println("line=" + Arrays.toString(line));
				int result_i_j = 0;//dot product of the two vectors
				for(int m=0;m<4;m++)
				{
					result_i_j += row[m] * line[m];//accumulate the products
				}
				System.out.println("result_i_j=" + result_i_j);
				System.out.println("--------------------");
				matrix3[i][j] = result_i_j;//store the value at the corresponding position of the result
			}
		}
		
		//print the result matrix
		for(int i=0;i<5;i++)
		{
			for(int j=0;j<5;j++)
			{
				System.out.print(matrix3[i][j] + "\t");
			}
			System.out.println();
		}
	}
}


Output:

row=[1, 2, -2, 0]
line=[0, 1, 0, -2]
result_i_j=2
--------------------
line=[3, 3, 1, 2]
result_i_j=7
--------------------
line=[-1, 5, 4, -1]
result_i_j=1
--------------------
line=[2, -2, -1, 1]
result_i_j=0
--------------------
line=[-3, -1, 2, 2]
result_i_j=-9
--------------------
row=[3, 3, 4, -3]
line=[0, 1, 0, -2]
result_i_j=9
--------------------
line=[3, 3, 1, 2]
result_i_j=16
--------------------
line=[-1, 5, 4, -1]
result_i_j=31
--------------------
line=[2, -2, -1, 1]
result_i_j=-7
--------------------
line=[-3, -1, 2, 2]
result_i_j=-10
--------------------
row=[-2, 0, 2, 3]
line=[0, 1, 0, -2]
result_i_j=-6
--------------------
line=[3, 3, 1, 2]
result_i_j=2
--------------------
line=[-1, 5, 4, -1]
result_i_j=7
--------------------
line=[2, -2, -1, 1]
result_i_j=-3
--------------------
line=[-3, -1, 2, 2]
result_i_j=16
--------------------
row=[5, 3, -1, 2]
line=[0, 1, 0, -2]
result_i_j=-1
--------------------
line=[3, 3, 1, 2]
result_i_j=27
--------------------
line=[-1, 5, 4, -1]
result_i_j=4
--------------------
line=[2, -2, -1, 1]
result_i_j=7
--------------------
line=[-3, -1, 2, 2]
result_i_j=-16
--------------------
row=[-4, 2, 0, 2]
line=[0, 1, 0, -2]
result_i_j=-2
--------------------
line=[3, 3, 1, 2]
result_i_j=-2
--------------------
line=[-1, 5, 4, -1]
result_i_j=12
--------------------
line=[2, -2, -1, 1]
result_i_j=-10
--------------------
line=[-3, -1, 2, 2]
result_i_j=14
--------------------
2	7	1	0	-9	
9	16	31	-7	-10	
-6	2	7	-3	16	
-1	27	4	7	-16	
-2	-2	12	-10	14	


III. Problems with the conventional program

1. It cannot run in parallel; execution always proceeds one iteration at a time, as dictated by the loop conditions.

2. If the matrices are too large to fit in memory, they have to live in files. The left matrix is unproblematic: each iteration reads one row into memory, and the next iteration reads the next row. But as the program above shows, the right matrix is consumed column by column, which means traversing all rows and taking one element from each just to assemble a single column vector; for a large file this is far too slow. The sketch below illustrates the asymmetry.
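A minimal sketch of the two access patterns (the plain tab-separated layout and the method names here are illustrative assumptions, not the storage format introduced later in this post):

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

public class FileAccessSketch {
	//Row i of the left matrix: one sequential pass that stops at line i.
	static int[] readRow(String path, int i) throws IOException {
		try (BufferedReader in = new BufferedReader(new FileReader(path))) {
			String line = null;
			for (int r = 0; r <= i; r++)
				line = in.readLine();
			String[] parts = line.split("\t");
			int[] row = new int[parts.length];
			for (int k = 0; k < parts.length; k++)
				row[k] = Integer.parseInt(parts[k]);
			return row;
		}
	}

	//Column j of the right matrix: every line of the file must be touched,
	//and this repeats once for every column of the result.
	static int[] readColumn(String path, int j, int rows) throws IOException {
		int[] col = new int[rows];
		try (BufferedReader in = new BufferedReader(new FileReader(path))) {
			for (int r = 0; r < rows; r++)
				col[r] = Integer.parseInt(in.readLine().split("\t")[j]);
		}
		return col;
	}
}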

 

IV. Solution

1. For problem 1, introduce Hadoop, a framework for parallel execution whose Map and Reduce tasks run concurrently.

2. For problem 2, transpose the right matrix so that its column vectors become row vectors; a small in-memory illustration follows.
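A quick in-memory illustration of the idea (the distributed version is the subject of section V): once B is transposed into Bt, C[i][j] is the dot product of row i of A and row j of Bt, so both operands can be consumed strictly row by row.

public class TransposeDemo {
	static int[][] transpose(int[][] b) {
		int[][] bt = new int[b[0].length][b.length];
		for (int r = 0; r < b.length; r++)
			for (int c = 0; c < b[0].length; c++)
				bt[c][r] = b[r][c];//element (r,c) moves to (c,r)
		return bt;
	}

	//C[i][j] = dot(rowOfA, rowOfBt): two row reads, no column assembly.
	static int dot(int[] x, int[] y) {
		int s = 0;
		for (int k = 0; k < x.length; k++)
			s += x[k] * y[k];
		return s;
	}
}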



V. Matrix multiplication with Hadoop MapReduce

1. Storage format of the matrices
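Reconstructed from the Mapper's comments in the code below: each matrix is stored one row per line, as the row index, a tab, then comma-separated columnIndex_value pairs. The right matrix from section II, for example, becomes:

1	1_0,2_3,3_-1,4_2,5_-3
2	1_1,2_3,3_5,4_-2,5_-1
3	1_0,2_1,3_4,4_-1,5_2
4	1_-2,2_2,3_-1,4_1,5_2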



Why write all the columns of a row on a single line?

The matrix file may be very large, in which case HDFS splits it into chunks. If all the columns of a row were not written together, elements belonging to the same row could end up in different splits, and extra time and space would later be spent locating and stitching them back together; in other words, an additional reduce step would be needed just to reassemble rows.

 

Why tag every element in a row with its column index?

Because Hadoop runs in parallel, nothing guarantees that the elements of a row arrive in order when the map output is merged in the reduce phase, so each element must carry its own index. Since the row number is unique, the row number plus the explicit column number is enough to keep the computation correct under parallel processing. The helper sketched below shows how tagged pairs can be put back in order.
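For instance, a hypothetical helper (not part of the post's code) can rebuild an ordered row from the tagged pairs no matter how the shuffle ordered them:

//Rebuild an ordered row of length n from tagged "index_value" pairs
//such as "3_0,1_0,4_-2,2_1", regardless of their arrival order.
static int[] toOrderedRow(String tagged, int n) {
	int[] row = new int[n];
	for (String pair : tagged.split(",")) {
		String[] kv = pair.split("_");
		row[Integer.parseInt(kv[0]) - 1] = Integer.parseInt(kv[1]);//tags are 1-based
	}
	return row;
}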

 

 

 

2. MapReduce implementation of the matrix transpose


package hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class Step1 {
	public static class Mapper1 extends Mapper<LongWritable,Text,Text,Text>
	{
		private Text outKey = new Text();
		private Text outValue = new Text();
		
		/*
			Matrix to transpose:
			0	3	-1	2	-3
			1	3	5	-2	-1
			0	1	4	-1	2
			-2	2	-1	1	2
		*/
		/*
			Target matrix:
			0	1	1	-2
			3	3	1	2
			-1	5	4	-1
			2	-2	-1	1
			-3	-1	2	2
		*/
		//For each input line, taking the first row as the example:
		//key : the byte offset of the line in the file (unused here)
		//value : "1	1_0,2_3,3_-1,4_2,5_-3"
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			String[] rowAndLine = value.toString().split("\t");
			//rowAndLine : {"1","1_0,2_3,3_-1,4_2,5_-3"}
			String row = rowAndLine[0];
			//row : "1"
			String[] lines = rowAndLine[1].split(",");
			//rowAndLine[1] : "1_0,2_3,3_-1,4_2,5_-3"
			//lines : {"1_0","2_3","3_-1","4_2","5_-3"}
			for(String line : lines)//for each column entry, e.g. line : "1_0"
			{
				String column = line.split("_")[0];
				//column : 1
				String valueStr = line.split("_")[1];
				//valueStr : 0
				outKey.set(column);
				//the column index becomes the row index
				outValue.set(row + "_" + valueStr);
				//the row index becomes the column index
				context.write(outKey, outValue);
				//emits (1,"1_0")
			}
			//when the loop finishes, {"1_0","2_3","3_-1","4_2","5_-3"} has produced
			//(1,"1_0") row 1, column 1_0    (2,"1_3") row 2, column 1_3    (3,"1_-1") (4,"1_2") (5,"1_-3")
			/*
			Target transposed matrix:
			0	1	1	-2
			3	3	1	2
			-1	5	4	-1
			2	-2	-1	1
			-3	-1	2	2
			*/
			//which corresponds exactly to the first column of the transposed matrix
		}
		/*
			All map calls together produce:
			("1","1_0")	("2","1_3")	("3","1_-1")	("4","1_2")	("5","1_-3")
			("1","2_1")	("2","2_3")	("3","2_5")	("4","2_-2")	("5","2_-1")
			("1","3_0")	("2","3_1")	("3","3_4")	("4","3_-1")	("5","3_2")
			("1","4_-2")	("2","4_2")	("3","4_-1")	("4","4_1")	("5","4_2")
		*/

	}
	

	/*
		The reduce task merges all key-value pairs produced by the map phase
		into the storage representation of the transposed matrix.
		Values with the same key are grouped into one collection, e.g.:
		key : "1"
		values : {"3_0","1_0","4_-2","2_1"}
		Note: this is why the column indices are tagged; the order of the
		values is not necessarily the original column order.
	*/
	
	public static class Reducer1 extends Reducer<Text,Text,Text,Text>
	{
		private Text outKey = new Text();
		private Text outValue = new Text();
		
		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			
			StringBuilder sb = new StringBuilder();
			for(Text text : values)
			{
				sb.append(text + ",");
			}
			//sb : "3_0,1_0,4_-2,2_1,"
			//note the trailing comma
			String line = "";
			if(sb.toString().endsWith(","))
			{
				line = sb.substring(0,sb.length()-1);
			}
			//trailing comma removed
			//line : "3_0,1_0,4_-2,2_1"
			outKey.set(key);
			outValue.set(line);
			//("1","3_0,1_0,4_-2,2_1")
			context.write(outKey, outValue);
		}
		
	}
	
	private static final String INPATH = "input/matrix.txt";//input file path
	private static final String OUTPATH = "output/step1";//output path
	private static final String HDFS = "hdfs://pc1:9000";//HDFS URI
	
	public void run() throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		//String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
		//String[] otherArgs = {"hdfs://pc1:9000/input/chenjie.txt","hdfs://pc1:9000/output/out4"};
		String[] otherArgs = {"input/matrix.txt","hdfs://pc1:9000/output/step1"};
		//the input and output HDFS paths are configured here
		if (otherArgs.length != 2) {
			System.err.println("Usage: step1 <in> <out>");
			System.exit(2);
		}
		//conf.set("fs.defaultFS",HDFS);
		Job job = Job.getInstance(conf, "step1");//create the job and set its name
		job.setJarByClass(Step1.class);
		job.setMapperClass(Mapper1.class);//set the Mapper class for the job
		job.setReducerClass(Reducer1.class);//set the Reducer class for the job

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		job.setOutputKeyClass(Text.class);//set the output key type
		job.setOutputValueClass(Text.class);//set the output value type

		job.setOutputFormatClass(SequenceFileOutputFormat.class);
		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));//set the input path for the job

		FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));//set the output path for the job
		System.exit(job.waitForCompletion(true) ? 0 : 1);
		
		/*Alternative setup kept from the original post; it also clears an existing output directory:
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS",HDFS);
		Job job = Job.getInstance(conf,"step1");
		job.setJarByClass(Step1.class);
		job.setMapperClass(Mapper1.class);
		job.setReducerClass(Reducer1.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileSystem fs = FileSystem.get(conf);
		Path inPath = new Path(INPATH);
		if(fs.exists(inPath))
		{
			//FileInputFormat.addInputPath(conf, inPath);
		}
		Path outPath = new Path(OUTPATH);
		if(fs.exists(outPath))
		{
			fs.delete(outPath, true);
		}*/
		
	}
	
	public static void main(String[] args)
	{
		try {
			new Step1().run();
		} catch (ClassNotFoundException | IOException | InterruptedException e) {
			e.printStackTrace();
		}
	}
	
}
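Building and launching are not covered in the post; assuming the class is packaged into a jar (the jar name below is hypothetical), the job would be submitted along these lines. Note that main() ignores its command-line arguments, since the input and output paths are hardcoded in run():

hadoop jar matrix.jar hadoop.Step1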


Run result:




Use hadoop fs -text <file path> to view the transposed result:
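Because the job writes SequenceFile output, hadoop fs -text (unlike hadoop fs -cat) decodes it into readable text. Assuming a single reducer, the result would be viewed with something like the following (the part file name depends on the reducer count):

hadoop fs -text /output/step1/part-r-00000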



Then move on to the matrix multiplication itself (covered in the next post).
