有共享變數時,並行程式就難寫了(已完成)

lt發表於2016-11-08

在前次N王后問題中加入了解法的輸出。只有原始的才正確。沒想到其他分佈的變數怎麼儲存,明天再寫。以下程式碼中陣列變數被並行寫,引發了錯誤。

#include <iostream>                // std::cout
#include <future>                // std::async, std::future, std::launch


#include <ctime>
// Board size (number of queens). Read by q() on every call.
int n = 7;
// Column bit chosen for each row of the placement being built.
// Every call to q() writes into this one shared array.
int s[20] = {0};

// Counts the ways to complete a partial N-queens placement and prints each
// completed placement as the n values currently stored in s.
//   i  bitmask of columns already occupied
//   j  bitmask of attacked squares that moves one bit up (*2) per row
//   k  bitmask of attacked squares that moves one bit down (/2) per row
//   l  index of the row being filled
// Returns 1 when l == n, otherwise the sum of the counts of all recursive calls.
int q(int i, int j, int k, int l)
{
  int total = 0;

  // Bits still set in `open` are the columns not attacked in this row.
  int open = ((1 << n) - 1) & ~(i | j | k);
  while (open != 0) {
    const int pick = open & -open;   // lowest free column
    s[l] = pick;
    total += q(i | pick, (j | pick) * 2, (k | pick) / 2, l + 1);
    open ^= pick;
  }

  if (l != n)
    return total;

  // All rows are filled: dump the recorded column bits, one line per solution.
  for (int row = 0; row < n; ++row)
    printf("%d ", s[row]);
  printf("\n");
  return 1;
}
// Milliseconds of clock() time elapsed since `start`.
// The raw tick difference is a clock_t counted in 1/CLOCKS_PER_SEC seconds, so it is
// scaled explicitly and narrowed to int to match the "%d ms" fields printed by main().
// NOTE(review): clock() reports processor time; on platforms that sum it over all
// threads the parallel methods will not show wall-clock speed-ups — confirm per target.
static int elapsed_ms(clock_t start)
{
  return static_cast<int>((clock() - start) * 1000.0 / CLOCKS_PER_SEC);
}

// Zeroes the first n entries of the shared placement array between methods.
static void clear_placements()
{
  for (int x = 0; x < n; ++x)
    s[x] = 0;
}

// Counts the N-queens solutions five ways (sequential, one async task per first-row
// column, async over half the columns, sequential over half, OpenMP over half) and
// prints the count and elapsed time of each.
//   argv[1] (optional)  board size, 1..20; the default is the initial value of n.
// Returns 0 on success, 1 when the requested board size is out of range.
//
// NOTE: q() records placements in the global array s. The async and OpenMP methods
// therefore run several q() calls that write s at the same time, and they start at
// row 1 without storing the first-row choice, so the placements they print are not
// reliable. The counts returned by q() are computed from the bitmasks, not from s.
int main(int argc,char*argv[]) 
{ 
  if (argc == 2)
    n = atoi(argv[1]);
  // s, fb, fnomp and fomp all have 20 slots, and 1 << n needs a non-negative n.
  if (n < 1 || n > 20) {
    fprintf(stderr, "board size must be between 1 and 20, got %d\n", n);
    return 1;
  }

  // The "half" methods only search the first-row columns 0 .. half-1; the other
  // columns are covered by doubling, except the middle column of an odd board.
  const int half = n / 2 + n % 2;
  const bool odd = (n % 2 == 1);

  // method 1: plain sequential search
  clock_t t = clock();
  int a = q(0, 0, 0, 0);
  printf("(normal)result of %d Q is %d,time is %d ms\n", n, a, elapsed_ms(t));
  clear_placements();

  // method 2: one async task per first-row column
  a = 0;
  t = clock();
  std::future<int> fb[20];
  for (int i = 0; i < n; i++)
    fb[i] = std::async(std::launch::async, q, (1 << i), (1 << i) * 2, (1 << i) / 2, 1);
  for (int i = 0; i < n; i++)
    a += fb[i].get();
  printf("(mthred)result of %d Q is %d,time is %d ms\n", n, a, elapsed_ms(t));
  clear_placements();

  // method 3: async tasks for half of the first-row columns
  a = 0;
  t = clock();
  for (int i = 0; i < half; i++)
    fb[i] = std::async(std::launch::async, q, (1 << i), (1 << i) * 2, (1 << i) / 2, 1);
  int b = 0;
  if (odd)
    b = fb[half - 1].get();   // middle column: counted once, not doubled
  for (int i = 0; i < n / 2; i++)
    a += fb[i].get();
  a = 2 * a + b;
  printf("(mthred+half)result of %d Q is %d,time is %d ms\n", n, a, elapsed_ms(t));
  clear_placements();

  // method 4: sequential search over half of the first-row columns
  a = 0;
  int fnomp[20];
  t = clock();
  for (int i = 0; i < half; i++)
    fnomp[i] = q((1 << i), (1 << i) * 2, (1 << i) / 2, 1);
  b = 0;
  if (odd)
    b = fnomp[half - 1];
  for (int i = 0; i < n / 2; i++)
    a += fnomp[i];
  a = 2 * a + b;
  printf("(no_omp+half)result of %d Q is %d,time is %d ms\n", n, a, elapsed_ms(t));
  clear_placements();

  // method 5: OpenMP parallel loop over half of the first-row columns
  a = 0;
  int fomp[20];
  t = clock();
  #pragma omp parallel for
  for (int i = 0; i < half; i++)
    fomp[i] = q((1 << i), (1 << i) * 2, (1 << i) / 2, 1);
  b = 0;
  if (odd)
    b = fomp[half - 1];
  for (int i = 0; i < n / 2; i++)
    a += fomp[i];
  a = 2 * a + b;
  printf("(omp+half)result of %d Q is %d,time is %d ms\n", n, a, elapsed_ms(t));

  return 0;
}

用陣列複製和二維陣列解決了部分問題,遺留了n為奇數計算一半問題。

#include <iostream>                // std::cout
#include <future>                // std::async, std::future, std::launch


#include <ctime>
#include <cmath>
// Board size (number of queens); main() overwrites it from argv[1] when one is given.
int n = 5; 
// Placement buffer that main() hands to q1() for the single-threaded runs.
int s[20]={0};
// NOTE(review): not referenced by the code shown here — printq() and q1() take their
// own `f` parameter, which shadows this global.
int f=0;
// Draws one solved board on stdout: one text line per board row, 'Q' for the queen
// and '-' for every other square, followed by an empty line.
//   s  per-row queen position; each entry is a single set bit, bit c meaning column c
//   n  board size (rows and columns)
//   f  2 = additionally draw the left-right mirrored board, followed by the line
//      "mirror"; any other value draws the board only
// Column 0 (the least significant bit) is drawn right-most, as in a binary number.
// The column is found with integer shifts rather than floating-point logarithms, and
// the row is built character by character, so an entry that is zero, negative or
// outside the board is drawn as an empty row instead of dividing by zero or reading
// outside a fixed-width template.
void printq(int s[],int n,int f)
{
  const int passes = (f == 2) ? 2 : 1;   // pass 0: board as found, pass 1: mirror
  for (int pass = 0; pass < passes; ++pass) {
    for (int x = 0; x < n; ++x) {
      // Index of the highest set bit; stays -1 when the entry has no usable bit.
      int col = -1;
      for (unsigned bits = (s[x] > 0) ? static_cast<unsigned>(s[x]) : 0u; bits != 0; bits >>= 1)
        ++col;
      if (col >= n)
        col = -1;

      // Unmirrored, column c sits n-1-c characters from the left; mirrored, c.
      int queen_at = -1;
      if (col >= 0)
        queen_at = (pass == 0) ? n - 1 - col : col;

      for (int pos = 0; pos < n; ++pos)
        putchar(pos == queen_at ? 'Q' : '-');
      putchar('\n');
    }
    if (pass == 0)
      printf("\n");
    else
      printf("mirror\n");
  }
}

// Counts the ways to complete a partial N-queens placement, recording the chosen
// column bit of each row in the caller-supplied array and printing the n recorded
// values whenever a placement is complete.
//   s  placement buffer, written in place at index l
//   i  bitmask of columns already occupied
//   j  bitmask of attacked squares that moves one bit up (*2) per row
//   k  bitmask of attacked squares that moves one bit down (/2) per row
//   l  index of the row being filled
// Returns 1 when l == n, otherwise the sum of the counts of all recursive calls.
int q(int s[20],int i, int j, int k, int l) 
{
  int total = 0;

  // Bits still set in `open` are the columns not attacked in this row.
  int open = ((1 << n) - 1) & ~(i | j | k);
  while (open != 0) {
    const int pick = open & -open;   // lowest free column
    s[l] = pick;
    total += q(s, i | pick, (j | pick) * 2, (k | pick) / 2, l + 1);
    open ^= pick;
  }

  if (l != n)
    return total;

  for (int row = 0; row < n; ++row)
    printf("%d ", s[row]);
  printf("\n");
  return 1;
}
// Counts the ways to complete a partial N-queens placement and draws every completed
// board with printq(). Unlike q(), it never writes to the caller's array: each call
// works on its own copy of the rows placed so far.
//   s  rows 0 .. l-1 of the placement built by the callers (read only)
//   i  bitmask of columns already occupied
//   j  bitmask of attacked squares that moves one bit up (*2) per row
//   k  bitmask of attacked squares that moves one bit down (/2) per row
//   l  index of the row being filled
//   f  passed through unchanged to printq()
// Returns 1 when l == n, otherwise the sum of the counts of all recursive calls.
int q1(int s[20],int i, int j, int k, int l,int f) 
{
  // Private copy of the rows already decided.
  int placed[20];
  for (int row = l - 1; row >= 0; --row)
    placed[row] = s[row];

  int total = 0;
  int open = ((1 << n) - 1) & ~(i | j | k);
  while (open != 0) {
    const int pick = open & -open;   // lowest free column
    placed[l] = pick;
    total += q1(placed, i | pick, (j | pick) * 2, (k | pick) / 2, l + 1, f);
    open ^= pick;
  }

  if (l != n)
    return total;

  printq(placed, n, f);
  return 1;
}

// Milliseconds of clock() time elapsed since `start`.
// The raw tick difference is a clock_t counted in 1/CLOCKS_PER_SEC seconds, so it is
// scaled explicitly and narrowed to int to match the "%d ms" fields printed by main().
// NOTE(review): clock() reports processor time; on platforms that sum it over all
// threads the parallel methods will not show wall-clock speed-ups — confirm per target.
static int elapsed_ms(clock_t start)
{
  return static_cast<int>((clock() - start) * 1000.0 / CLOCKS_PER_SEC);
}

// Print mode for the searches that cover only half of the first-row columns:
// 2 = draw each board and its mirror image, 1 = draw the board only.
// Mirroring maps the middle column of an odd board onto itself, so the boards that
// start there are already enumerated and must not be mirrored again.
static int mirror_flag(int i)
{
  return ((n % 2 == 1) && (i == n / 2)) ? 1 : 2;
}

// Counts and draws the N-queens solutions five ways (sequential, one async task per
// first-row column, async over half the columns, sequential over half, OpenMP over
// half) and prints the count and elapsed time of each.
//   argv[1] (optional)  board size, 1..19; the default is the initial value of n.
// Returns 0 on success, 1 when the requested board size is out of range.
// Each parallel task gets its own row of s2 as its starting placement. The tasks
// still share stdout, so boards drawn by different tasks may interleave.
int main(int argc,char*argv[]) 
{ 
  if (argc == 2)
    n = atoi(argv[1]);
  // Every fixed buffer in this program has 20 slots, and 1..19 is the range the row
  // renderer is guaranteed to handle; 1 << n also needs a non-negative n.
  if (n < 1 || n > 19) {
    fprintf(stderr, "board size must be between 1 and 19, got %d\n", n);
    return 1;
  }

  // The "half" methods only search the first-row columns 0 .. half-1; the other
  // columns are covered by doubling, except the middle column of an odd board.
  const int half = n / 2 + n % 2;
  const bool odd = (n % 2 == 1);

  // method 1: plain sequential search
  clock_t t = clock();
  int a = q1(s, 0, 0, 0, 0, 1);
  printf("(normal)result of %d Q is %d,time is %d ms\n", n, a, elapsed_ms(t));

  // method 2: one async task per first-row column
  a = 0;
  t = clock();
  std::future<int> fb[20];
  int s2[20][20] = {{0}};
  for (int i = 0; i < n; i++) {
    s2[i][0] = 1 << i;   // row 0 of task i's private placement
    fb[i] = std::async(std::launch::async, q1, s2[i], (1 << i), (1 << i) * 2, (1 << i) / 2, 1, 1);
  }
  for (int i = 0; i < n; i++)
    a += fb[i].get();
  printf("(mthred)result of %d Q is %d,time is %d ms\n", n, a, elapsed_ms(t));

  // method 3: async tasks for half of the first-row columns
  // (s2[i][0] still holds 1 << i from method 2)
  a = 0;
  t = clock();
  for (int i = 0; i < half; i++)
    fb[i] = std::async(std::launch::async, q1, s2[i], (1 << i), (1 << i) * 2, (1 << i) / 2, 1, mirror_flag(i));
  int b = 0;
  if (odd)
    b = fb[half - 1].get();   // middle column: counted once, not doubled
  for (int i = 0; i < n / 2; i++)
    a += fb[i].get();
  a = 2 * a + b;
  printf("(mthred+half)result of %d Q is %d,time is %d ms\n", n, a, elapsed_ms(t));

  // method 4: sequential search over half of the first-row columns
  for (int x = 0; x < n; ++x)
    s[x] = 0;
  a = 0;
  int fnomp[20];
  t = clock();
  for (int i = 0; i < half; i++) {
    s[0] = (1 << i);
    fnomp[i] = q1(s, (1 << i), (1 << i) * 2, (1 << i) / 2, 1, mirror_flag(i));
  }
  b = 0;
  if (odd)
    b = fnomp[half - 1];
  for (int i = 0; i < n / 2; i++)
    a += fnomp[i];
  a = 2 * a + b;
  printf("(no_omp+half)result of %d Q is %d,time is %d ms\n", n, a, elapsed_ms(t));

  // method 5: OpenMP parallel loop over half of the first-row columns
  for (int x = 0; x < n; ++x)
    s[x] = 0;
  a = 0;
  int fomp[20];
  t = clock();
  #pragma omp parallel for
  for (int i = 0; i < half; i++) {
    s2[i][0] = 1 << i;
    fomp[i] = q1(s2[i], (1 << i), (1 << i) * 2, (1 << i) / 2, 1, mirror_flag(i));
  }
  b = 0;
  if (odd)
    b = fomp[half - 1];
  for (int i = 0; i < n / 2; i++)
    a += fomp[i];
  a = 2 * a + b;
  printf("(omp+half)result of %d Q is %d,time is %d ms\n", n, a, elapsed_ms(t));

  return 0;
}

以上程式碼用傳引數f解決了何時列印映象的問題。如果不用引數,用全域性變數,仍存在併發訪問,導致不該映象的(首行棋子在中間格的)也被映象了。 關鍵程式碼如下


  //method 3 std:thread/async compute half
  a=0;
  t=clock();
  for(int i=0;i<n/2+(n%2);i++)
    fb[i]=std::async(std::launch::async, q1,s2[i],(1<<i),(1<<i)*2,(1<<i)/2,1,((n%2==1) && (i==n/2))?1:2);

最後修改完版本,幾乎所有多執行緒都比單執行緒慢。

D:\>g++ mtqp.cpp -fopenmp

D:\>a 12 |find "ms"
(normal)result of 12 Q is 14200,time is 610 ms
(mthred)result of 12 Q is 14200,time is 820 ms
(mthred+half)result of 12 Q is 14200,time is 850 ms
(no_omp+half)result of 12 Q is 14200,time is 590 ms
(omp+half)result of 12 Q is 14200,time is 790 ms

D:\>a 13 |find "ms"
(normal)result of 13 Q is 73712,time is 3660 ms
(mthred)result of 13 Q is 73712,time is 5070 ms
(mthred+half)result of 13 Q is 73712,time is 4990 ms
(no_omp+half)result of 13 Q is 73712,time is 3630 ms
(omp+half)result of 13 Q is 73712,time is 4820 ms

與不輸出圖形的版本差別巨大

D:\>mtq31 14
(normal)result of 14 Q is 365596,time is 680 ms
(mthred)result of 14 Q is 365596,time is 282 ms
(mthred+half)result of 14 Q is 365596,time is 132 ms

相關文章