让这段 C 风格的数组处理代码更 Pythonic(甚至用上 numpy)

我正在尝试掌握 Python 强大的列表处理能力(最终目标是 numpy),为此我正把自己写的一些 C 代码转换成 Python。

我有一个文本数据文件,其中第一行是标题行,之后每个奇数行是我的输入数据,每个偶数行是我的输出数据。所有数据都以空格分隔。我很高兴能够用嵌套的列表推导式把所有数据读入列表。太棒了。

# Load data.txt: line 0 is the header, the remaining lines alternate
# between an input row and an output row.
with open('data.txt', 'r') as f:
    # get all lines as a list of strings
    lines = list(f)

# convert header row to list of ints and get info
# list(...) keeps the result indexable on Python 3, where map() is lazy;
# split() with no argument tolerates repeated spaces and the trailing newline.
header = list(map(int, lines[0].split()))
num_samples = header[0]
input_dim = header[1]
output_dim = header[2]
del header

# lines 1, 3, 5, ... are input rows; lines 2, 4, 6, ... are output rows
inputs = [[float(x) for x in l.split()] for l in lines[1::2]]
outputs = [[float(x) for x in l.split()] for l in lines[2::2]]
# The comprehension variables are not deleted here: on Python 3 they never
# leak into this scope, so `del x, l` would raise NameError.
del lines

然后我想生成一个新列表,其中每个元素都是相应输入-输出对的函数值。但我想不出如何利用 Python 特有的写法来优化它。下面是 C 风格的 Python 写法:

 # calculate position
import math

pos_y = 0
pos_list = []
# zip pairs each input row with its output row, so no index bookkeeping is
# needed and an empty data set simply yields an empty pos_list.
for target, pantilt in zip(inputs, outputs):
    # Work on local copies of the two angles so the rows of `outputs`
    # are left untouched.
    pan, tilt = pantilt[0], pantilt[1]
    # Fold pan angles outside [-90, 90] back into range; the tilt sign
    # flips whenever the pan angle is folded.
    if pan > 90:
        pan -= 180
        tilt *= -1
    elif pan < -90:
        pan += 180
        tilt *= -1
    tan_pan = math.tan(math.radians(pan))
    tan_tilt = math.tan(math.radians(tilt))
    # z offset relative to the target; the x offset is derived from it
    depth = tan_tilt * (target[1] - pos_y) / math.sqrt(tan_pan * tan_pan + 1)
    pos_list.append([depth * tan_pan + target[0], pos_y, depth + target[2]])

我尝试用列表推导式或 map 来实现,但没弄明白以下几点:

  • 如何让 pos_list 的每个元素同时取自两个不同的列表(inputs 和 outputs)
  • 如何把算法的主体放进推导式中。必须把它写成一个单独的函数,还是有某种使用 lambda 的巧妙写法?
  • 是否甚至可以完全不用循环,直接交给 numpy 把整个计算向量化?

一种使用布尔索引(boolean indexing)/掩码(mask)的向量化方法

 # Vectorized (boolean-mask) version of the position calculation.
import numpy as np


def mask_vectorized(inputs, outputs, pos_y):
    """Compute one [x, y, z] position per sample without a Python loop.

    Parameters
    ----------
    inputs : array-like, 2-D
        Target rows; columns 0, 1 and 2 are read.
    outputs : array-like, 2-D
        Pan/tilt rows; column 0 is the pan angle and column 1 the tilt
        angle, both in degrees. The caller's array is not modified.
    pos_y : float
        Value written to the y column and subtracted from ``inputs[:, 1]``.

    Returns
    -------
    list
        One ``[x, y, z]`` list per row of ``inputs``.
    """
    inputs = np.asarray(inputs, dtype=float)
    # astype() always returns a new array, so editing it below never
    # touches the caller's data.
    pantilt_2d = np.asarray(outputs)[:, :2].astype(float)

    # Masks corresponding to the if/elif branches of the loop version
    mask_col0_lt = pantilt_2d[:, 0] < -90
    mask_col0_gt = pantilt_2d[:, 0] > 90

    # Fold the pan angle back into [-90, 90] ...
    pantilt_2d[mask_col0_gt, 0] -= 180
    pantilt_2d[mask_col0_lt, 0] += 180
    # ... and flip the tilt sign wherever the pan angle was folded
    pantilt_2d[mask_col0_lt | mask_col0_gt, 1] *= -1

    # Column 0 holds tan(pan), column 1 holds tan(tilt)
    tan_pan_tilt = np.tan(np.radians(pantilt_2d))

    # Vectorized form of "tan_tilt * (target[1] - pos_y) / sqrt(tan_pan**2 + 1)"
    V = (tan_pan_tilt[:, 1] * (inputs[:, 1] - pos_y)) / np.sqrt(tan_pan_tilt[:, 0] ** 2 + 1)

    # Size the result from the data itself rather than from a global
    # num_samples that may be undefined or out of date.
    pos_array_vectorized = np.empty((inputs.shape[0], 3))
    pos_array_vectorized[:, 0] = inputs[:, 0] + tan_pan_tilt[:, 0] * V
    pos_array_vectorized[:, 1] = pos_y
    pos_array_vectorized[:, 2] = inputs[:, 2] + V

    # Converted to a list to match the loop version's output; returning
    # the array itself would avoid this conversion.
    return pos_array_vectorized.tolist()

运行时测试

 In [415]: # Parameters and setup input arrays ...: num_samples = 1000 ...: outputs = np.random.randint(-180,180,(num_samples,5)) ...: inputs = np.random.rand(num_samples,6) ...: pos_y = 3.4 ...: In [416]: %timeit original(inputs,outputs,pos_y) 100 loops, best of 3: 2.44 ms per loop In [417]: %timeit mask_vectorized(inputs,outputs,pos_y) 10000 loops, best of 3: 181 µs per loop 

假设您将文件读入列表,如下所示:

 # Read every line of the data file into a list of strings.
# The with-statement closes the file as soon as the lines are read,
# instead of leaving the handle open until garbage collection.
with open('data.txt', 'r') as f:
    lines = f.readlines()

标题是这样的:

 lines[0] 

偶数行是:

 even = lines[1:][::2] 

奇数行是:

 odd = lines[2:][::2] 

现在,您可以用 itertools.izip 把这两个列表配对起来(在 Python 3 中请改用内置的 zip):

 itertools.izip(even, odd) 

它返回一个类似列表的可迭代对象(可以直接遍历,也可以在外面套一层 list( ... ) 得到真正的列表),其中每个元素都是一对输入-输出数据。

如果有人遇到同样的问题,这里有四种基于 Ami 建议的变体(函数 do1、do1b、do2、do3)

供好奇的读者参考,以下是基准测试结果(我有大约 1000 对输入输出数据;数据量大得多时,各方案之间的差距可能会更明显)

  • %timeit do3() – 100 次循环,3 次中最佳:每次循环 2.72 ms
  • %timeit do2() – 100 次循环,3 次中最佳:每次循环 2.73 ms
  • %timeit do1b() – 100 次循环,3 次中最佳:每次循环 2.74 ms
  • %timeit do1() – 100 次循环,3 次中最佳:每次循环 2.67 ms

….

 # Benchmark variants of the position calculation (do1, do1b, do2, do3).
import math

import numpy as np


def load_file(filename='Sharpy_7.txt'):
    """Read *filename* into the module-level globals used by the do*() variants.

    Sets ``file_data`` (every line of the file, header included) and
    ``num_samples``, ``input_dim``, ``output_dim`` (the first three
    integers on the header line).
    """
    global file_data, num_samples, input_dim, output_dim
    # The with-statement closes the file; no explicit close() is needed.
    with open(filename, 'r') as f:
        # get all lines as a list of strings
        file_data = list(f)
    # convert header row to list of ints and get info
    # list(...) keeps the header indexable on Python 3, where map() is lazy;
    # split() with no argument tolerates repeated spaces and the newline.
    header = list(map(int, file_data[0].split()))
    num_samples = header[0]
    input_dim = header[1]
    output_dim = header[2]


def calc_pos(target, pantilt, pos_y=0):
    """Return the ``[x, y, z]`` position for one target / pan-tilt pair.

    target:  sequence whose items 0, 1 and 2 are read.
    pantilt: sequence whose items 0 and 1 are the pan and tilt angles in
             degrees. It is read only; the caller's list is not modified.
    pos_y:   value used for the y component and subtracted from
             ``target[1]``. The default of 0 matches the original code.
    """
    pan, tilt = pantilt[0], pantilt[1]
    # Fold pan angles outside [-90, 90] back into range; the tilt sign
    # flips whenever the pan angle is folded.
    if pan > 90:
        pan -= 180
        tilt *= -1
    elif pan < -90:
        pan += 180
        tilt *= -1
    tan_pan = math.tan(math.radians(pan))
    tan_tilt = math.tan(math.radians(tilt))
    # z offset relative to the target; the x offset is derived from it
    depth = tan_tilt * (target[1] - pos_y) / math.sqrt(tan_pan * tan_pan + 1)
    return [depth * tan_pan + target[0], pos_y, depth + target[2]]


def calc_pos2(d):
    """Single-argument form of calc_pos for use with map(); *d* is (target, pantilt)."""
    return calc_pos(d[0], d[1])


def calc_stats():
    """Store pos_list as the array ``pos_array`` plus its per-column mean and std."""
    global pos_array, pos_avg, pos_std
    pos_array = np.asarray(pos_list)
    pos_avg = np.mean(pos_array, 0)
    pos_std = np.std(pos_array, 0)


def _parse_rows(rows):
    """Convert whitespace-separated text rows into lists of floats."""
    return [[float(x) for x in row.split()] for row in rows]


def _target_pantilt_lists():
    """Split file_data into target rows (lines 1, 3, ...) and pan/tilt rows (lines 2, 4, ...)."""
    return _parse_rows(file_data[1::2]), _parse_rows(file_data[2::2])


# map over zipped pairs
def do3():
    """Fill the global pos_list using map() with calc_pos2."""
    global pos_list
    target_list, pantilt_list = _target_pantilt_lists()
    # The built-in zip works on Python 2 and 3 (itertools.izip is Python 2
    # only); list(...) materializes the lazy Python 3 map so calc_stats()
    # receives a real sequence.
    pos_list = list(map(calc_pos2, zip(target_list, pantilt_list)))


# list comprehension over zipped pairs
def do2():
    """Fill the global pos_list using a list comprehension that calls calc_pos."""
    global pos_list
    target_list, pantilt_list = _target_pantilt_lists()
    pos_list = [calc_pos(target, pantilt)
                for target, pantilt in zip(target_list, pantilt_list)]


# for loop with function call
def do1b():
    """Fill the global pos_list using an explicit loop that calls calc_pos."""
    global pos_list
    target_list, pantilt_list = _target_pantilt_lists()
    pos_list = []
    # Iterating the pairs directly means the loop follows the rows actually
    # read, not the sample count declared in the header.
    for target, pantilt in zip(target_list, pantilt_list):
        pos_list.append(calc_pos(target, pantilt))


# for loop with unrolled algorithm
def do1():
    """Fill the global pos_list using an explicit loop with the formula inlined.

    The body deliberately repeats calc_pos instead of calling it, so the
    benchmark can show the cost of the function call.
    """
    global pos_list
    target_list, pantilt_list = _target_pantilt_lists()
    pos_list = []
    for target, pantilt in zip(target_list, pantilt_list):
        pan, tilt = pantilt[0], pantilt[1]
        if pan > 90:
            pan -= 180
            tilt *= -1
        elif pan < -90:
            pan += 180
            tilt *= -1
        tan_pan = math.tan(math.radians(pan))
        tan_tilt = math.tan(math.radians(tilt))
        depth = tan_tilt * (target[1] - 0) / math.sqrt(tan_pan * tan_pan + 1)
        pos_list.append([depth * tan_pan + target[0], 0, depth + target[2]])