Python通用資料格式轉換工具

wklken發表於2015-11-25

原文網址 : http://python.jobbole.com/83447/

Python

已獨立成專案在github上面 dataformat

涉及模組 os, getopt, sys

需求

在進行hadoop測試時，需要造大量資料，例如某個表存在56列，但實際程式邏輯只使用到其中某幾列，因此我們造的資料也只需要提供這幾列

構造幾列資料，轉化為對應資料表格式

原始碼

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#dataformat.py
#   wklken@yeah.net
#this script change data from your source to the dest data format
#2011-08-05 created version0.1
#2011-10-29 add row-row mapping ,default row value .rebuild all functions. version0.2 
#next:add data auto generate by re expression
#2011-12-17 add new functions, add timestamp creator.  version0.3
#2012-03-08 rebuild functions. version0.4
#2012-06-22 add function to support multi output separators
#2012-07-11 fix bug  line 44,add if
#2012-09-03 rebuild functions,add help msg! version0.5
#2012-11-08 last version edited by lingyue.wkl
#           this py: https://github.com/wklken/pytools/blob/master/data_process/dataformat.py

import os
import sys
import getopt
import time
import re

#read file and get each line without the trailing \n
def read_file(path):
    """Return the lines of the text file at ``path`` without their newline.

    Only a trailing newline character is removed, so a final line that is
    not newline-terminated keeps its last character. The file is closed
    even if reading fails.
    """
    with open(path, "r") as f:
        return [line[:-1] if line.endswith("\n") else line for line in f]

#處理一行，轉為目標格式，返回目標行
def one_line_proc(parts, total, ft_map, outsp, empty_fill, fill_with_sno):
    """Build one output line with ``total`` columns from the input fields.

    parts         -- fields of the current input line.
    total         -- number of columns in the output line.
    ft_map        -- maps a 1-based output column to either "d<value>"
                     (a literal default value) or the 1-based input column
                     number as a string.
    outsp         -- maps a 1-based column number to the separator written
                     after that column; key 0 holds the default separator.
    empty_fill    -- text used for columns missing from ``ft_map``.
    fill_with_sno -- when true, unmapped columns receive their own column
                     number instead of ``empty_fill``.

    Returns the joined line without a trailing newline.
    """
    # step 1: work out the value of every output column
    outline = []
    for i in range(1, total + 1):
        fill_index = ft_map.get(i)
        if fill_index is None:
            # unmapped column: column number (-s) or the fill value
            outline.append(str(i) if fill_with_sno else empty_fill)
        elif fill_index.startswith("d"):
            # literal default value; no timestamp handling for defaults
            outline.append(fill_index[1:])
        else:
            outline.append(handler_specal_part(parts[int(fill_index) - 1]))

    # step 2: interleave the separators; a column-specific one wins over
    # the default, and nothing is written after the last column
    default_outsp = outsp.get(0, "\t")
    result = []
    last = len(outline) - 1
    for i, value in enumerate(outline):
        result.append(value)
        if i < last:
            result.append(outsp.get(i + 1, default_outsp))

    # step 3: glue everything into one line
    return ''.join(result)

#處理入口，讀檔案，迴圈處理每一行，寫出
#輸入資料分隔符預設\t,輸出資料預設分隔符\t
def _parse_to_row(to_row):
    """Classify one -a item as a default, a column mapping or a plain column.

    Returns ("default", out_col, value) for "out=value",
    ("map", out_col, in_col) for "out:in" and ("plain", out_col, None)
    otherwise. A "=" or ":" preceded by a backslash is not a delimiter;
    the backslash is dropped from default values.
    """
    to_row = str(to_row)
    eq_parts = re.split(r"(?<!\\)=", to_row, maxsplit=1)
    if len(eq_parts) == 2:
        value = eq_parts[1].replace("\\=", "=").replace("\\:", ":")
        return "default", int(eq_parts[0]), value
    colon_parts = re.split(r"(?<!\\):", to_row)
    if len(colon_parts) == 2:
        return "map", int(colon_parts[0]), int(colon_parts[1])
    return "plain", int(to_row), None


def _parse_outsp(outsp):
    """Turn the -P value into {column: separator}; key 0 is the default.

    "0=<sep>,3=<sep>" yields one entry per numbered item. Anything that
    does not follow that form is used verbatim as the default separator.
    """
    if len(outsp) > 1 and "," in outsp:
        pairs = re.findall(r"(\d+)=(.+?)(?=,\d+=|$)", outsp, re.S)
        if pairs:
            return {int(col): sep for col, sep in pairs}
    return {0: outsp}


def process(inpath, total, to, outpath, insp, outsp, empty_fill, fill_with_sno, error_line_out):
    """Convert every line of ``inpath`` and write the result to ``outpath``.

    inpath         -- input file path.
    total          -- number of columns in each output line.
    to             -- list of -a items: "n", "n=default" or "out:in".
    outpath        -- output file path.
    insp           -- input separator; a multi-character value containing
                      "|" is applied as a regular expression.
    outsp          -- output separator, or "0=<sep>,<col>=<sep>,...".
    empty_fill     -- text for output columns that are not configured.
    fill_with_sno  -- fill unconfigured columns with their column number.
    error_line_out -- copy lines with too few fields to the output
                      unchanged instead of dropping them.

    Raises ValueError when an item of ``to`` is not numeric.
    """
    specs = [_parse_to_row(to_row) for to_row in to]

    # step 1: input columns claimed by explicit "out:in" mappings, and the
    # number of items that need an input field (defaults need none)
    used_row = set(in_col for kind, _, in_col in specs if kind == "map")
    in_count = sum(1 for kind, _, _ in specs if kind != "default")

    # step 2: build output column -> source; must run after step 1 so that
    # plain columns never take an input column claimed by a later mapping
    ft_map = {}
    for kind, out_col, payload in specs:
        if kind == "default":
            ft_map[out_col] = "d" + payload
        elif kind == "map":
            ft_map[out_col] = str(payload)
        else:
            # plain column: first input column not used yet; stays 0 when
            # every index up to ``total`` is already taken
            to_index = 0
            for i in range(1, total + 1):
                if i not in used_row:
                    to_index = i
                    break
            ft_map[out_col] = str(to_index)
            used_row.add(to_index)

    # step 3: output separators
    outsp = _parse_outsp(outsp)

    # step 4: convert each line; a single-character separator is always
    # taken literally
    use_regex = len(insp) > 1 and "|" in insp
    result = []
    for line in read_file(inpath):
        parts = re.split(insp, line) if use_regex else line.split(insp)
        if len(parts) >= in_count:
            outline = one_line_proc(parts, total, ft_map, outsp, empty_fill, fill_with_sno)
            result.append(outline + "\n")
        elif error_line_out:
            # too few fields: pass the line through only when -e is set
            result.append(line + "\n")

    # step 5: write the result; the file is closed even if writing fails
    with open(outpath, "w") as f:
        f.writelines(result)

#特殊的處理入口，處理維度為每一行,目前只有時間處理
def handler_specal_part(part_str):
    """Expand a timestamp expression found in an input field.

    A field of the form "TS=<date>" or "TS<n>=<date>" is replaced by the
    value computed by get_timestamp(); <date> may be followed by "+N" or
    "-N", which is passed on as the interval. When <n> is given
    (8, 10, 14 or 19) the result is formatted with the matching layout
    instead. Any other field is returned unchanged.
    """
    # Not a timestamp expression: hand the field back untouched.
    if not part_str.startswith("TS") or "=" not in part_str:
        return part_str

    ts_format = {8: "%Y%m%d",
                 10: "%Y-%m-%d",
                 14: "%Y%m%d%H%M%S",
                 19: "%Y-%m-%d %H:%M:%S"}

    # Digits between "TS" and "=" select the output layout; 0 means none.
    eq_pos = part_str.index("=")
    to_l = int(part_str[2:eq_pos]) if part_str[2] != "=" else 0

    expr = part_str.split("=")[1].strip()
    interval = 0
    if "+" in expr:
        pieces = expr.split("+")
        inputdate = pieces[0].strip()
        interval = int(pieces[1].strip())
    elif "-" in expr:
        pieces = expr.split("-")
        count = len(pieces)
        if count == 2:
            # 20101020 - XX
            inputdate = pieces[0].strip()
            interval = -int(pieces[1].strip())
        elif count == 4:
            # 2010-10-20 - XX
            inputdate = "-".join(pieces[:-1])
            interval = -int(pieces[-1])
        else:
            # 2010-10-20 (three pieces) or anything else: no offset
            inputdate = expr
    else:
        inputdate = expr.strip()

    stamp = get_timestamp(inputdate, ts_format, interval)
    if to_l > 0:
        return time.strftime(ts_format.get(to_l), time.localtime(int(stamp)))
    return stamp

#將時間由秒轉化為目標格式
def get_timestamp(inputdate, ts_format, interval=0):
    """Convert a date string to epoch seconds and return them as a string.

    inputdate -- date text; if it contains "now()" the current local time
                 is used instead. Surrounding whitespace is ignored.
    ts_format -- maps the length of the date text to its strptime format.
    interval  -- number added to the resulting seconds.

    Prints an error and exits with status 1 when the length of the text
    has no format in ``ts_format`` or the text does not match the format.
    """
    if "now()" in inputdate:
        inputdate = time.strftime("%Y%m%d%H%M%S")
    inputdate = inputdate.strip()

    # The format is chosen purely by the length of the date text.
    date_format = ts_format.get(len(inputdate))
    try:
        if date_format is None:
            raise ValueError("unsupported date length")
        ts = time.strptime(inputdate, date_format)
    except ValueError:
        print("the input date and time expression error,only allow 'YYYYmmdd[HHMMSS]' or 'YYYY-MM-DD HH:MM:SS'  ")
        sys.exit(1)
    return str(int(time.mktime(ts)) + interval)

#列印幫助資訊
def help_msg():
    """Print the usage text to stdout and exit with status 0."""
    usage = (
        "功能：原資料檔案轉為目標資料格式",
        "選項:",
        "\t -i inputfilepath  [必輸，input, 原檔案路徑]",
        "\t -t n              [必輸，total, n為數字，目標資料總的域個數]",
        "\t -a '1,3,4'        [必輸，array, 域編號字串，逗號分隔。指定域用原資料欄位填充，未指定用'0'填充]",
        "\t                          -a '3,5=abc,6:2'  第5列預設值abc填充,第6列使用輸入的第2列填充，第3列使用輸入第1列填充",
        "\t -o outputfilepath [可選，output, 預設為 inputfilepath.dist ]",
        # doubled backslashes so the help shows the two characters \t
        # rather than printing a real tab
        "\t -F 'FS'           [可選，field Sep，原檔案域分隔符，預設為\\t,支援多分隔符，eg.'\\t|\\|\\|' ]",
        "\t -P 'OFS'          [可選，out FS，輸出檔案的域分隔符，預設為\\t,可指定多個，多個需指定序號=分隔符,逗號分隔,預設分隔符序號0 ]",
        "\t -f 'fill_str'     [可選，fill，未選列的填充值，預設為空 ]",
        "\t -s                [可選，serial number,當配置時，-f無效，使用列號填充未指派的列]",
        "\t -e                [可選，error, 原始檔列切分不一致行/空行/註釋等，會被直接輸出，正確行按原邏輯處理]",
    )
    for text in usage:
        print(text)
    sys.exit(0)

#判斷某個引數必須被定義
def must_be_defined(param, map, error_info):
    """Exit with status 1 after printing ``error_info`` unless ``param`` is in ``map``.

    param      -- name that has to be present.
    map        -- container of the names that are defined (the parameter
                  name shadows the builtin; kept for compatibility).
    error_info -- message printed before exiting.
    """
    if param in map:
        return
    print(error_info)
    sys.exit(1)

#程式入口，讀入引數，執行
def _unescape(value):
    """Turn backslash escapes typed on the command line into real characters."""
    try:
        # Python 2 byte string
        return value.decode("string_escape")
    except AttributeError:
        # Python 3 str has no decode(); backslashreplace keeps characters
        # outside latin-1 intact through the round trip
        return value.encode("latin-1", "backslashreplace").decode("unicode_escape")


def main():
    """Parse the command-line options and run the conversion.

    -i, -t and -a are required. Exits with status 1 on unknown options,
    on fewer than three options, on a non-numeric -t value, on a missing
    required option or on a missing input file.
    """
    # defaults
    insp = "\t"
    outsp = "\t"
    empty_fill = ''
    fill_with_sno = False
    error_line_out = False
    inpath = outpath = total = to = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], "F:P:t:a:i:o:f:hHse", ["help"])
    except getopt.GetoptError:
        print(sys.argv[0]+" : params are not defined well!")
        print("Command : ./dataformat.py -h")
        sys.exit(1)

    for op, value in opts:
        if op in ("-h", "-H", "--help"):
            help_msg()
        elif op == "-i":
            inpath = value
        elif op == "-o":
            outpath = value
        elif op == "-t":
            try:
                total = int(value)
            except ValueError:
                print(sys.argv[0]+" : -t param must be a number")
                sys.exit(1)
        elif op == "-a":
            to = value.split(",")
        elif op == "-F":
            insp = _unescape(value)
        elif op == "-P":
            outsp = _unescape(value)
        elif op == "-f":
            empty_fill = value
        elif op == "-s":
            fill_with_sno = True
        elif op == "-e":
            error_line_out = True

    if len(opts) < 3:
        print(sys.argv[0]+" : the amount of params must great equal than 3")
        print("Command : ./dataformat.py -h")
        sys.exit(1)

    # names of the required options that were actually supplied
    supplied = (("inpath", inpath), ("total", total), ("to", to))
    params_map = [name for name, val in supplied if val is not None]

    must_be_defined('inpath', params_map, sys.argv[0]+" : -i param is needed,input file path must define!")
    must_be_defined('total', params_map, sys.argv[0]+" : -t param is needed,the fields of result file must define!")
    must_be_defined('to', params_map, sys.argv[0]+" : -a param is needed,must assign the field to put !")

    if not os.path.exists(inpath):
        print(sys.argv[0]+" file : %s is not exists"%inpath)
        sys.exit(1)

    if outpath is None:
        outpath = inpath+".dist"

    process(inpath, total, to, outpath, insp, outsp, empty_fill, fill_with_sno, error_line_out)

# Run the command-line tool only when this file is executed as a script,
# not when it is imported.
if __name__ =="__main__":
    main()

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

#!/usr/bin/env python

# -*- coding: utf-8 -*-

#dataformat.py

# wklken@yeah.net

#this script change data from your source to the dest data format

#2011-08-05 created version0.1

#2011-10-29 add row-row mapping ,default row value .rebuild all functions. version0.2

#next:add data auto generate by re expression

#2011-12-17 add new functions, add timestamp creator. version0.3

#2012-03-08 rebuild functions. version0.4

#2012-06-22 add function to support multi output separators

#2012-07-11 fix bug line 44,add if

#2012-09-03 rebuild functions,add help msg! version0.5

#2012-11-08 last version edited by lingyue.wkl

# this py: https://github.com/wklken/pytools/blob/master/data_process/dataformat.py

import os

import sys

import getopt

import time

import re

#read file and get each line without the trailing \n

def read_file(path):
    """Return the lines of the text file at ``path`` without their newline.

    Only a trailing newline character is removed, so a final line that is
    not newline-terminated keeps its last character. The file is closed
    even if reading fails.
    """
    with open(path, "r") as f:
        return [line[:-1] if line.endswith("\n") else line for line in f]

#處理一行，轉為目標格式，返回目標行

def one_line_proc(parts, total, ft_map, outsp, empty_fill, fill_with_sno):
    """Build one output line with ``total`` columns from the input fields.

    parts         -- fields of the current input line.
    total         -- number of columns in the output line.
    ft_map        -- maps a 1-based output column to either "d<value>"
                     (a literal default value) or the 1-based input column
                     number as a string.
    outsp         -- maps a 1-based column number to the separator written
                     after that column; key 0 holds the default separator.
    empty_fill    -- text used for columns missing from ``ft_map``.
    fill_with_sno -- when true, unmapped columns receive their own column
                     number instead of ``empty_fill``.

    Returns the joined line without a trailing newline.
    """
    # step 1: work out the value of every output column
    outline = []
    for i in range(1, total + 1):
        fill_index = ft_map.get(i)
        if fill_index is None:
            # unmapped column: column number (-s) or the fill value
            outline.append(str(i) if fill_with_sno else empty_fill)
        elif fill_index.startswith("d"):
            # literal default value; no timestamp handling for defaults
            outline.append(fill_index[1:])
        else:
            outline.append(handler_specal_part(parts[int(fill_index) - 1]))

    # step 2: interleave the separators; a column-specific one wins over
    # the default, and nothing is written after the last column
    default_outsp = outsp.get(0, "\t")
    result = []
    last = len(outline) - 1
    for i, value in enumerate(outline):
        result.append(value)
        if i < last:
            result.append(outsp.get(i + 1, default_outsp))

    # step 3: glue everything into one line
    return ''.join(result)

#處理入口，讀檔案，迴圈處理每一行，寫出

#輸入資料分隔符預設\t,輸出資料預設分隔符\t

def _parse_to_row(to_row):
    """Classify one -a item as a default, a column mapping or a plain column.

    Returns ("default", out_col, value) for "out=value",
    ("map", out_col, in_col) for "out:in" and ("plain", out_col, None)
    otherwise. A "=" or ":" preceded by a backslash is not a delimiter;
    the backslash is dropped from default values.
    """
    to_row = str(to_row)
    eq_parts = re.split(r"(?<!\\)=", to_row, maxsplit=1)
    if len(eq_parts) == 2:
        value = eq_parts[1].replace("\\=", "=").replace("\\:", ":")
        return "default", int(eq_parts[0]), value
    colon_parts = re.split(r"(?<!\\):", to_row)
    if len(colon_parts) == 2:
        return "map", int(colon_parts[0]), int(colon_parts[1])
    return "plain", int(to_row), None


def _parse_outsp(outsp):
    """Turn the -P value into {column: separator}; key 0 is the default.

    "0=<sep>,3=<sep>" yields one entry per numbered item. Anything that
    does not follow that form is used verbatim as the default separator.
    """
    if len(outsp) > 1 and "," in outsp:
        pairs = re.findall(r"(\d+)=(.+?)(?=,\d+=|$)", outsp, re.S)
        if pairs:
            return {int(col): sep for col, sep in pairs}
    return {0: outsp}


def process(inpath, total, to, outpath, insp, outsp, empty_fill, fill_with_sno, error_line_out):
    """Convert every line of ``inpath`` and write the result to ``outpath``.

    inpath         -- input file path.
    total          -- number of columns in each output line.
    to             -- list of -a items: "n", "n=default" or "out:in".
    outpath        -- output file path.
    insp           -- input separator; a multi-character value containing
                      "|" is applied as a regular expression.
    outsp          -- output separator, or "0=<sep>,<col>=<sep>,...".
    empty_fill     -- text for output columns that are not configured.
    fill_with_sno  -- fill unconfigured columns with their column number.
    error_line_out -- copy lines with too few fields to the output
                      unchanged instead of dropping them.

    Raises ValueError when an item of ``to`` is not numeric.
    """
    specs = [_parse_to_row(to_row) for to_row in to]

    # step 1: input columns claimed by explicit "out:in" mappings, and the
    # number of items that need an input field (defaults need none)
    used_row = set(in_col for kind, _, in_col in specs if kind == "map")
    in_count = sum(1 for kind, _, _ in specs if kind != "default")

    # step 2: build output column -> source; must run after step 1 so that
    # plain columns never take an input column claimed by a later mapping
    ft_map = {}
    for kind, out_col, payload in specs:
        if kind == "default":
            ft_map[out_col] = "d" + payload
        elif kind == "map":
            ft_map[out_col] = str(payload)
        else:
            # plain column: first input column not used yet; stays 0 when
            # every index up to ``total`` is already taken
            to_index = 0
            for i in range(1, total + 1):
                if i not in used_row:
                    to_index = i
                    break
            ft_map[out_col] = str(to_index)
            used_row.add(to_index)

    # step 3: output separators
    outsp = _parse_outsp(outsp)

    # step 4: convert each line; a single-character separator is always
    # taken literally
    use_regex = len(insp) > 1 and "|" in insp
    result = []
    for line in read_file(inpath):
        parts = re.split(insp, line) if use_regex else line.split(insp)
        if len(parts) >= in_count:
            outline = one_line_proc(parts, total, ft_map, outsp, empty_fill, fill_with_sno)
            result.append(outline + "\n")
        elif error_line_out:
            # too few fields: pass the line through only when -e is set
            result.append(line + "\n")

    # step 5: write the result; the file is closed even if writing fails
    with open(outpath, "w") as f:
        f.writelines(result)

#特殊的處理入口，處理維度為每一行,目前只有時間處理

def handler_specal_part(part_str):
    """Expand a timestamp expression found in an input field.

    A field of the form "TS=<date>" or "TS<n>=<date>" is replaced by the
    value computed by get_timestamp(); <date> may be followed by "+N" or
    "-N", which is passed on as the interval. When <n> is given
    (8, 10, 14 or 19) the result is formatted with the matching layout
    instead. Any other field is returned unchanged.
    """
    # Not a timestamp expression: hand the field back untouched.
    if not part_str.startswith("TS") or "=" not in part_str:
        return part_str

    ts_format = {8: "%Y%m%d",
                 10: "%Y-%m-%d",
                 14: "%Y%m%d%H%M%S",
                 19: "%Y-%m-%d %H:%M:%S"}

    # Digits between "TS" and "=" select the output layout; 0 means none.
    eq_pos = part_str.index("=")
    to_l = int(part_str[2:eq_pos]) if part_str[2] != "=" else 0

    expr = part_str.split("=")[1].strip()
    interval = 0
    if "+" in expr:
        pieces = expr.split("+")
        inputdate = pieces[0].strip()
        interval = int(pieces[1].strip())
    elif "-" in expr:
        pieces = expr.split("-")
        count = len(pieces)
        if count == 2:
            # 20101020 - XX
            inputdate = pieces[0].strip()
            interval = -int(pieces[1].strip())
        elif count == 4:
            # 2010-10-20 - XX
            inputdate = "-".join(pieces[:-1])
            interval = -int(pieces[-1])
        else:
            # 2010-10-20 (three pieces) or anything else: no offset
            inputdate = expr
    else:
        inputdate = expr.strip()

    stamp = get_timestamp(inputdate, ts_format, interval)
    if to_l > 0:
        return time.strftime(ts_format.get(to_l), time.localtime(int(stamp)))
    return stamp

#將時間由秒轉化為目標格式

def get_timestamp(inputdate, ts_format, interval=0):
    """Convert a date string to epoch seconds and return them as a string.

    inputdate -- date text; if it contains "now()" the current local time
                 is used instead. Surrounding whitespace is ignored.
    ts_format -- maps the length of the date text to its strptime format.
    interval  -- number added to the resulting seconds.

    Prints an error and exits with status 1 when the length of the text
    has no format in ``ts_format`` or the text does not match the format.
    """
    if "now()" in inputdate:
        inputdate = time.strftime("%Y%m%d%H%M%S")
    inputdate = inputdate.strip()

    # The format is chosen purely by the length of the date text.
    date_format = ts_format.get(len(inputdate))
    try:
        if date_format is None:
            raise ValueError("unsupported date length")
        ts = time.strptime(inputdate, date_format)
    except ValueError:
        print("the input date and time expression error,only allow 'YYYYmmdd[HHMMSS]' or 'YYYY-MM-DD HH:MM:SS'  ")
        sys.exit(1)
    return str(int(time.mktime(ts)) + interval)

#列印幫助資訊

def help_msg():
    """Print the usage text to stdout and exit with status 0."""
    usage = (
        "功能：原資料檔案轉為目標資料格式",
        "選項:",
        "\t -i inputfilepath  [必輸，input, 原檔案路徑]",
        "\t -t n              [必輸，total, n為數字，目標資料總的域個數]",
        "\t -a '1,3,4'        [必輸，array, 域編號字串，逗號分隔。指定域用原資料欄位填充，未指定用'0'填充]",
        "\t                          -a '3,5=abc,6:2'  第5列預設值abc填充,第6列使用輸入的第2列填充，第3列使用輸入第1列填充",
        "\t -o outputfilepath [可選，output, 預設為 inputfilepath.dist ]",
        # doubled backslashes so the help shows the two characters \t
        # rather than printing a real tab
        "\t -F 'FS'           [可選，field Sep，原檔案域分隔符，預設為\\t,支援多分隔符，eg.'\\t|\\|\\|' ]",
        "\t -P 'OFS'          [可選，out FS，輸出檔案的域分隔符，預設為\\t,可指定多個，多個需指定序號=分隔符,逗號分隔,預設分隔符序號0 ]",
        "\t -f 'fill_str'     [可選，fill，未選列的填充值，預設為空 ]",
        "\t -s                [可選，serial number,當配置時，-f無效，使用列號填充未指派的列]",
        "\t -e                [可選，error, 原始檔列切分不一致行/空行/註釋等，會被直接輸出，正確行按原邏輯處理]",
    )
    for text in usage:
        print(text)
    sys.exit(0)

#判斷某個引數必須被定義

def must_be_defined(param, map, error_info):
    """Exit with status 1 after printing ``error_info`` unless ``param`` is in ``map``.

    param      -- name that has to be present.
    map        -- container of the names that are defined (the parameter
                  name shadows the builtin; kept for compatibility).
    error_info -- message printed before exiting.
    """
    if param in map:
        return
    print(error_info)
    sys.exit(1)

#程式入口，讀入引數，執行

def _unescape(value):
    """Turn backslash escapes typed on the command line into real characters."""
    try:
        # Python 2 byte string
        return value.decode("string_escape")
    except AttributeError:
        # Python 3 str has no decode(); backslashreplace keeps characters
        # outside latin-1 intact through the round trip
        return value.encode("latin-1", "backslashreplace").decode("unicode_escape")


def main():
    """Parse the command-line options and run the conversion.

    -i, -t and -a are required. Exits with status 1 on unknown options,
    on fewer than three options, on a non-numeric -t value, on a missing
    required option or on a missing input file.
    """
    # defaults
    insp = "\t"
    outsp = "\t"
    empty_fill = ''
    fill_with_sno = False
    error_line_out = False
    inpath = outpath = total = to = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], "F:P:t:a:i:o:f:hHse", ["help"])
    except getopt.GetoptError:
        print(sys.argv[0]+" : params are not defined well!")
        print("Command : ./dataformat.py -h")
        sys.exit(1)

    for op, value in opts:
        if op in ("-h", "-H", "--help"):
            help_msg()
        elif op == "-i":
            inpath = value
        elif op == "-o":
            outpath = value
        elif op == "-t":
            try:
                total = int(value)
            except ValueError:
                print(sys.argv[0]+" : -t param must be a number")
                sys.exit(1)
        elif op == "-a":
            to = value.split(",")
        elif op == "-F":
            insp = _unescape(value)
        elif op == "-P":
            outsp = _unescape(value)
        elif op == "-f":
            empty_fill = value
        elif op == "-s":
            fill_with_sno = True
        elif op == "-e":
            error_line_out = True

    if len(opts) < 3:
        print(sys.argv[0]+" : the amount of params must great equal than 3")
        print("Command : ./dataformat.py -h")
        sys.exit(1)

    # names of the required options that were actually supplied
    supplied = (("inpath", inpath), ("total", total), ("to", to))
    params_map = [name for name, val in supplied if val is not None]

    must_be_defined('inpath', params_map, sys.argv[0]+" : -i param is needed,input file path must define!")
    must_be_defined('total', params_map, sys.argv[0]+" : -t param is needed,the fields of result file must define!")
    must_be_defined('to', params_map, sys.argv[0]+" : -a param is needed,must assign the field to put !")

    if not os.path.exists(inpath):
        print(sys.argv[0]+" file : %s is not exists"%inpath)
        sys.exit(1)

    if outpath is None:
        outpath = inpath+".dist"

    process(inpath, total, to, outpath, insp, outsp, empty_fill, fill_with_sno, error_line_out)

# Run the command-line tool only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()

使用說明

功能：可指定輸入分隔，輸出分隔，無配置欄位填充，某列預設值,可按順序填充，也可亂序對映填充

輸入：輸入檔案路徑

選項：

-i “path”
必設
輸入檔案路徑

-t n
必設
目標資料表總列數

-a “r1,r2”
必設
將要填充的列號列表，可配置預設值，可配置對映

-o “path”
可選
輸出檔案路徑，預設為 輸入檔案路徑.dist

-F “IFS”
可選
輸入檔案中欄位域分隔符，預設\t

-P ”OFS”
可選
輸出檔案中欄位域分隔符，預設\t

-f “”
可選
指定未配置列的填充內容，預設為空

-h
單獨
檢視幫助資訊

-i “path”

必設

輸入檔案路徑

-t n

必設

目標資料表總列數

-a “r1,r2”

必設

將要填充的列號列表，可配置預設值，可配置對映

-o “path”

可選

輸出檔案路徑，預設為輸入檔案路徑.dist

-F “IFS”

可選

輸入檔案中欄位域分隔符，預設\t

-P ”OFS”

可選

輸出檔案中欄位域分隔符，預設\t

-f “”

可選

指定未配置列的填充內容，預設為空

-h

單獨

檢視幫助資訊

列填充的配置示例：

普通用法【最常用】

命令：

./dataformat.py -i in_file -t 65 -a "22,39,63" -F "^I" -P "^A" -f "0"

1	./dataformat.py –i in_file –t 65 -a “22,39,63” –F “^I” –P “^A” –f “0”

說明：

in_file中欄位是以\t分隔的[可不配-F,使用預設]。
將in_file的第1,2,3列分別填充到in_file.dist[use default]的第22,39,63列
in_file.dist共65列，以^A分隔，未配置列以0填充
-a中順序與原始檔列序有關，若-a “39,22,63” 則是將第1列填充到第39列，第二列填充到22列，第3列填充到63列

in_file中欄位是以\t分隔的[可不配-F,使用預設]。

將in_file的第1,2,3列分別填充到in_file.dist[use default]的第22,39,63列

in_file.dist共65列，以^A分隔，未配置列以0填充

-a中順序與原始檔列序有關，若-a “39,22,63” 則是將第1列填充到第39列，第二列填充到22列，第3列填充到63列

列預設值用法:【需要對某些列填充相同的值，但不想在原始檔中維護】

命令:

./dataformat.py -i in_file -t 30 -a "3=tag_1,9,7,12=0.0" -o out_file

1	./dataformat.py -i in_file –t 30 –a “3=tag_1,9,7,12=0.0” –o out_file

說明:

in_file以\t分隔，輸出out_file以\t分隔
將in_file的第1列,第2列填充到out_file的第9列，第7列
out_file共30列，第3列均用字串”tag_1”填充，第12列用0.0填充，其他未配置列為空
注意：預設值的取值若使用到等號或冒號，需在前面加反斜線轉義，寫成 \= 與 \:

in_file以\t分隔，輸出out_file以\t分隔

將in_file的第1列,第2列填充到out_file的第9列，第7列

out_file共30列，第3列均用字串”tag_1”填充，第12列用0.0填充，其他未配置列為空

注意：預設值的取值若使用到等號或冒號，需在前面加反斜線轉義，寫成 \= 與 \:

列亂序對映：

命令:

./dataformat.py -i in_file -t 56 -a "3:2,9,5:3,1=abc,11"

1	./dataformat.py –i in_file –t 56 –a “3:2,9,5:3,1=abc,11”

說明:

分隔，輸入，輸出，同上…..
冒號前面為輸出檔案列號，後面為輸入檔案列號
目標檔案第3列用輸入檔案第2列填充，目標檔案第5列用輸入檔案第3列填充
目標檔案第一列均填充“abc”
目標檔案第9列用輸入檔案第1列填充，第11列用輸入檔案第4列填充【未配置對映，使用從頭開始還沒有被用過的列】
指令碼會對簡單的欄位數量等對映邏輯進行檢測，複雜最好全配上，使用預設太抽象

分隔，輸入，輸出，同上…..

冒號前面為輸出檔案列號，後面為輸入檔案列號

目標檔案第3列用輸入檔案第2列填充，目標檔案第5列用輸入檔案第3列填充

目標檔案第一列均填充“abc”

目標檔案第9列用輸入檔案第1列填充，第11列用輸入檔案第4列填充【未配置對映，使用從頭開始還沒有被用過的列】

指令碼會對簡單的欄位數量等對映邏輯進行檢測，複雜最好全配上，使用預設太抽象

程式碼託管位置連結

打賞支援我寫出更多好文章，謝謝！
打賞作者

打賞支援我寫出更多好文章，謝謝！

任選一種支付方式

Python通用資料格式轉換工具

Hive資料格式轉換
2019-01-08
Hive
layui tree資料格式轉換
2019-11-19
UI
把JSON資料格式轉換為Python的類物件
2019-06-04
JSONPython物件
mxnet資料格式轉換為tensorflow，pytorch資料
2018-12-14
PyTorch
Python將xml格式轉換為json格式
2019-03-22
PythonXMLJSON
heic格式轉換jpg工具——轉易俠heic轉換器
2021-12-07
Oracle資料庫日期格式轉換操作
2018-05-10
Oracle資料庫
資料庫轉換工具，不同資料庫之前任意轉換
2020-08-07
資料庫
Python字典格式與JSON格式的相互轉換
2019-02-20
PythonJSON
文字格式轉換工具：Text Workflow for mac
2024-01-28
Mac
全能的視訊格式轉換工具
2021-10-16
萬能媒體格式轉換工具
2021-11-04
MakeMKV——MKV視訊格式轉換工具
2021-11-03
好用的音訊格式轉換工具
2021-09-28
音訊
Mac視訊格式轉換工具—iFlicks
2021-11-07
Mac
將json資料轉換為Python字典將json資料轉換為Python字典
2023-11-07
JSONPython
《娘道》kux影片格式如何轉換mp4通用格式
2018-11-13
UX
用兩種方法把JSON資料格式轉換為Python的類物件
2020-03-17
JSONPython物件
Java常用時間格式轉換工具類
2019-12-30
Java
Iridient Developer for mac - RAW影像格式轉換工具
2022-01-10
DeveloperMac
MKV視訊格式轉換工具：MakeMKV mac
2022-02-28
Mac
Iridient Developer for mac(RAW影像格式轉換工具)
2022-10-31
DeveloperMac
Permute 3 for mac(媒體格式轉換工具)
2022-05-24
Mac
MakeMKV for mac(MKV影片格式轉換工具)
2022-03-13
Mac
Python——格式轉換的學習筆記
2020-09-28
Python筆記
影片格式轉換工具：Cisdem Video Converter for Mac
2023-05-19
IDEMac
Doxillion Plus for mac(多格式文件轉換工具)
2020-07-28
Mac
工具推薦:免費好用的WebP格式轉換工具：AnyWebP
2024-10-05
Web
影像格式轉換
2020-12-24
Python3 資料型別轉換
2023-12-19
Python資料型別
python 與 Mysql 資料型別轉換
2020-05-11
PythonMySQL 資料型別
ODX 診斷資料庫轉換工具 — DDC
2022-03-30
資料庫
Permute 3 for Mac，全能音影片格式轉換工具！
2023-12-25
Mac
批次HEIC格式圖片轉換工具 iMazing HEIC Converter
2020-09-18
Total Video Converter Pro全能影片格式轉換工具
2022-02-07
IDE
萬能媒體格式轉換工具Permute 3 for Mac
2022-08-03
Mac
Permute 3 for mac(Mac音影片格式轉換工具)
2022-06-08
Mac
python--進位制轉換和資料交換
2020-12-07
Python
ABAP和XML資料格式互相轉換的兩種方式
2018-09-17
XML

Python通用資料格式轉換工具

需求

原始碼

使用說明

打賞支援我寫出更多好文章，謝謝！

相關文章