6 from pyarrow
import csv
9 ctypes._dlopen(
'libDBEngine.so', ctypes.RTLD_GLOBAL)
13 os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
14 "Tests/Import/datafiles"
19 engine = dbe.PyDbEngine(
21 enable_columnar_output=1,
25 assert bool(engine.closed) ==
False
30 table = csv.read_csv(root +
"/santander_top1000.csv")
32 engine.importArrowTable(
"santander", table)
33 assert bool(engine.closed) ==
False
34 r = engine.executeDML(
"select * from santander")
36 assert r.colCount() == 202
37 assert r.rowCount() == 999
41 'a': [1, 2, 3, 4, 5, 6],
42 'b': [2, 3, 4, 5, 6, 7],
43 'c': [3, 4, 5, 6, 7, 8],
44 'd': [4, 5, 6, 7, 8, 9],
45 'e': [
'5',
'6',
'7',
'8',
'9',
'0']
48 b
'a,b,c,d,e\n1,2,3,4,5\n2,3,4,5,6\n3,4,5,6,7\n4,5,6,7,8\n5,6,7,8,9\n6,7,8,9,0'
53 convert_options=csv.ConvertOptions(
64 engine.importArrowTable(
"usecols", table)
65 assert bool(engine.closed) ==
False
66 cursor = engine.executeDML(
"select * from usecols")
68 batch = cursor.getArrowRecordBatch()
70 assert batch.to_pydict() == target
74 'timestamp': [datetime.datetime(2010, 4, 1, 0, 0), datetime.datetime(2010, 4, 1, 0, 30), datetime.datetime(2010, 4, 1, 1, 0)],
75 'symbol': [
'USD/JPY',
'USD/JPY',
'USD/JPY'],
76 'high': [93.526, 93.475, 93.421],
77 'low': [93.361, 93.352, 93.326],
78 'open': [93.518, 93.385, 93.391],
79 'close': [93.382, 93.391, 93.384],
80 'spread': [0.005, 0.006, 0.006],
81 'volume': [3049, 2251, 1577]
84 b
'timestamp,symbol,high,low,open,close,spread,volume\n'
85 b
'2010-04-01 00:00:00,USD/JPY,93.52600,93.36100,93.51800,93.38200,0.00500,3049\n'
86 b
'2010-04-01 00:30:00,USD/JPY,93.47500,93.35200,93.38500,93.39100,0.00600,2251\n'
87 b
'2010-04-01 01:00:00,USD/JPY,93.42100,93.32600,93.39100,93.38400,0.00600,1577\n'
90 table = csv.read_csv(fp)
92 engine.importArrowTable(
"time_parsing", table)
93 assert bool(engine.closed) ==
False
94 cursor = engine.executeDML(
"select * from time_parsing")
96 batch = cursor.getArrowRecordBatch()
98 assert batch.to_pydict() == target
118 b
',CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE\n'
119 b
'0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0\n'
122 table = csv.read_csv(fp)
124 engine.importArrowTable(
"csv_fillna", table)
125 assert bool(engine.closed) ==
False
126 cursor = engine.executeDML(
"select CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE from csv_fillna")
128 batch = cursor.getArrowRecordBatch()
130 assert batch.to_pydict() == target
133 target = {
'a': [1, 2, 3],
'b': [1, 2, 3],
'c': [
None,
None,
None]}
134 fp = io.BytesIO(b
'a,b,c\n1,1,\n2,2,\n3,3,\n')
136 table = csv.read_csv(
138 convert_options=csv.ConvertOptions(
147 engine.importArrowTable(
"test_null_col", table)
148 assert bool(engine.closed) ==
False
149 cursor = engine.executeDML(
"select * from test_null_col")
151 batch = cursor.getArrowRecordBatch()
153 assert batch.to_pydict() == target
if __name__ == "__main__":
    # Run this file's tests verbosely when it is executed directly, and pass
    # pytest's return value on as the process exit status so that a failing
    # run is visible to the calling shell or CI job instead of exiting 0.
    raise SystemExit(pytest.main(["-v", __file__]))