In [1]:
import pandas as pd
import numpy as np

# Build a synthetic time series and thin it out to a realistic-looking sample.
#
# The timestamps are not random: they are 100 million consecutive milliseconds
# starting at 2019-01-02 06:00. Only the Column1 values and the choice of
# sampled rows are random. Seeding NumPy's global generator first makes the
# randn() draw repeatable; DataFrame.sample() is called without random_state,
# which per the pandas docs falls back to that same global generator.
np.random.seed(12345)

# Named `timestamps` rather than `range` so the built-in range() is not
# shadowed for the rest of the notebook.
timestamps = pd.date_range('2019-01-02 06:00', periods=100000000, freq='ms')

# One row per millisecond: the timestamp plus a normally distributed value.
df = pd.DataFrame({ 'Timestamp': timestamps, 'Column1': np.random.randn(len(timestamps)) })

# Keep a random 0.25 % of the rows (250000 of 100 million) so the gaps between
# consecutive records are irregular, which looks more like real data.
dfSampled = df.sample(frac=0.0025)

# Sorting by the original integer index puts the sampled rows back into
# chronological order, because row i holds the i-th millisecond.
dfSorted = dfSampled.sort_index()
In [2]:
# Report the dimensions of the sampled frame first.
display(dfSorted.shape)
# Cap rendered rows at 10 so the frame below is shown as a short head/tail
# preview instead of a long table. This is a global pandas option and stays in
# effect for later cells.
pd.set_option("display.max_rows", 10)
display(dfSorted)
(250000, 2)
Timestamp Column1
544 2019-01-02 06:00:00.544 0.128664
1630 2019-01-02 06:00:01.630 -0.511683
1650 2019-01-02 06:00:01.650 -0.378890
1836 2019-01-02 06:00:01.836 0.381219
1897 2019-01-02 06:00:01.897 1.531363
... ... ...
99997960 2019-01-03 09:46:37.960 0.304508
99998074 2019-01-03 09:46:38.074 1.942302
99998197 2019-01-03 09:46:38.197 -0.139574
99998864 2019-01-03 09:46:38.864 0.581686
99999256 2019-01-03 09:46:39.256 -0.660543

250000 rows × 2 columns

In [3]:
# Write the sorted sample to a semicolon-separated, UTF-8 encoded CSV file in
# the current working directory. index=False leaves the integer row index out
# of the file, so only the Timestamp and Column1 columns are written.
dfSorted.to_csv(
    "timeseriesdata_250k.csv",
    index=False,
    sep=";",
    encoding="utf-8",
)