File Handling in Python
Any Data Scientist, Data Engineer, or Data Analyst must know the different file formats and how to handle them, regardless of how the data is stored — whether the file is in a structured, semi-structured, or unstructured format.
Reading from & Writing to plain text files in Python:
open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)
"r" "read mode" [default]
"rb" "read in binary"
r+ both reading and writing
rb+ both reading and writing in binary format
"w" "write mode"
"wb" "write in binary"
w+ both writing and reading
wb+ both writing and reading in binary format
"a" "append mode"
ab appending in binary format
a+ both appending and reading
ab+ both appending and reading in binary format
"U" "read files with Unix or Windows line endings"
f = open(filename[, mode[, buffersize]])
fl = open("file.name")
fl = open("file.name","r")
fl = open("file.name","w") -- existing file contents will be erased; the file will be created if it does not exist
fl.close()
fl.read([N]) -- read at most N bytes (characters in text mode) from the file
str = fo.read(10)
fl.readline([N])
fl.readlines([N])
>>> lst= [ x for x in open("text.txt","r").readlines() ]
fl.write("new line")
fo.write("Python is a great language.\nYeah its great!!\n")
f.write("This is line %d\r\n" % (i+1))
fh.writelines(lines_of_text)
file.next()
seek(pos[, how])
position = fo.seek(0, 0);
position = fo.tell();
fileno()
file.truncate([size])
yield python_var_name
print "Name of the file: ", fl.name
print "Closed or not : ", fl.closed
print "Opening mode : ", fl.mode
print "Softspace flag : ", fl.softspace
outfile.writelines(infile.readlines())
for line in fh.readlines():
print(line, end="")
with -- Context Manager in Python
with open('sales.csv', 'r') as file:
print(file.read())
with open('values_2_plot.txt','r') as fl:
pl=csv.reader(fl,delimiter=',')
for row in pl:
x.append(int(row[0]))
y.append(int(row[1]))
import re
with open("filename") as origin_file:
for line in origin_file:
line = re.findall(r'something', line)
if line:
line = line[0].split('"')[1]
print line
line=iter(file)
next(line)
Reading CSV files in Python (Pandas):
import pandas as pd
py_data_frame=pd.read_csv('market_data.csv')
Reading XLSX files in Python (Pandas):
import pandas as pd
py_data_frame=pd.read_excel('market_data.xlsx', sheet_name="Sheet3")
Reading JSON files in Python (Pandas):
import pandas as pd
py_data_frame=pd.read_json('filename.json')
Reading HTML files in Python (Pandas):
import pandas as pd
py_data_frame=pd.read_html('homepage.html')
Using BeautifulSoup Python Package:
soup = BeautifulSoup(html_doc)
Using urllib Python Package:
from urllib.request import urlopen, Request
conn = urlopen("https://satya-data.blogspot.com")
html = conn.read()
Reading ZIP files in Python:
from zipfile import ZipFile
zfile = ZipFile('Test.zip', 'r')
df = zfile.read('train.csv')
No comments:
Post a Comment