常用模块

os
random
time   datetime
shutil
shelve
xml
configparser

1
2
3
4
5
6
7

# OS模块

os.environ             获取系统环境变量
os.name                输出字符串指示当前使用平台. win->"nt";Linux->"posix"
os.sep                 输出操作系统特定的路径分隔符. win下为"\\",Linux下为"/"
os.linesep             输出当前平台使用的行终止符. win下为"\r\n",Linux下为"\n"
os.pathsep             输出用于分割文件路径的字符串 win下为";" , Linux下为":"
os.curdir              返回当前目录: ('.')
os.pardir              获取当前目录的父目录字符串名: ('..')

                        
os.getcwd()                         获取当前工作目录,即当前python脚本工作的目录路径  
os.stat('path/filename')            获取文件/目录信息
os.chdir("dirname")                 改变当前脚本工作目录;相当于shell下cd
os.mkdir('dirname')                 生成单级目录;相当于shell中mkdir dirname
os.makedirs('dirname1/dirname2')    可生成多层递归目录
os.remove("filename")               删除一个文件
os.rmdir('dirname')                 删除单级空目录,若目录不为空则无法删除,报错;
                                    相当于shell中rmdir dirname
os.removedirs('dirname1')           若目录为空,则删除,并递归到上一级目录,若也为空,则删除.以此类推
os.listdir('dirname')               以列表方式返回指定目录下的所有文件和子目录,包括隐藏文件
os.rename("oldname","newname")      重命名文件/目录
os.system("bash command")           运行shell命令,结果会在控制台直接打印


os.path.join(path1[, path2[, ...]])  将多个路径组合后返回
os.path.isfile(path)                 若path是一个存在的文件,则返回True;否则返回False
os.path.isdir(path)                  若path是一个存在的目录,则返回True;否则返回False
os.path.exists(path)                 如果path存在,返回True;如果path不存在,返回False
os.path.isabs(path)                  如果path是绝对路径,返回True
os.path.split(path)                  将path分割成目录和文件名二元组返回
os.path.dirname(path)                返回path的目录. 其实就是os.path.split(path)的第一个元素
os.path.basename(path)               返回path的文件名. 若path以／或\结尾,则会返回空值.
                                     (实质就是绝对路径的最后一个 a/b/c 结果就为c 哪怕c是一个目录)
os.path.abspath(path)                返回path规范化的绝对路径
os.path.getatime(path)               返回path所指向的文件或者目录的最后存取时间
os.path.getmtime(path)               返回path所指向的文件或者目录的最后修改时间
os.path.getsize(path)                返回path的大小

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

# random模块

import random

# Scalar draws
print(random.random())         # -- float in [0.0, 1.0): 0.0 can occur, 1.0 cannot
print(random.randint(1, 3))    # -- integer in [1, 3], both ends included
print(random.randrange(1, 3))  # -- integer in [1, 3), upper bound excluded

# Draws from a sequence
pool = [1, '23', [4, 5]]
print(random.choice(pool))     # -- one element: 1, '23' or [4, 5]
print(random.sample(pool, 2))  # -- two distinct elements, in random order

print(random.uniform(1, 3))    # -- float between 1 and 3, e.g. 1.927109612082716

# shuffle() reorders the list in place ("shuffling the deck") and returns None
item = [1, 3, 5, 7, 9]
random.shuffle(item)
print(item)

"""
0.2236487025002597
3
2
1
[[4, 5], '23']
2.904734587980278
[9, 3, 7, 1, 5]
"""

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

生成随机验证码!!!

import random
def make_code(n):
    """Return a random code of ``n`` characters.

    Each position is independently either an uppercase ASCII letter (A-Z)
    or a decimal digit (0-9). ``n <= 0`` gives an empty string.
    """
    picked = []
    for _ in range(n):
        # Draw one candidate of each kind, then keep one of the two.
        letter = chr(random.randint(ord('A'), ord('Z')))
        digit = str(random.randint(0, 9))
        picked.append(random.choice([letter, digit]))
    return ''.join(picked)


print(make_code(9))  # e.g. 42RIK6VTT

1
2
3
4
5
6
7
8
9
10

# time&datetime模块

# time

时间分为三种格式

# 时间戳

计算时间间隔

时间戳. 表示的是从1970年1月1日00:00:00开始按秒计算的偏移量

import time

# A timestamp is a float count of seconds; subtracting two gives the elapsed time.
began_at = time.time()
time.sleep(3)
elapsed = time.time() - began_at
print(elapsed)  # e.g. 3.0033159255981445

1
2
3
4
5
6

# 格式化时间

控制显示的

格式化的时间字符串

import time

# Called with only a format, strftime() renders the current local time.
# Sample output, in order:
#   2022-04-29 14:41:43
#   2022-04-29 14:41:43 PM
for pattern in ("%Y-%m-%d %X", "%Y-%m-%d %H:%M:%S %p"):
    print(time.strftime(pattern))

1
2
3
4

# 结构化时间

便于取出时间的各个部分

时间对象｜结构化的时间

import time

# -- struct_time for the current moment, in the local time zone. Sample value:
# -- time.struct_time(tm_year=2022, tm_mon=9, tm_mday=26, tm_hour=20, tm_min=54, tm_sec=30, tm_wday=0, tm_yday=269, tm_isdst=0)
t = time.localtime()
print(t)
# -- each part of the time is available as a named field
hour_now = t.tm_hour
print(hour_now)  # -- 20 for the sample value above
"""
补充:time.gmtime() UTC时区的结构化时间
"""

1
2
3
4
5
6
7
8
9

# 时间之间相互转换

format_string 格式化时间 <===> struct_time 结构化时间 <===> timestamp时间戳

# -- Formatted string -> struct_time.
#    The directives must follow the field order of the string: '2011-03-07'
#    is year-month-day, so the format is '%Y-%m-%d' (with '%Y-%d-%m' the same
#    string would be read as 3 July instead of 7 March).
parsed = time.strptime('2011-03-07', '%Y-%m-%d')

# -- Current struct_time -> formatted string
year_text = time.strftime('%Y', time.localtime())

# -- Current struct_time -> timestamp
timestamp = time.mktime(time.localtime())

# -- Timestamp -> struct_time (local time zone, then UTC)
local_struct = time.localtime(123456789)
utc_struct = time.gmtime(123456789)

"""
# -- 获取时间 linux上就是这样的
#    它等同于 time.strftime('%a %b %d %H:%M:%S %Y')
time.asctime()  # -- Sun Sep 11 00:43:43 2016
time.asctime(time.localtime())
time.ctime(123456789)
"""

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

# time模块的弊端

1> 获取格式化字符串形式的时间麻烦
2> 时间戳与格式化时间之间的转换麻烦
3> 获取之前或者未来的时间麻烦

# datetime

import datetime

# -- 获取当前格式化时间
# -- Current local date and time
right_now = datetime.datetime.now()
print(right_now)  # e.g. 2022-04-30 06:08:55.835488

# -- Timestamp -> datetime (shown in local time)
from_stamp = datetime.datetime.fromtimestamp(123453567)
print(from_stamp)  # e.g. 1973-11-30 04:39:27

# -- The time three days from now: add a timedelta
three_days = datetime.timedelta(days=3)
print(datetime.datetime.now() + three_days)  # e.g. 2022-05-03 06:12:27.911605

# -- replace() returns a copy with any chosen field swapped out
s = datetime.datetime.now()
in_2020 = s.replace(year=2020)
print(in_2020)

1
2
3
4
5
6
7
8
9
10
11
12

# shutil模块!!

高级的文件、文件夹、压缩包处理模块

shutil.copyfileobj(fsrc, fdst[, length]) !!! 将文件内容拷贝到另一个文件中

import shutil

# -- Passing bare open() calls, as in
# --   shutil.copyfileobj(open('old.xml', 'r'), open('new.xml', 'w'))
# -- is not advisable because neither file gets closed.
# -- The with-blocks below close both files automatically.
with open('old.xml', 'r') as read_f:
    with open('new.xml', 'w') as write_f:
        shutil.copyfileobj(read_f, write_f)

1
2
3
4
5
6

shutil.copyfile(src, dst) !!! 拷贝文件

shutil.copyfile('f1.log', 'f2.log')  # -- the destination file does not need to exist beforehand

shutil.copymode(src, dst) 仅拷贝权限. 内容、组、用户均不变

shutil.copymode('f1.log', 'f2.log')	 # -- the destination file must already exist; only the permission bits are copied

shutil.copystat(src, dst) 仅拷贝状态的信息, 包括: mode bits, atime, mtime, flags

shutil.copystat('f1.log', 'f2.log')  # -- the destination file must already exist

shutil.copytree(src, dst, symlinks=False, ignore=None) !!! 递归拷贝文件夹

# The destination directory must not exist yet, and the parent directory of
# folder2 must be writable.
# ignore lists what to leave out: here everything ending in .pyc and
# everything starting with tmp.
# The default copy_function is shutil.copy2.
shutil.copytree('folder1', 'folder2', ignore=shutil.ignore_patterns('*.pyc', 'tmp*'))

1
2
3
4

shutil.rmtree(path[, ignore_errors[, onerror]]) !!! 递归删除文件夹

# -- recursively deletes the directory folder1
shutil.rmtree('folder1')

shutil.move(src, dst) !!! 递归的移动文件夹,它类似mv命令,剪切操作可以移动文件!!

# -- Move (cut and paste) a directory, similar to the mv command
shutil.move('folder1', 'folder3')

# -- e.g. move the file 1.txt into the directory "new".
#    Plain string literals: there is nothing to interpolate, so no f-prefix.
shutil.move("a/b/c/1.txt", "/Users/One_Piece/Desktop/new/")

1
2
3
4

shutil.make_archive(base_name, format,...) !!! 创建压缩包并返回文件路径, 例如: zip、tar

import shutil
import tarfile

"""
base_name:压缩包的文件名,也可以是压缩包的路径
          若是文件名时,压缩包保存至当前目录;若是路径,保存至指定路径
format:压缩包种类 eg: gztar打包压缩
root_dir:要压缩的文件夹的路径(默认当前目录)
"""
# -- Pack the files under /data into data_bak.tar.gz in the current working directory
shutil.make_archive("data_bak", 'gztar', root_dir='/data')

# -- Extract into the directory '/dc'.
#    The with-block closes the archive even if extraction raises.
# NOTE(review): extractall() writes whatever paths the archive contains; only
# use it on archives you created yourself.
with tarfile.open('data_bak.tar.gz', 'r') as t:
    t.extractall('/dc')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

# shelve模块

序列化与反序列化的. 它支持所有的python类型,所以只能在python里面用,但比pickle模块简单.

它只有一个open函数,返回类似字典的对象,可读可写;
key必须为字符串,值可以是python支持的所有数据类型

import shelve  # -- 基本上不会用的(￣▽￣),那就放都吃灰吧!!!

# -- A dict held in memory that we want to persist
dic_1 = {'pwd': 'admin123', 'age': 18, 'sex': 'male'}

# -- The shelf file does not need to exist beforehand. The with-block closes
#    the shelf even if something inside raises. Closing may leave a file such
#    as db.txt.db on disk; the exact name(s) can differ between platforms.
with shelve.open('db.txt') as d:
    # -- store
    d['egon'] = dic_1
    d['dc'] = {'pwd': 'admin456', 'age': 20, 'sex': 'male'}
    # -- load
    print(d['egon']['pwd'])  # admin123

# -- writeback defaults to False. Changing a value nested inside a stored
#    object (as below) is only saved when writeback=True; otherwise the
#    stored value stays as it was.
with shelve.open('db.txt', writeback=True) as d2:
    print(d2['dc']['age'])  # 20
    d2['dc']['age'] = 18

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

# xml模块

xml是实现不同语言或程序之间进行数据交换的协议, 跟json差不多,但json使用起来更简单.
不过在json还没诞生的黑暗年代,大家只能选择用xml.
至今很多传统公司如金融行业的很多系统的接口还主要是xml ╮(╯▽╰)╭ 还是要了解哈..

xml的格式如下: 通过<>节点来区别数据结构的: (标签名、标签属性、标签包含的文本内容或子标签)

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

<!-- 将xml数据用json格式表示
data = {
	"Liechtenstein": {
		"rank": {
			"updated": "TRUE",
			"text": 2
		},
		"year": {
			"text": 2008
		}
	},
	"Singapore": {},
	"Panama": {}
}
-->

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

xml文档的增删改查

# NOTE(review): `text` is not used anywhere in the visible code and looks like an
# accidental auto-import. cgitb is deprecated since Python 3.11 and removed in
# 3.13, where this line raises ImportError — confirm it is unneeded and drop it.
from cgitb import text
import xml.etree.ElementTree as ET

# -- Parse the document and take its root element straight away
tree = ET.parse('xmltest.xml')
root = tree.getroot()

# -- Every element has three features: tag name, attributes and text content
tag_name, attributes, inner_text = root.tag, root.attrib, root.text
print(tag_name, attributes, inner_text)  # data {}

"""
# --- >>>1.查<<<
"""
# -- Looking up elements: three cases
# - Case 1: iter() searches the whole document and finds every match
year_iterator = root.iter('year')
# an iterator object, e.g. <_elementtree._element_iterator object at 0x7fe54477f2c0>
print(year_iterator)
# [<Element 'year' at 0x7ff22a097c70>, <Element 'year' at 0x7ff22a097e50>, <Element 'year' at 0x7ff22a09a040>]
print(list(root.iter('year')))
# Prints, one per line:
#   year {} 2008
#   year {} 2011
#   year {} 2011
for year in root.iter('year'):
    print(year.tag, year.attrib, year.text)

# - Case 2: find() looks only among root's children and returns the first match
print(root.find('year'))  # None
first_country = root.find('country')
print(first_country)  # <Element 'country' at 0x7f928576cbd0>
print(first_country.attrib)  # {'name': 'Liechtenstein'}

# - Case 3: findall() looks only among root's children and returns every match
countries = root.findall('country')
# [<Element 'country' at 0x7fdf53798bd0>, <Element 'country' at 0x7fdf53798db0>, <Element 'country' at 0x7fdf53798f40>]
print(countries)
# [{'name': 'Liechtenstein'}, {'name': 'Singapore'}, {'name': 'Panama'}]
print([node.attrib for node in countries])

# -- 遍历整个文档
# 根据当前xml文件的布局 效果等同于root.iter('country') ; root.findall('country')
'''
国家{'name': 'Liechtenstein'}
rank {'updated': 'yes'} 2
year {} 2008
gdppc {} 141100
neighbor {'name': 'Austria', 'direction': 'E'} None
neighbor {'name': 'Switzerland', 'direction': 'W'} None
国家{'name': 'Singapore'}
rank {'updated': 'yes'} 5
... ... ...
'''
# Iterating an element visits its direct children: first every country under
# root, then every child element of that country.
for country in root:
    print('国家%s' % country.attrib)
    for child in country:
        print(child.tag, child.attrib, child.text)


"""
# --- >>>2.改<<<
"""
for year in root.iter('year'):
    print(year.tag, year.attrib, year.text)
    # These changes only exist in memory until the tree is written out.
    # Attribute values have to be strings.
    year.attrib = {'updated': 'yes'}
    # Text read from the file is a string: convert, add one, convert back.
    next_year = int(year.text) + 1
    year.text = str(next_year)

# Save to disk as a new file, b.xml; it does not need to exist beforehand.
tree.write('b.xml')


"""
# --- >>>3.增<<<
"""
# 在rank标签的文本内容大于50的country节点下加一个egon标签
# 多次重复运行这段代码,只会增加一次
# Add an <egon> element to every country whose rank text is greater than 50.
for country in root:
    rank = country.find('rank')
    if int(rank.text) <= 50:
        continue  # this country does not qualify
    # Build the egon element with its attribute and text, then attach it.
    tag = ET.Element('egon', {'updated': 'yes'})
    tag.text = 'yyds'
    country.append(tag)

tree.write('b.xml')  # write the in-memory changes to disk


"""
# --- >>>4.删<<<
"""
for country in root:
    tag = country.find('egon')
    # An element that was found can still evaluate as False, so `if tag:` is
    # not a reliable test here; compare against None explicitly.
    if tag is None:
        continue
    country.remove(tag)  # drop the element from this country

tree.write('b.xml')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92

# csv模块

读取csv文件

"""
书名,作者,出版社,价格  
精通scrapy网络爬虫,刘硕,清华大学出版社,46.00  
算法导论,Charles E.Leiserson,人民邮电出版社,85.00  
Python灰帽子,Justin Seitz,电子工业出版社,39.00  
... ...
"""
import csv
# -- Open read-only (the default mode). newline='' leaves line-ending handling
#    to the csv module, and the with-block closes the file when done.
with open('books.csv', newline='') as rf:
    # -- Build a reader object (it is iterable). The default delimiter is a
    #    comma, which is what the sample file above uses.
    reader = csv.reader(rf)
    # -- each row comes back as a list of strings
    for book in reader:
        print(book)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

写入csv文件

import csv
# -- newline='' keeps the text layer from altering the line endings that the
#    csv writer emits
with open('demo.csv', 'w', newline='') as wf:
    # -- a single space is used as the delimiter here
    writer = csv.writer(wf, delimiter=' ')
    # -- write the rows one at a time
    writer.writerow(['x', 'y', 'z'])
    writer.writerow(['1', '2', '3'])
    writer.writerow(['9', '8', '7'])
# -- Leaving the with-block closes the file. Closing flushes the buffer (any
#    buffered data is written to the file), so no explicit wf.flush() is needed.

1
2
3
4
5
6
7
8
9
10
11

案例: 将价格不低于80.00的书记录存储到另一个csv文件中

import csv
# Copy the header plus every book priced at 80.00 or more from books.csv into
# books_out.csv. newline='' leaves line-ending handling to the csv module.
with open('books.csv', newline='') as rf:
    reader = csv.reader(rf)
    # -- read the header row first
    headers = next(reader)
    with open('books_out.csv', 'w', newline='') as wf:
        writer = csv.writer(wf)
        writer.writerow(headers)

        for book in reader:
            # -- a blank line in the file is yielded as an empty list
            if not book:
                continue
            # -- The price is the LAST column of the header shown earlier
            #    (title, author, publisher, price). strip() tolerates stray
            #    spaces around the number.
            price = book[-1].strip()
            # -- the scraped price field may be empty
            if price and float(price) >= 80.00:
                writer.writerow(book)

1
2
3
4
5
6
7
8
9
10
11
12
13
14

# configparser模块

专门用于解析配置文件的. eg: a.ini 、a.cfg、a.cnf

注意配置文件config.ini内容的格式:

# 注释1
; 注释2

[section1] # section标题
# options配置项 k=v
k1 = v1
user=egon
age=18
is_admin=true
salary=31

[section2]
k1 = v1

1
2
3
4
5
6
7
8
9
10
11
12
13

configparser模块对配置文件 config.ini 进行读写操作

import configparser

config = configparser.ConfigParser()
config.read('config.ini')

# -- All section titles
print(config.sections())  # ['section1', 'section2']

section = 'section1'

# -- Everything configured under one section
# keys only
print(config.options(section))  # ['k1', 'user', 'age', 'is_admin', 'salary']
# (key, value) pairs
print(config.items(section))    # [('k1', 'v1'), ('user', 'egon'), ('age', '18'), ('is_admin', 'true'), ('salary', '31')]

# -- Read single options from section1 with each typed getter
# get() returns the value as a str
res_str = config.get(section, 'age')
print(res_str, type(res_str))  # 18 <class 'str'>
# getint() returns an int
res_int = config.getint(section, 'age')
print(res_int, type(res_int))  # 18 <class 'int'>
# getfloat() returns a float
res_float = config.getfloat(section, 'age')
print(res_float, type(res_float))  # 18.0 <class 'float'>
# getboolean() returns a bool
res_bool = config.getboolean(section, 'is_admin')
print(res_bool, type(res_bool))    # True <class 'bool'>

# -- 删除操作 略..(懒得弄了 遇到了看egon的博客吧 (´▽｀))
# https://www.cnblogs.com/linhaifeng/articles/6384466.html#_label9

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

← 必用模块 | 模块导入与包 →