CentOS 7中文乱码以及Python3编码异常

听说python3的编码已经统一为utf-8,本以为高枕无忧了,然而,今天还是给我搞出点事了。写的爬虫中有一句保存文件的代码一直报错,原因是文件名中包含中文。encode.jpg

最后通过设置系统区域语言解决了。

参考链接:

  1. https://stackoverflow.com/questions/41408791/python-3-unicodeencodeerror-ascii-codec-cant-encode-characters
  2. http://www.centoscn.com/CentOS/config/2015/0419/5210.html
  3. http://blog.csdn.net/u010383937/article/details/73161475

树莓派安装的centos7默认locale是POSIX,除了英文,其余的编码都不能正常显示。因此我的CentOS7一直都是各种乱码,由于并不影响我正常使用,所以一直没管它。
乱码.jpg

今天python的乱码解决后,这个问题也一并解决了。
通过

[root@centos-rpi3 ~]# locale

命令查看系统当前的locale。
systemencoding.jpg

通过

[root@centos-rpi3 ~]# locale -a
aa_DJ
aa_DJ.iso88591
aa_DJ.utf8
aa_ER
aa_ER@saaho
aa_ER.utf8
aa_ER.utf8@saaho
aa_ET
aa_ET.utf8
af_ZA
af_ZA.iso88591
af_ZA.utf8
am_ET
am_ET.utf8
an_ES
an_ES.iso885915
an_ES.utf8
ar_AE
ar_AE.iso88596
ar_AE.utf8
ar_BH
ar_BH.iso88596
ar_BH.utf8
ar_DZ
ar_DZ.iso88596
ar_DZ.utf8
ar_EG
ar_EG.iso88596
ar_EG.utf8
ar_IN
ar_IN.utf8
ar_IQ
ar_IQ.iso88596
ar_IQ.utf8
ar_JO
ar_JO.iso88596
ar_JO.utf8
ar_KW
ar_KW.iso88596
ar_KW.utf8
ar_LB
ar_LB.iso88596
ar_LB.utf8
ar_LY
ar_LY.iso88596
ar_LY.utf8
ar_MA
ar_MA.iso88596
ar_MA.utf8
ar_OM
ar_OM.iso88596
ar_OM.utf8
ar_QA
ar_QA.iso88596
ar_QA.utf8
ar_SA
ar_SA.iso88596
ar_SA.utf8
ar_SD
ar_SD.iso88596
ar_SD.utf8
ar_SY
ar_SY.iso88596
ar_SY.utf8
ar_TN
ar_TN.iso88596
ar_TN.utf8
ar_YE
ar_YE.iso88596
ar_YE.utf8
as_IN
as_IN.utf8
ast_ES
ast_ES.iso885915
ast_ES.utf8
ayc_PE
ayc_PE.utf8
az_AZ
az_AZ.utf8
be_BY
be_BY.cp1251
be_BY@latin
be_BY.utf8
be_BY.utf8@latin
bem_ZM
bem_ZM.utf8
ber_DZ
ber_DZ.utf8
ber_MA
ber_MA.utf8
bg_BG
bg_BG.cp1251
bg_BG.utf8
bho_IN
bho_IN.utf8
bn_BD
bn_BD.utf8
bn_IN
bn_IN.utf8
bo_CN
bo_CN.utf8
bo_IN
bo_IN.utf8
bokmal
bokmål
br_FR
br_FR@euro
br_FR.iso88591
br_FR.iso885915@euro
br_FR.utf8
brx_IN
brx_IN.utf8
bs_BA
bs_BA.iso88592
bs_BA.utf8
byn_ER
byn_ER.utf8
C
ca_AD
ca_AD.iso885915
ca_AD.utf8
ca_ES
ca_ES@euro
ca_ES.iso88591
ca_ES.iso885915@euro
ca_ES.utf8
ca_FR
ca_FR.iso885915
ca_FR.utf8
ca_IT
ca_IT.iso885915
ca_IT.utf8
catalan
crh_UA
crh_UA.utf8
croatian
csb_PL
csb_PL.utf8
cs_CZ
cs_CZ.iso88592
cs_CZ.utf8
cv_RU
cv_RU.utf8
cy_GB
cy_GB.iso885914
cy_GB.utf8
czech
da_DK
da_DK.iso88591
da_DK.iso885915
da_DK.utf8
danish
dansk
de_AT
de_AT@euro
de_AT.iso88591
de_AT.iso885915@euro
de_AT.utf8
de_BE
de_BE@euro
de_BE.iso88591
de_BE.iso885915@euro
de_BE.utf8
de_CH
de_CH.iso88591
de_CH.utf8
de_DE
de_DE@euro
de_DE.iso88591
de_DE.iso885915@euro
de_DE.utf8
de_LU
de_LU@euro
de_LU.iso88591
de_LU.iso885915@euro
de_LU.utf8
deutsch
doi_IN
doi_IN.utf8
dutch
dv_MV
dv_MV.utf8
dz_BT
dz_BT.utf8
eesti
el_CY
el_CY.iso88597
el_CY.utf8
el_GR
el_GR.iso88597
el_GR.utf8
en_AG
en_AG.utf8
en_AU
en_AU.iso88591
en_AU.utf8
en_BW
en_BW.iso88591
en_BW.utf8
en_CA
en_CA.iso88591
en_CA.utf8
en_DK
en_DK.iso88591
en_DK.utf8
en_GB
en_GB.iso88591
en_GB.iso885915
en_GB.utf8
en_HK
en_HK.iso88591
en_HK.utf8
en_IE
en_IE@euro
en_IE.iso88591
en_IE.iso885915@euro
en_IE.utf8
en_IN
en_IN.utf8
en_NG
en_NG.utf8
en_NZ
en_NZ.iso88591
en_NZ.utf8
en_PH
en_PH.iso88591
en_PH.utf8
en_SG
en_SG.iso88591
en_SG.utf8
en_US
en_US.iso88591
en_US.iso885915
en_US.utf8
en_ZA
en_ZA.iso88591
en_ZA.utf8
en_ZM
en_ZM.utf8
en_ZW
en_ZW.iso88591
en_ZW.utf8
es_AR
es_AR.iso88591
es_AR.utf8
es_BO
es_BO.iso88591
es_BO.utf8
es_CL
es_CL.iso88591
es_CL.utf8
es_CO
es_CO.iso88591
es_CO.utf8
es_CR
es_CR.iso88591
es_CR.utf8
es_CU
es_CU.utf8
es_DO
es_DO.iso88591
es_DO.utf8
es_EC
es_EC.iso88591
es_EC.utf8
es_ES
es_ES@euro
es_ES.iso88591
es_ES.iso885915@euro
es_ES.utf8
es_GT
es_GT.iso88591
es_GT.utf8
es_HN
es_HN.iso88591
es_HN.utf8
es_MX
es_MX.iso88591
es_MX.utf8
es_NI
es_NI.iso88591
es_NI.utf8
es_PA
es_PA.iso88591
es_PA.utf8
es_PE
es_PE.iso88591
es_PE.utf8
es_PR
es_PR.iso88591
es_PR.utf8
es_PY
es_PY.iso88591
es_PY.utf8
es_SV
es_SV.iso88591
es_SV.utf8
estonian
es_US
es_US.iso88591
es_US.utf8
es_UY
es_UY.iso88591
es_UY.utf8
es_VE
es_VE.iso88591
es_VE.utf8
et_EE
et_EE.iso88591
et_EE.iso885915
et_EE.utf8
eu_ES
eu_ES@euro
eu_ES.iso88591
eu_ES.iso885915@euro
eu_ES.utf8
fa_IR
fa_IR.utf8
ff_SN
ff_SN.utf8
fi_FI
fi_FI@euro
fi_FI.iso88591
fi_FI.iso885915@euro
fi_FI.utf8
fil_PH
fil_PH.utf8
finnish
fo_FO
fo_FO.iso88591
fo_FO.utf8
français
fr_BE
fr_BE@euro
fr_BE.iso88591
fr_BE.iso885915@euro
fr_BE.utf8
fr_CA
fr_CA.iso88591
fr_CA.utf8
fr_CH
fr_CH.iso88591
fr_CH.utf8
french
fr_FR
fr_FR@euro
fr_FR.iso88591
fr_FR.iso885915@euro
fr_FR.utf8
fr_LU
fr_LU@euro
fr_LU.iso88591
fr_LU.iso885915@euro
fr_LU.utf8
fur_IT
fur_IT.utf8
fy_DE
fy_DE.utf8
fy_NL
fy_NL.utf8
ga_IE
ga_IE@euro
ga_IE.iso88591
ga_IE.iso885915@euro
ga_IE.utf8
galego
galician
gd_GB
gd_GB.iso885915
gd_GB.utf8
german
gez_ER
gez_ER@abegede
gez_ER.utf8
gez_ER.utf8@abegede
gez_ET
gez_ET@abegede
gez_ET.utf8
gez_ET.utf8@abegede
gl_ES
gl_ES@euro
gl_ES.iso88591
gl_ES.iso885915@euro
gl_ES.utf8
greek
gu_IN
gu_IN.utf8
gv_GB
gv_GB.iso88591
gv_GB.utf8
ha_NG
ha_NG.utf8
hebrew
he_IL
he_IL.iso88598
he_IL.utf8
hi_IN
hi_IN.utf8
hne_IN
hne_IN.utf8
hr_HR
hr_HR.iso88592
hr_HR.utf8
hrvatski
hsb_DE
hsb_DE.iso88592
hsb_DE.utf8
ht_HT
ht_HT.utf8
hu_HU
hu_HU.iso88592
hu_HU.utf8
hungarian
hy_AM
hy_AM.armscii8
hy_AM.utf8
ia_FR
ia_FR.utf8
icelandic
id_ID
id_ID.iso88591
id_ID.utf8
ig_NG
ig_NG.utf8
ik_CA
ik_CA.utf8
is_IS
is_IS.iso88591
is_IS.utf8
italian
it_CH
it_CH.iso88591
it_CH.utf8
it_IT
it_IT@euro
it_IT.iso88591
it_IT.iso885915@euro
it_IT.utf8
iu_CA
iu_CA.utf8
iw_IL
iw_IL.iso88598
iw_IL.utf8
ja_JP
ja_JP.eucjp
ja_JP.ujis
ja_JP.utf8
japanese
japanese.euc
ka_GE
ka_GE.georgianps
ka_GE.utf8
kk_KZ
kk_KZ.pt154
kk_KZ.utf8
kl_GL
kl_GL.iso88591
kl_GL.utf8
km_KH
km_KH.utf8
kn_IN
kn_IN.utf8
kok_IN
kok_IN.utf8
ko_KR
ko_KR.euckr
ko_KR.utf8
korean
korean.euc
ks_IN
ks_IN@devanagari
ks_IN.utf8
ks_IN.utf8@devanagari
ku_TR
ku_TR.iso88599
ku_TR.utf8
kw_GB
kw_GB.iso88591
kw_GB.utf8
ky_KG
ky_KG.utf8
lb_LU
lb_LU.utf8
lg_UG
lg_UG.iso885910
lg_UG.utf8
li_BE
li_BE.utf8
lij_IT
lij_IT.utf8
li_NL
li_NL.utf8
lithuanian
lo_LA
lo_LA.utf8
lt_LT
lt_LT.iso885913
lt_LT.utf8
lv_LV
lv_LV.iso885913
lv_LV.utf8
mag_IN
mag_IN.utf8
mai_IN
mai_IN.utf8
mg_MG
mg_MG.iso885915
mg_MG.utf8
mhr_RU
mhr_RU.utf8
mi_NZ
mi_NZ.iso885913
mi_NZ.utf8
mk_MK
mk_MK.iso88595
mk_MK.utf8
ml_IN
ml_IN.utf8
mni_IN
mni_IN.utf8
mn_MN
mn_MN.utf8
mr_IN
mr_IN.utf8
ms_MY
ms_MY.iso88591
ms_MY.utf8
mt_MT
mt_MT.iso88593
mt_MT.utf8
my_MM
my_MM.utf8
nan_TW@latin
nan_TW.utf8@latin
nb_NO
nb_NO.iso88591
nb_NO.utf8
nds_DE
nds_DE.utf8
nds_NL
nds_NL.utf8
ne_NP
ne_NP.utf8
nhn_MX
nhn_MX.utf8
niu_NU
niu_NU.utf8
niu_NZ
niu_NZ.utf8
nl_AW
nl_AW.utf8
nl_BE
nl_BE@euro
nl_BE.iso88591
nl_BE.iso885915@euro
nl_BE.utf8
nl_NL
nl_NL@euro
nl_NL.iso88591
nl_NL.iso885915@euro
nl_NL.utf8
nn_NO
nn_NO.iso88591
nn_NO.utf8
no_NO
no_NO.ISO-8859-1
norwegian
nr_ZA
nr_ZA.utf8
nso_ZA
nso_ZA.utf8
nynorsk
oc_FR
oc_FR.iso88591
oc_FR.utf8
om_ET
om_ET.utf8
om_KE
om_KE.iso88591
om_KE.utf8
or_IN
or_IN.utf8
os_RU
os_RU.utf8
pa_IN
pa_IN.utf8
pap_AN
pap_AN.utf8
pa_PK
pa_PK.utf8
pl_PL
pl_PL.iso88592
pl_PL.utf8
polish
portuguese
POSIX
ps_AF
ps_AF.utf8
pt_BR
pt_BR.iso88591
pt_BR.utf8
pt_PT
pt_PT@euro
pt_PT.iso88591
pt_PT.iso885915@euro
pt_PT.utf8
romanian
ro_RO
ro_RO.iso88592
ro_RO.utf8
ru_RU
ru_RU.iso88595
ru_RU.koi8r
ru_RU.utf8
russian
ru_UA
ru_UA.koi8u
ru_UA.utf8
rw_RW
rw_RW.utf8
sa_IN
sa_IN.utf8
sat_IN
sat_IN.utf8
sc_IT
sc_IT.utf8
sd_IN
sd_IN@devanagari
sd_IN.utf8
sd_IN.utf8@devanagari
se_NO
se_NO.utf8
shs_CA
shs_CA.utf8
sid_ET
sid_ET.utf8
si_LK
si_LK.utf8
sk_SK
sk_SK.iso88592
sk_SK.utf8
slovak
slovene
slovenian
sl_SI
sl_SI.iso88592
sl_SI.utf8
so_DJ
so_DJ.iso88591
so_DJ.utf8
so_ET
so_ET.utf8
so_KE
so_KE.iso88591
so_KE.utf8
so_SO
so_SO.iso88591
so_SO.utf8
spanish
sq_AL
sq_AL.iso88591
sq_AL.utf8
sq_MK
sq_MK.utf8
sr_ME
sr_ME.utf8
sr_RS
sr_RS@latin
sr_RS.utf8
sr_RS.utf8@latin
ss_ZA
ss_ZA.utf8
st_ZA
st_ZA.iso88591
st_ZA.utf8
sv_FI
sv_FI@euro
sv_FI.iso88591
sv_FI.iso885915@euro
sv_FI.utf8
sv_SE
sv_SE.iso88591
sv_SE.iso885915
sv_SE.utf8
swedish
sw_KE
sw_KE.utf8
sw_TZ
sw_TZ.utf8
szl_PL
szl_PL.utf8
ta_IN
ta_IN.utf8
ta_LK
ta_LK.utf8
te_IN
te_IN.utf8
tg_TJ
tg_TJ.koi8t
tg_TJ.utf8
thai
th_TH
th_TH.tis620
th_TH.utf8
ti_ER
ti_ER.utf8
ti_ET
ti_ET.utf8
tig_ER
tig_ER.utf8
tk_TM
tk_TM.utf8
tl_PH
tl_PH.iso88591
tl_PH.utf8
tn_ZA
tn_ZA.utf8
tr_CY
tr_CY.iso88599
tr_CY.utf8
tr_TR
tr_TR.iso88599
tr_TR.utf8
ts_ZA
ts_ZA.utf8
tt_RU
tt_RU@iqtelif
tt_RU.utf8
tt_RU.utf8@iqtelif
turkish
ug_CN
ug_CN.utf8
uk_UA
uk_UA.koi8u
uk_UA.utf8
unm_US
unm_US.utf8
ur_IN
ur_IN.utf8
ur_PK
ur_PK.utf8
uz_UZ
uz_UZ@cyrillic
uz_UZ.iso88591
uz_UZ.utf8@cyrillic
ve_ZA
ve_ZA.utf8
vi_VN
vi_VN.utf8
wa_BE
wa_BE@euro
wa_BE.iso88591
wa_BE.iso885915@euro
wa_BE.utf8
wae_CH
wae_CH.utf8
wal_ET
wal_ET.utf8
wo_SN
wo_SN.utf8
xh_ZA
xh_ZA.iso88591
xh_ZA.utf8
yi_US
yi_US.cp1255
yi_US.utf8
yo_NG
yo_NG.utf8
yue_HK
yue_HK.utf8
zh_CN
zh_CN.gb18030
zh_CN.gb2312
zh_CN.gbk
zh_CN.utf8
zh_HK
zh_HK.big5hkscs
zh_HK.utf8
zh_SG
zh_SG.gb2312
zh_SG.gbk
zh_SG.utf8
zh_TW
zh_TW.big5
zh_TW.euctw
zh_TW.utf8
zu_ZA
zu_ZA.iso88591
zu_ZA.utf8

查看所有已经安装的locale,可以看到是有中文的。
通过这一句命令设置locale
localectl set-locale LANG=zh_CN.utf8
等号后面是你要设置的locale,从上面查看的列表中选一个,这里我选utf-8编码的简体中文。
设置好了后需要退出再重新登录,查看系统的locale

[root@centos-rpi3 ~]# locale
LANG=zh_CN.utf8
LC_CTYPE="zh_CN.utf8"
LC_NUMERIC="zh_CN.utf8"
LC_TIME="zh_CN.utf8"
LC_COLLATE="zh_CN.utf8"
LC_MONETARY="zh_CN.utf8"
LC_MESSAGES="zh_CN.utf8"
LC_PAPER="zh_CN.utf8"
LC_NAME="zh_CN.utf8"
LC_ADDRESS="zh_CN.utf8"
LC_TELEPHONE="zh_CN.utf8"
LC_MEASUREMENT="zh_CN.utf8"
LC_IDENTIFICATION="zh_CN.utf8"
LC_ALL=

可以看到已经变成设置的值了。现在系统的文件中中文都显示正常,python脚本也可以成功运行了。
normal.jpg

如果运行脚本时遇到这个错

/usr/bin/python^M: 解释器错误: 没有那个文件或目录

那是文件换行符的问题(并不是字符编码的问题):脚本在Windows上编写,换行符是CRLF,拿到Linux上运行时,第一行行尾多出的回车符(显示为^M)被当成了解释器路径的一部分。把文件的换行符换成Unix格式(LF)即可,例如使用dos2unix,或者执行 sed -i 's/\r$//' 脚本文件名。顺便说下,如果python是自己编译安装的,默认路径是/usr/local/bin/python3,此时脚本文件第一行指定脚本解释器时要写成自己的路径#!/usr/local/bin/python3,不要照搬其他人写的了。

标签: none

添加新评论

ali-01.gifali-58.gifali-09.gifali-23.gifali-04.gifali-46.gifali-57.gifali-22.gifali-38.gifali-13.gifali-10.gifali-34.gifali-06.gifali-37.gifali-42.gifali-35.gifali-12.gifali-30.gifali-16.gifali-54.gifali-55.gifali-59.gif

加载中……