# Uzanto:Maksim/aald
# Aspekto
# - ivsen.py
# -*- coding: UTF-8 -*-
__version__='$Id:'
import wikipedia, pagegenerators
import re, sys, os
import codecs
import perevod,imagetransfer1
from bib_tekst import *
from get_buf import *
from bib_kateg import *
# Localised phrase fragments used to build edit summaries and generated
# headings.  Keys are site language codes ('ru', 'eo'); values are
# runtime strings inserted into wiki pages/summaries — do not translate
# or reword them, they are user-visible wiki text.
# "images"
te_bildoj = {
    'ru':u'изображений',
    'eo':u'bildoj',
    }
# "external links"
te_ekslj= {
    'ru':u'внешних ссылок',
    'eo':u'eksteraj ligiloj',
    }
# "commons (repository) links"
te_komunejoj = {
    'ru':u'викискладов',
    'eo':u'komunejoj',
    }
# "links"
te_ligoj = {
    'ru':u'ссылок',
    'eo':u'ligoj',
    }
# "tables"
te_tabeloj = {
    'ru':u'таблиц',
    'eo':u'tabeloj',
    }
# "categories"
te_kategorioj = {
    'ru':u'категорий',
    'eo':u'kategorioj',
    }
# "see discussion"
te_vi_di = {
    'ru':u'см. обсуждение',
    'eo':u'vidu diskuton',
    }
# "discussion"
te_diskuto= {
    'ru':u'обсуждение',
    'eo':u'diskuto',
    }
# "Additional images" — heading used on the overflow subpage
te_al_bildoj = {
    'ru':u'Дополнительные изображения',
    'eo':u'Aldonaj bildoj',
    }
# "Additional external links"
te_al_ekslj= {
    'ru':u'Дополнительные внешние ссылки',
    'eo':u'Aldonaj eksteraj ligiloj',
    }
# "Additions" — suffix of the generated overflow subpage title
te_aldon= {
    'ru':u'Добавления',
    'eo':u'Aldonaĵoj',
    }
# Category collecting the generated "additions" subpages
te_aldon_kat= {
    'ru':u'Категория:Добавления для статей',
    'eo':u'Kategorio:Aldonajxoj por artikoloj',
    }
def vivod(b):
    # Print b to the console and append it to the global report file.
    # otch is a module global opened in ivsenmain() before any call here.
    wikipedia.output(b)
    otch.write(b)
    otch.flush()
def ubrkoment (text):
    """Strip <nowiki>...</nowiki> sections and HTML comments from text.

    Later parsing stages must not pick up links/templates that are
    commented out or nowiki-escaped.
    """
    # BUGFIX: the original pattern's first alternative was '<nowiki>.*?'
    # (no closing tag); a lazy '.*?' then matches the empty string, so
    # only the literal '<nowiki>' tag was removed and the escaped content
    # leaked through.  Match the full section instead.
    nowikiOrHtmlCommentR = re.compile(r'<nowiki>.*?</nowiki>|<!--.*?-->', re.IGNORECASE | re.DOTALL)
    match = nowikiOrHtmlCommentR.search(text)
    while match:
        text = text[:match.start()] + text[match.end():]
        match = nowikiOrHtmlCommentR.search(text)
    # Defuse any remaining comment terminator so the cleaned text cannot
    # prematurely close a comment it is later embedded in.
    text=text.replace(u'-->',u'-- >')
    return text
def prov_v_koment (text,pm):
    """Return 1 if any position in pm lies inside a <nowiki> section or
    an HTML comment of text, else 0.

    pm is an iterable of integer character offsets into text.
    """
    # BUGFIX: the original pattern was r'.*?|<!--.*?-->'; the bare lazy
    # '.*?' alternative matches the empty string at every position, so
    # every match was zero-width and the function always returned 0.
    # Restore the intended nowiki alternative.
    nowikiOrHtmlCommentR = re.compile(r'<nowiki>.*?</nowiki>|<!--.*?-->', re.IGNORECASE | re.DOTALL)
    for match in nowikiOrHtmlCommentR.finditer(text):
        for p in pm:
            if p>=match.start() and p<match.end():
                return 1
    return 0
def provcifr(t):
    """True when the first character of t is an ASCII digit '0'..'9'."""
    c = t[0]
    return (c >= u'0') & (c <= u'9')
def prov_te_format_kart(t):
    """Classify a single image-link parameter t.

    Returns:
      1 -- t is pure formatting: thumb/thumbnail/frame, an alignment
           keyword, or a size spec like u'180px';
      2 -- t is a size spec smaller than 50px (image too small to keep);
      0 -- t looks like caption text (real content).
    """
    minrzm=50  # minimal accepted pixel size
    t=ubr_nk_prob(t)
    if (t==u'thumb') or (t==u'thumbnail') or (t==u'frame'):
        return 1
    if (t==u'left') or (t==u'center') or (t==u'right'):
        return 1
    if len(t)<3:
        return 0
    # Parse up to 6 leading digits as the pixel size.
    x=0
    i=0
    while i<6 and i<len(t) and provcifr(t[i]):
        x=x*10+ord(t[i])-ord(u'0')
        i+=1
    if (i>=1) and (t[i:]==u'px'):
        if x<minrzm:
            return 2
        return 1
    return 0
def linkedPages_podp(site,thistxt):
    """Gives the normal (not-interwiki, non-category) pages the page
    links to, as a list of Page objects.

    Returns (result, result_podp): parallel lists of Page objects and
    their pipe captions (u'' when the link has no '|' part).
    Interwiki/category links and commented-out text are removed first.
    """
    result = []
    result_podp = []
    thistxt = wikipedia.removeLanguageLinks(thistxt)
    thistxt = wikipedia.removeCategoryLinks(thistxt, site)
    thistxt = ubrkoment (thistxt)
    # [[(?P<title>[^]|]*)(|[^]]*)?]]
    i=0
    Rlink = re.compile(r'\[\[(?P<title>[^\]\|]*)(\|[^\]]*)?\]\]')
    for match in Rlink.finditer(thistxt):
        title = match.group('title')
        if site.lang == 'eo':
            # Esperanto wikis may encode diacritics with the x-convention.
            title = wikipedia.resolveEsperantoXConvention(title)
        page = wikipedia.Page(site, title)
        podp=u''
        # Extract the caption between '|' and the closing ']]' (nested
        # '[[..]]' inside the caption is handled by iskkonpodp).
        j=iskat(thistxt[match.start():match.end()],u'|')
        if j!=-1:
            j+=match.start()+1
            kon=iskkonpodp(thistxt[j:],0,u'[[',u']]')
            if kon!=-1:
                podp=thistxt[j:j+kon]
        result.append(page)
        result_podp.append(podp)
        i+=1
    return (result,result_podp)
def razb_arg(t):
    """Split template argument text t on u'|' and trim each piece.

    Returns the list of trimmed arguments (ubr_nk_prob strips leading and
    trailing whitespace — TODO confirm against bib_tekst).
    """
    r=[]
    p0=0
    while p0<len(t):
        p1=iskats(t,p0,u'|')
        if p1==-1:
            p1=len(t)
        a=t[p0:p1]
        a=ubr_nk_prob(a)
        r.append(a)
        p0=p1+1
    return r
def image_podp(site,thistxt):
    """
    Gives the images the page shows, as a list of Izobr objects.
    This includes images in galleries.

    For each image the namespace prefix is stripped from the name and
    formatting keywords (thumb/left/NNNpx/...) are removed from the
    caption; images declared smaller than 50px are marked tip=100
    (unusable).
    """
    result = []
    # Find normal images
    (res1,res1_podp)=linkedPages_podp(site,thistxt)
    j=0
    while j<len(res1):
        if res1[j].isImage():
            result.append(imagetransfer1.Izobr(res1[j].title(),site.lang,res1_podp[j]))
        j+=1
    # Find images in galleries
    galleryR = re.compile('<gallery>.*?</gallery>', re.DOTALL)
    galleryEntryR = re.compile('(?P<title>(%s|%s):.+?)(\|.+)?\n' % (site.image_namespace(), site.family.image_namespace(code = '_default')))
    for gallery in galleryR.findall(thistxt):
        # Gallery entries are one per line: 'Image:name|caption'.
        for match in gallery.split(u'\n'):
            podp=u''
            p=iskat(match,u'|')
            if p!=-1:
                podp=ubr_nk_prob(match[p+1:])
            else:
                p=len(match)
            ti=ubr_nk_prob(match[:p])
            if len(ti)<1:
                continue
            page = wikipedia.Page(site, ti)
            if not page.isImage():
                continue
            result.append(imagetransfer1.Izobr(page.title(),site.lang,podp))
    # Normalise names and captions.
    j=0
    while j<len(result):
        # Drop the 'Image:'/namespace prefix from the stored name.
        t=result[j].iz
        p=iskat(t,u':')
        if p!=-1:
            result[j].iz=t[p+1:]
        # Rebuild the caption keeping only non-formatting parameters.
        t=result[j].podp
        tn=u''
        fpred=0
        am=razb_arg(t)
        for a in am:
            ptf=prov_te_format_kart(a)
            if ptf==0:
                if fpred:
                    tn=tn+'|'
                fpred=1
                tn=tn+a
            elif ptf==2:
                # Declared below the minimum size: mark unusable.
                result[j].tip=100
                vivod(u' - m px %s\n'%result[j].iz)
                break
        result[j].podp=tn
        j+=1
    return result
class Ssilk:
    """An internal wiki link: target title, source language, caption."""
    def __init__(self, ss, jaz, podp):
        self.ss = ss        # link target (page title)
        self.jaz = jaz      # language code the link came from
        self.podp = podp    # pipe caption (may be empty)
    def sravn(self, e1):
        """Falsy (False) when self and e1 point at the same title."""
        return self.ss != e1.ss
def isk_ssilk(site,thistxt):
    """Collect all non-image internal links of thistxt as Ssilk objects."""
    result = []
    (res1,res1_podp)=linkedPages_podp(site,thistxt)
    j=0
    while j<len(res1):
        if not res1[j].isImage():
            result.append(Ssilk(res1[j].title(),site.lang,res1_podp[j]))
        j+=1
    return result
def isk_ssilk_smt(site,thistxt):
    """Collect list-style links from a 'See also' section.

    Only lines whose first u'[[' occurs within the first 5 characters
    (i.e. list entries like '* [[X]]') are considered, and only the
    first link of each such line is taken.
    """
    result = []
    st=thistxt.split(u'\n')
    for t in st:
        p=iskats(t,0,u'[[')
        if p==-1 or p>4:
            continue
        (res1,res1_podp)=linkedPages_podp(site,t)
        j=0
        # while j<len(res1):
        if len(res1)>=1:
            if not res1[j].isImage():
                result.append(Ssilk(res1[j].title(),site.lang,res1_podp[j]))
            j+=1
    return result
def perv_upper(t):
    """Return t with its first character upper-cased; u'' stays u''."""
    if t:
        return t[0].upper() + t[1:]
    return u''
class Shabl:
    """A template occurrence: name, source language and positional args.

    neisp is set to 1 by later passes when the template must not be
    reused (duplicate, already present, unresolvable, ...).
    """
    def __init__(self, nazv, jaz, arg):
        self.nazv = perv_upper(nazv)
        self.jaz = jaz
        self.arg = list(arg)
        self.neisp = 0
    def sravn(self, e1):
        """0 when name and the full argument list match e1, else 1."""
        if len(self.arg) != len(e1.arg):
            return 1
        if self.nazv != e1.nazv:
            return 1
        for own, other in zip(self.arg, e1.arg):
            if own != other:
                return 1
        return 0
    def sravn_com(self, e1):
        """0 when self and e1 refer to the same Commons page, else 1.

        Only the template name and the (case/underscore-normalised)
        first argument are compared.
        """
        if len(self.arg) < 1 or len(e1.arg) < 1:
            return 1
        if self.nazv != e1.nazv:
            return 1
        a0 = perv_upper(self.arg[0]).replace(u'_', u' ')
        b0 = perv_upper(e1.arg[0]).replace(u'_', u' ')
        if a0 != b0:
            return 1
        return 0
    def tekst(self):
        """Render as {{Name|arg1|arg2|...}} wikitext."""
        parts = [u'{{' + self.nazv]
        for a in self.arg:
            parts.append(u'|' + a)
        parts.append(u'}}')
        return u''.join(parts)
    def tekstn(self, n):
        """Like tekst() but keep at most the first n arguments."""
        parts = [u'{{' + self.nazv]
        for a in self.arg[:n]:
            parts.append(u'|' + a)
        parts.append(u'}}')
        return u''.join(parts)
def isk_shabl(jaz,t,z):
    """Find all {{...}} template calls in t whose name is in z.

    z is a sequence of template names; each is also matched with its
    first letter lower-cased.  Returns a list of Shabl objects holding
    the matched name and its arguments.
    """
    r=[]
    # Lower-cased first-letter variants of the wanted names.
    zm=[]
    for z1 in z:
        zm.append(z1[0].lower()+z1[1:])
    p0=0
    while p0<len(t):
        p=iskats(t,p0,u'{{')
        if p==-1:
            break
        pn=p+2
        # if pn>=len(t)-1:
        # break
        # if t[pn]==u' ' or t[pn]==u'\n':
        # pn+=1
        # if t[pn:].startswith(z)) or t[pn:].startswith(z):
        # Find the matching '}}' (handles nesting).
        pk=iskkonpodp(t,pn,u'{{',u'}}')
        if pk==-1:
            break
        am=razb_arg(t[pn:pk])
        if len(am)>=1:
            fsovp=0
            for z1 in z:
                if am[0]==z1:
                    fsovp=1
            for z1 in zm:
                if am[0]==z1:
                    fsovp=1
            if fsovp:
                am1=[]
                for a in am[1:]:
                    am1.append(a)
                r.append(Shabl(am[0],jaz,am1))
        p0=pk+2
    return r
def korr_com_shabl(sh):
    """Normalise a {{Commons}}/{{Commonscat}} Shabl in place.

    {{Commonscat|X}} becomes the equivalent {{Commons|Category:X}}; the
    first argument of a Commons template gets its category prefix
    cleaned up (leading ':' dropped, name capitalised, underscores to
    spaces).  A template without arguments is marked unusable.
    """
    if len(sh.arg) < 1:
        sh.neisp = 1
        # vivod(u'   <-shcom len(arg)=0\n')
        return
    if sh.nazv == u'Commonscat':
        sh.nazv = u'Commons'
        sh.arg[0] = u'Category:' + sh.arg[0]
    if sh.nazv != u'Commons':
        return
    first = sh.arg[0]
    if first.startswith(u':Category:'):
        first = first[1:]
    if first.startswith(u'Category:'):
        # Capitalise the first letter of the category name itself.
        first = first[:9] + perv_upper(first[9:])
    sh.arg[0] = first.replace(u'_', u' ')
class Eksl:
    """An external link: URL, source language, caption and priority."""
    def __init__(self, adr, jaz, podp):
        # Lower-case the scheme+host part (up to the first '/' after
        # 'http://'); the path component keeps its case.
        p = iskats(adr, 8, '/')
        if p == -1:
            p = len(adr)
        self.adr = adr[:p].lower() + adr[p:]
        self.jaz = jaz
        self.podp = podp
        self.podp2 = u''
        self.tip = 100       # >=80 means the link must not be used
        self.tip2 = 100
        self.prioritet = 0
        self.njazisp = 1     # number of languages that use this link
        self.njazisp2 = 0
    def vz_prioritet(self):
        """Sort key: the precomputed priority."""
        return self.prioritet
    def sravn(self, e1):
        """Falsy when both hold the same (normalised) URL."""
        return self.adr != e1.adr
    def tekst(self):
        """'url caption' with the original caption."""
        return u'%s %s' % (self.adr, self.podp)
    def pertekst(self):
        """'url caption' with the translated caption (perpodp is
        attached by the caller before this is used)."""
        return u'%s %s' % (self.adr, self.perpodp.t)
def isk_eksl(jaz,t):
    """Find external links in t: raw/bracketed http:// URLs and
    {{El2|url|caption}} templates.  Trailing '/' is stripped from every
    address.  Returns a list of Eksl objects.
    """
    r=[]
    p0=0
    while p0<len(t):
        p=iskats(t,p0,u'http://')
        if p==-1:
            break
        # Bracketed form '[http://... caption]' (possibly '[ http...').
        flks=((p>0 and t[p-1]==u'[') or (p>1 and t[p-2:p]==u'[ '))
        if flks:
            pk=iskkonpodp(t,p,u'[',u']')
            if pk==-1:
                flks=0
        if not flks:
            # Bare URL: runs until end of line.
            pk=iskats(t,p,u'\n')
            if pk==-1:
                pk=len(t)
        # URL ends at the first delimiter; the rest is the caption.
        (p1,np1)=iskats_mn(t[:pk],p,[u' ',u'}',u')',u'<',u'>'])
        if p1==-1:
            p1=pk
        adr=t[p:p1]
        podp=t[p1+1:pk]
        r.append(Eksl(adr,jaz,podp))
        p0=p1
    # Links written via the {{El2}} template.
    shm=isk_shabl(jaz,t,(u'El2',))
    for sh in shm:
        if len(sh.arg)>=1:
            podp=u''
            if len(sh.arg)>=2:
                podp=sh.arg[1]
            r.append(Eksl(u'http://'+sh.arg[0],jaz,podp))
    # Normalise: drop trailing slashes so duplicates compare equal.
    for el in r:
        while len(el.adr)>=1 and el.adr[len(el.adr)-1]==u'/':
            el.adr=el.adr[:len(el.adr)-1]
    return r
class Tabl:
    """A table found in an article, kept as raw wikitext/HTML."""
    def __init__(self, jaz, te):
        self.jaz = jaz   # language the table came from
        self.te = te     # raw table text
    def sravn(self, e1):
        """Falsy when both hold identical table text."""
        return self.te != e1.te
    def tekst(self):
        """Raw table text terminated with a newline."""
        return self.te + u'\n'
def isk_tabl(jaz,t):
    """Find wiki ('{|'..'|}') and HTML ('<table>'..'</table>') tables in
    t and return them as Tabl objects holding the raw text."""
    ost=[u'{|',u'<table']
    zst=[u'|}',u'</table>']
    r=[]
    p0=0
    while p0<len(t):
        (p,n)=iskats_mn(t,p0,ost)
        if p==-1:
            break
        # NOTE(review): local 'os' shadows the imported os module inside
        # this function; harmless here but worth renaming eventually.
        os=ost[n]
        zs=zst[n]
        pk=iskkonpodp(t,p+len(os),os,zs)
        if pk==-1:
            break
        pk+=len(zs)
        r.append(Tabl(jaz,t[p:pk]))
        p0=pk
    return r
# Per-language heading variants recognised as the "External links"
# section; the first entry of each tuple is the primary form used when
# generating a new heading.
t_zag_eksl = {
    u'eo': (u'Eksteraj ligiloj', u'Ekstera ligilo', u'Eksteraj ligoj',
        u'Ekstera ligo', u'Rete'),
    u'io': (u'Externa ligili', u'Extera ligili'),
    u'ru': (u'Внешние ссылки', u'Внешняя ссылка'),
    }
# Per-language heading variants recognised as the "See also" section.
t_zag_vian = {
    u'de': (u'Siehe auch',u'Weitere themen'),
    u'en': (u'See also',u'Miscellaneous topics'),
    u'eo': (u'Vidu ankaux jenon:',u'Vidu ankaux jenon',u'Vidu ankaux'),
    u'fr': (u'Voir aussi',),
    u'io': (u'Videz anke',),
    u'nl': (u'Zie ook',),
    u'ru': (u'См. также', u'Смотри также', u'Смотрите также'),
    }
def gl_zagolov(jaz,zag):
    """Return the primary (first) heading variant for language jaz.

    zag maps language code -> tuple of heading variants (t_zag_eksl /
    t_zag_vian).  u'*' is the fallback when jaz has no entry.
    """
    # dict.has_key() is deprecated; 'in' is the idiomatic (and
    # Python-3-compatible) spelling with identical behaviour.
    if jaz not in zag:
        return u'*'
    return zag[jaz][0]
def isk_zagolov(t,jaz,zag):
    """Locate the last heading line of t matching a variant from zag[jaz].

    Scans lines from the end of the text backwards.  Returns (p0, p1)
    where p0 is the offset of the heading's line start and p1 the offset
    just past the line; (len(t), len(t)) when nothing matches.  A match
    is rejected when extra alphanumeric characters surround the heading
    text on its line, or when it lies inside a comment/nowiki section.
    """
    if not zag.has_key(jaz):
        return (len(t),len(t))
    tm=t.lower()  # case-insensitive comparison buffer
    zj=zag[jaz]
    p1=len(t)
    while p1>1:
        # if t[p1-1]==u'\n':
        # p1-=1
        # p0 = start of the line that ends at p1.
        p0=iskato(t,p1-2,u'\n')+1
        for z in zj:
            zm=z.lower()
            i=iskats(tm[:p1],p0,zm)
            if i!=-1:
                # fll=1 when the line contains other alphanumeric text
                # besides the heading phrase (then it is not a heading).
                fll=0
                for c in t[p0:i]:
                    if c.isalnum():
                        fll=1
                for c in t[i+len(z):p1]:
                    if c.isalnum():
                        fll=1
                if prov_v_koment(t,(i,i+len(z),p1)):
                    fll=1
                if fll==0:
                    return (p0,p1)
        p1=p0
    return (len(t),len(t))
def isk_kon_zagolov(t,p0,p1):
    """Find the end of the section whose heading occupies t[p0:p1].

    The heading level is the number of leading '=' at p0 (100, i.e.
    "deepest", when there are none).  Returns the offset of the next
    heading line of equal or shallower level, or len(t).
    """
    # i = level of the current heading.
    i=0
    while p0+i<len(t) and t[p0+i]==u'=':
        i+=1
    if i==0:
        i=100
    if p1>0 and t[p1-1]==u'\n':
        p1-=1
    p=p1
    while 1:
        p=iskats(t,p,u'\n=')
        if p==-1:
            return len(t)
        p+=1
        # j = level of the candidate heading found at p.
        j=1
        while p+j<len(t) and t[p+j]==u'=':
            j+=1
        if j<=i:
            return p
class Perev_podp:
    """A translated caption: text t plus translation statistics ps
    (perevod.Perev_stat — tracks recognised/unrecognised word counts)."""
    def __init__(self,t):
        self.t = t
        self.ps = perevod.Perev_stat()
        # self.ps = ps
def objed_podp(podp, podp1):
    """Merge two translated captions (Perev_podp).

    Keeps whichever translation scored more recognised words
    (ps.nup + ps.nvap); ties keep the first argument.
    """
    score0 = podp.ps.nup + podp.ps.nvap
    score1 = podp1.ps.nup + podp1.ps.nvap
    if score0 < score1:
        return podp1
    return podp
def perevod_ob(slov,vhjaz,vihjaz,podp):
    """Translate caption podp from language vhjaz to vihjaz using the
    dictionary slov.  Returns a Perev_podp carrying the translated text
    and its translation statistics; empty/blank input yields an empty
    Perev_podp."""
    if podp==u'' or podp==u' ':
        # return u''
        return Perev_podp(u'')
    (podpperev,st_perev)=perevod.perevod_stat(slov,vhjaz,vihjaz,podp)
    # if fl_perev:
    # podp=u'%s:%s %s->%s:%s' % (vhjaz,podp,vhjaz,vihjaz,podpperev)
    # else:
    # podp=u'%s:%s' % (vhjaz,podp)
    # return podp
    r=Perev_podp(podpperev)
    r.ps=st_perev
    return r
def prov_tekst_uzxe(t,s):
    """True when s already occurs somewhere in t (plain substring test)."""
    #X!!!
    return iskats(t,0,s)!=-1
def prov_dobav_katcom1(nscom,zapriz,katcomsp,n):
    """Try to add Commons category n to the collected list katcomsp.

    nscom maps existing Commons page titles; zapriz maps forbidden
    names.  The name is trimmed, capitalised and cut at '#'.  Returns 1
    when the category exists on Commons (whether or not it was newly
    appended), 0 otherwise.
    """
    n=perv_upper(ubr_nk_prob(n))
    vivod(u' prov_dobav_katcom1 %s\n'%n)
    # Drop any '#section' suffix.
    p=iskats(n,0,u'#')
    if p!=-1:
        n=n[:p]
        vivod(u' prov_dobav_katcom1# %s\n'%n)
    t=u'Category:'+n
    if nscom.has_key(t) and (not zapriz.has_key(n)):
        t1=u'[['+t+u']]\n'
        if not t1 in katcomsp:
            katcomsp.append(t1)
            vivod(u' <-- ++\n')
        return 1
    return 0
def prov_dobav_katcom(nscom,zapriz,katcomsp,vhs,jaz):
    """Try name vhs as a Commons category; for English names also try
    naive plural forms ('y'->'ies', or appended 's'/'es'), since Commons
    category names are usually plural."""
    prov_dobav_katcom1(nscom,zapriz,katcomsp,vhs)
    if jaz==u'en':
        vhok=vhs[len(vhs)-1]
        if vhok==u'y':
            vhs1=vhs[:len(vhs)-1]+u'ies'
            prov_dobav_katcom1(nscom,zapriz,katcomsp,vhs1)
        else:
            vhs1=vhs+u's'
            prov_dobav_katcom1(nscom,zapriz,katcomsp,vhs1)
            vhs1=vhs+u'es'
            prov_dobav_katcom1(nscom,zapriz,katcomsp,vhs1)
def shcom_v_katcom(nscom,zapriz,katcomsp,sht):
    """Derive Commons categories from {{Commons}} templates sht.

    For each usable template, first try its target name directly as a
    category; failing that, fetch the Commons page (following one level
    of redirect) and collect the categories that page belongs to.
    Results accumulate in katcomsp via prov_dobav_katcom1.
    """
    vivod(u' shcom_v_katcom0 %d\n'%len(sht))
    for sh in sht:
        if sh.neisp or len(sh.arg)<1:
            continue
        n=sh.arg[0]
        vivod(u' shcom_v_katcom0 %s\n'%n)
        # Strip the namespace prefix before trying as a category name.
        n2=n[iskat(n,u':')+1:]
        usp=prov_dobav_katcom1(nscom,zapriz,katcomsp,n2)
        if usp:
            continue
        sico=wikipedia.getSite(code = u'commons',fam = u'commons')
        pli = wikipedia.Page(sico,n)
        if pli.isCategory():
            continue
        try:
            get1(pli)
            ti = pli.get()
        except wikipedia.NoPage:
            b = u' - %s:%s\n' % (pli.site().lang,pli.title())
            vivod(b)
            continue
        except wikipedia.IsRedirectPage:
            # Follow one redirect; try its target as a category first.
            n1=pli.getRedirectTarget()
            b = u' + %s:%s\n -> %s\n' % (pli.site().lang,pli.title(),n1)
            vivod(b)
            n2=n1[iskat(n1,u':')+1:]
            vivod(u' shcom_v_katcom %s\n'%n2)
            usp=prov_dobav_katcom1(nscom,zapriz,katcomsp,n2)
            if usp:
                continue
            pli = wikipedia.Page(pli.site(),pli.getRedirectTarget())
            if pli.isCategory():
                continue
            try:
                get1(pli)
                ti = pli.get()
            except wikipedia.NoPage:
                b = u' - %s:%s\n' % (pli.site().lang,pli.title())
                vivod(b)
                continue
            except wikipedia.IsRedirectPage:
                # Second redirect: give up after trying its name.
                n1=pli.getRedirectTarget()
                n2=n1[iskat(n1,u':')+1:]
                vivod(u' shcom_v_katcom %s\n'%n2)
                prov_dobav_katcom1(nscom,zapriz,katcomsp,n2)
                b = u' + %s:%s\n -> %s\n' % (pli.site().lang,pli.title(),pli.getRedirectTarget())
                vivod(b)
                continue
        otch2.write(u'======shcom_v_katcom ti======\n%s\n-=======\n%s\n===============================\n' % (pli.title(),ti))
        otch2.flush()
        # Collect the categories the fetched Commons page belongs to.
        plikat = pli.categories()
        vivod(u' shcom_v_katcom len plikat %d\n'%len(plikat))
        for pka in plikat:
            n1=pka.title()
            vivod(u' shcom_v_katcom1 %s\n'%n1)
            n2=n1[iskat(n1,u':')+1:]
            prov_dobav_katcom1(nscom,zapriz,katcomsp,n2)
def shcom_prov_uzxe_povt(shcomi,shcomo):
    """Mark Commons templates in shcomo unusable (neisp=1) when they are
    already present in the target article's templates shcomi, or when
    they duplicate an earlier usable entry of shcomo."""
    for j in range(len(shcomo)):
        sh=shcomo[j]
        vivod(u' %s\n'%sh.tekst())
        if sh.neisp:
            vivod(u' <-neisp shcom\n')
            continue
        # Already on the target page?
        fuzxe=0
        for sh0 in shcomi:
            if not sh.sravn_com(sh0):
                fuzxe=1
                break
        if fuzxe:
            shcomo[j].neisp=1
            vivod(u' <-uzxe shcom\n')
            continue
        # Duplicate of an earlier collected template?
        fpovt=0
        for k in range(j):
            if (not shcomo[k].neisp) and (not sh.sravn_com(shcomo[k])):
                fpovt=1
                break
        if fpovt:
            shcomo[j].neisp=1
            vivod(u' <-povt shcom\n')
            continue
def put_prov(pl,nt,komm,me):
    """Save text nt to page pl with summary komm (minor edit flag me).

    NOTE(review): the early 'return' below deliberately disables the
    verify-and-retry logic that follows; everything after it is dead
    code kept for reference.
    """
    pl.put(nt, comment = komm, minorEdit = me)
    return
    # --- disabled retry logic: re-fetch the page and retry the save up
    # to 3 times when the stored text did not change. ---
    ot=pl.get()
    i=0
    while i<3:
        pl.put(nt, comment = komm, minorEdit = me)
        pl1=wikipedia.Page(pl.site(), pl.title())
        try:
            t1=pl1.get()
        except wikipedia.NoPage:
            b = u' put_prov: - %s\n' % pl.title()
            vivod(b)
            t1=u''
        except wikipedia.IsRedirectPage:
            b = u' put_prov: - %s\n -> %s\n' % (pl.title(),pl.getRedirectTarget())
            vivod(b)
            return
        if t1!='' and ot!=t1:
            break
        vivod(u'!!!! neud put() %s'%pl.title())
        pl=pl1
        i+=1
def vibor_iwi(tpl):
    """Select at most 8 interwiki pages from tpl, preferring the largest
    wikis.  tpri ranks language codes (lower = preferred); unknown
    languages rank 1000.  Returns the chosen pages best-first."""
    nmax=8  # maximum number of language versions to fetch
    tpri={u'en': 0, u'de': 1, u'ja': 2, u'fr': 3, u'pl': 4, u'nl': 5, u'it': 6,
        u'es': 7, u'sv': 8, u'pt': 9, u'zh': 10, u'he': 11, u'ru': 12,
        u'no': 13, u'fi': 14, u'cs': 15, u'da': 16, u'hu': 17, u'eo': 18,
        u'ca': 19, u'bg': 20, u'sl': 21, u'sk': 22, u'et': 23, u'sr': 24,
        u'id': 25, u'ro': 26, u'nn': 27, u'ko': 28, u'gl': 29, u'hr': 30,
        u'uk': 31, u'lt': 32, u'ms': 33, u'vi': 34, u'tr': 35, u'th': 36,
        u'io': 37, u'el': 38, u'ar': 39, u'is': 40, u'lb': 41, u'tt': 42,
        u'simple': 43, u'bs': 44, u'ka': 45, u'fa': 46, u'sh': 47, u'af': 48,
        u'br': 49, u'la': 50,}
    # Pair each page with its rank, sort, keep the first nmax.
    ts=[]
    for i in range(len(tpl)):
        jaz=tpl[i].site().lang
        z=1000
        if tpri.has_key(jaz):
            z=tpri[jaz]
        ts.append((z,i))
        vivod(u'vibor_iwi1: z=%d jaz=%s\n'%(z,tpl[i].site().lang))
    ts.sort()
    ntpl=[]
    n=0
    for z,i in ts:
        ntpl.append(tpl[i])
        vivod(u'vibor_iwi2: z=%d jaz=%s\n'%(z,tpl[i].site().lang))
        n+=1
        if n>=nmax:
            vivod(u'vibor_iwi: otbr!\n')
            break
    return ntpl
def rabota(slov,nscom,nssvoj,zapriz,tbl_pl,plprov,fproviz):
    """Process one article: gather images, Commons templates, external
    links, 'see also' links and categories from its interwiki versions,
    translate the captions, and append the new material to the article
    (overflow goes to an '/Aldonaĵoj' subpage).

    Parameters: slov translation dictionary; nscom existing Commons
    titles; nssvoj own-namespace data for categories; zapriz forbidden
    names; tbl_pl page buffer table; plprov page to process; fproviz
    nonzero = Commons-category-check mode only (no article edit).
    """
    si0=plprov.site()
    pl=wikipedia.Page(si0, plprov.title())
    vivod(u'%s\n'%pl.title())
    try:
        plprov=get_tbl(tbl_pl,plprov)
        ot = plprov.get()
    except wikipedia.NoPage:
        b = u'- %s\n' % plprov.title()
        vivod(b)
        return
    except wikipedia.IsRedirectPage:
        b = u'+ %s\n-> %s\n' % (plprov.title(), plprov.getRedirectTarget())
        vivod(b)
        return
    if len(plprov.interwiki())<1:
        vivod(u'-iwi\n')
        return
    try:
        ot = pl.get()
    except wikipedia.NoPage:
        b = u'-2 %s\n' % pl.title()
        vivod(b)
        return
    except wikipedia.IsRedirectPage:
        b = u'+2 %s\n-> %s\n' % (pl.title(),pl.getRedirectTarget())
        vivod(b)
        return
    otch.write(u'+ %s\n' % pl.title())
    otch.flush()
    otch2.write(u'%s\n-=======\n%s\n========\n' % (pl.title(),ot))
    otch2.flush()
    # Collect interwiki links of the article (excluding self).
    oiwi = pl.interwiki()
    niwi = {}
    for pl2 in oiwi:
        if pl.site()!=pl2.site():
            niwi[pl2.site()] = pl2
    if len(niwi)<1:
        vivod(u'-iwi2\n')
        return
    plkat = pl.categories()
    # nt = the new article text being built; start from the original
    # with language links removed and whitespace normalised.
    nt = wikipedia.removeLanguageLinks(ot)
    # nt = wikipedia.removeCategoryLinks(nt,pl.site())
    nt = nt.replace(u'\r',u'')
    while u' \n' in nt:
        nt = nt.replace(u' \n',u'\n')
    if len(nt)<1 or nt[len(nt)-1]!=u'\n':
        nt+=u'\n'
    fzam=0  # set to 1 whenever something will actually be added
    jaz0=pl.site().lang
    ntbk=ubrkoment(nt)
    # Skip disambiguation pages.
    shapam=pl.site().family.disambig(jaz0)
    fapa=0
    for shapa in shapam:
        if (iskats(ot,0,u'{{'+shapa+'}}')!=-1 or
           iskats(ot,0,u'{{'+shapa[0].upper()+shapa[1:]+'}}')!=-1):
            fapa=1
            break
    if fapa:
        b = u'apa %s\n' % pl.title()
        vivod(b)
        return
    # res0 = images already present in the article.
    res0=image_podp(pl.site(),ntbk)
    vivod(u'len(res0)=%d\n'%len(res0))
    kati=[]
    for pka in plkat:
        kati.append(Kateg(jaz0,pka.title(),1))
    nomio=[(pl.title(),jaz0)]
    shcomi=[]
    eksli=[]
    ssilki=[]
    shcomi=isk_shabl(jaz0,ntbk,(u'Commons',u'Commonscat'))
    for sh in shcomi:
        korr_com_shabl(sh)
    if not fproviz:
        # eksli=isk_eksl(jaz0,ntbk)
        ssilki=isk_ssilk(pl.site(),ntbk)
    # *o lists collect candidates found in the other-language versions.
    reso=[]
    shcomo=[]
    ekslo=[]
    ssilko=[]
    tablo=[]
    kato=[]
    itbot = imagetransfer1.ImageTransferBot(
        targetSite = wikipedia.getSite(),otch=otch,opis=otch2,
        otchnc=otchnc,nscom=nscom,otchkc=otchkc,otchup=otchup)
    itbot.tbl_pl=tbl_pl
    # for j in range(len(res0)):
    # res0[j].tip=10
    # iz=res0[j].iz
    # b2=res0[j].podp
    # b2=b2.replace(u'\n',u' ')
    # b = u' b0 %s %s\n' % (iz,b2)
    # vivod(b)
    # itbot.zagruz_info(res0,1)
    # if fproviz:
    # fnajd=0
    # itbot.prov_nekatco(res0)
    # for j in range(len(res0)):
    # if res0[j].nekatco:
    # fnajd=1
    # if not fnajd:
    # return
    # Visit up to 8 interwiki versions, largest wikis first.
    niwisp=[]
    for pli in niwi.itervalues():
        niwisp.append(pli)
    for pli in vibor_iwi(niwisp):
        # if pli.site().lang!=u'en' and pli.site().lang!=u'hr':
        # continue
        try:
            # get1(pli)
            pli=get_tbl(tbl_pl,pli)
            ti = pli.get()
        except wikipedia.NoPage:
            b = u' - %s:%s\n' % (pli.site().lang,pli.title())
            vivod(b)
            continue
        except wikipedia.IsRedirectPage:
            # Follow one redirect level.
            b = u' + %s:%s\n -> %s\n' % (pli.site().lang,pli.title(),pli.getRedirectTarget())
            vivod(b)
            pli = wikipedia.Page(pli.site(),pli.getRedirectTarget())
            try:
                # get1(pli)
                pli=get_tbl(tbl_pl,pli)
                ti = pli.get()
            except wikipedia.NoPage:
                b = u' - %s:%s\n' % (pli.site().lang,pli.title())
                vivod(b)
                continue
            except wikipedia.IsRedirectPage:
                b = u' + %s:%s\n -> %s\n' % (pli.site().lang,pli.title(),pli.getRedirectTarget())
                vivod(b)
                continue
        if pli.isCategory():
            b = u' +kat %s:%s\n' % (pli.site().lang,pli.title())
            vivod(b)
            continue
        if pli.isImage():
            b = u' +bil %s:%s\n' % (pli.site().lang,pli.title())
            vivod(b)
            continue
        b = u' + %s:%s\n' % (pli.site().lang,pli.title())
        vivod(b)
        ti=ubrkoment(ti)
        ti = ti.replace(u'\r',u'')
        # Collapse duplicate spaces and trailing blanks.
        while u'  ' in ti:
            ti = ti.replace(u'  ',u' ')
        while u' \n' in ti:
            ti = ti.replace(u' \n',u'\n')
        # Bounds of this version's "See also" section.
        (p0,p1)=isk_zagolov(ti,pli.site().lang,t_zag_vian)
        p2=isk_kon_zagolov(ti,p0,p1)
        plikat = pli.categories()
        for pka in plikat:
            kato.append(Kateg(pli.site().lang,pka.title(),1))
        nomio.append( (pli.title(),pli.site().lang) )
        shcoma=isk_shabl(pli.site().lang,ti,(u'Commons',u'Commonscat'))
        for sh in shcoma:
            korr_com_shabl(sh)
            shcomo.append(sh)
        if not fproviz:
            # Only harvest images when the article has none of its own.
            if len(res0)==0:
                res1=image_podp(pli.site(),ti)
                for iz in res1:
                    reso.append(iz)
            # eksla=isk_eksl(pli.site().lang,ti)
            # for eksl in eksla:
            # ekslo.append(eksl)
            ssilka=isk_ssilk_smt(pli.site(),ti[p1:p2])
            for ssilk in ssilka:
                ssilko.append(ssilk)
            # tabla=isk_tabl(pli.site().lang,ti)
            # for tabl in tabla:
            # tablo.append(tabl)
    # Counters of items actually added to the article / subpage.
    nnov_shcom=0
    nnov_eksl=0
    nnov_ssilk=0
    nnov_tabl=0
    nnov_kat=0
    ntdi=u''
    nnovdi_eksl=0
    sico=wikipedia.getSite(code = u'commons',fam = u'commons')
    nt_shcom=u''
    shcom_prov_uzxe_povt(shcomi,shcomo)
    # Verify collected Commons templates actually exist (buffered fetch).
    if len(shcomo)>0:
        nkpshpl=[]
        for sh in shcomo+shcomi:
            if sh.neisp:
                continue
            sh.pl=wikipedia.Page(sico,sh.arg[0])
            nkpshpl.append(sh.pl)
        sozd_buf_tbl(tbl_pl,nkpshpl,otch)
        for sh in shcomo+shcomi:
            if sh.neisp:
                continue
            try:
                sh.pl=get_tbl(tbl_pl,sh.pl)
                sh.pl.get()
                b = u' + shcom:%s\n' % sh.pl.title()
                vivod(b)
                sh.arg[0]=sh.pl.title()
            except wikipedia.NoPage:
                b = u' - shcom:%s\n' % sh.pl.title()
                vivod(b)
                sh.neisp=1
            except wikipedia.IsRedirectPage:
                b = u' + shcom:%s\n -> %s\n' % (sh.pl.title(),
                    sh.pl.getRedirectTarget())
                vivod(b)
                sh.arg[0]=sh.pl.getRedirectTarget()
        shcom_prov_uzxe_povt(shcomi,shcomo)
    for j in range(len(shcomo)):
        if shcomo[j].neisp:
            continue
        nt_shcom+= u'%s\n'%shcomo[j].tekstn(1)
        nnov_shcom+=1
        fzam=1
    if nnov_shcom:
        nt_shcom+= u'<br clear=all>\n'
    # Tables: translate and add as commented-out text.
    nt_tabl=u''
    for j in range(len(tablo)):
        ta=tablo[j]
        tat=ta.tekst().replace(u'\n',u' ')
        vivod(u' tabl: %s\n'%tat)
        if prov_tekst_uzxe(nt,ta.tekst()):
            vivod(u' <-uzxe tabl\n')
            continue
        fpovt=0
        for k in range(j):
            if not ta.sravn(tablo[k]):
                fpovt=1
                break
        if fpovt:
            vivod(u' <-povt tabl\n')
            continue
        tap=perevod_ob(slov,ta.jaz,jaz0,ta.tekst()).t
        nt_tabl+= u'<!-- %s -->\n'%tap
        nnov_tabl+=1
        fzam=1
    # External links: translate captions, drop known/duplicate ones,
    # merge caption/usage info of duplicates, keep the best 4.
    for j in range(len(ekslo)):
        eksl=ekslo[j]
        ekslo[j].perpodp=perevod_ob(slov,eksl.jaz,jaz0,eksl.podp)
        ekslo[j].perpodp2=Perev_podp(u'')
    nt_eksl=u''
    ntdi_eksl=u''
    for j in range(len(ekslo)):
        ekslo[j].tip=0
        eksl=ekslo[j]
        perpodp=eksl.perpodp
        njazisp=eksl.njazisp
        # vivod(u' %s\n'%eksl.tekst())
        vivod(u' %s %d %s\n'%(eksl.jaz,eksl.njazisp,eksl.adr))
        fuzxe=0
        for eksl0 in eksli:
            if not eksl.sravn(eksl0):
                fuzxe=1
                break
        if fuzxe:
            ekslo[j].tip=80
            vivod(u' <-uzxe eksl\n')
            continue
        if zapriz.has_key(eksl.adr):
            ekslo[j].tip=80
            vivod(u' <-zapr\n')
            continue
        fpovt=0
        for k in range(len(ekslo)):
            if (k!=j) and not eksl.sravn(ekslo[k]):
                if k<j:
                    fpovt=1
                    break
                perpodp1=ekslo[k].perpodp
                perpodp=objed_podp(perpodp,perpodp1)
                njazisp+=ekslo[k].njazisp
        if fpovt:
            ekslo[j].tip=80
            vivod(u' <-povt eksl\n')
            continue
        ekslo[j].perpodp2=perpodp
        ekslo[j].njazisp2=njazisp
    for j in range(len(ekslo)):
        ekslo[j].perpodp=ekslo[j].perpodp2
        ekslo[j].njazisp=ekslo[j].njazisp2
        if ekslo[j].tip<80:
            ekslo[j].prioritet=ekslo[j].njazisp
    ekslo.sort(key=Eksl.vz_prioritet,reverse=True)
    maksn_eksl=4
    for j in range(len(ekslo)):
        if ekslo[j].tip>=80:
            continue
        if ekslo[j].perpodp.t!=u'':
            ekslo[j].perpodp.t=u'<!-- '+ekslo[j].perpodp.t+u' -->'
        nt1= u'{{el}} %s\n'%ekslo[j].pertekst()
        if j+len(eksli)<maksn_eksl:
            nt_eksl+=nt1
            nnov_eksl+=1
            fzam=1
        else:
            # ntdi_eksl+=nt1
            # nnovdi_eksl+=1
            pass
    # 'See also' links: translate targets via interwiki dictionary.
    for j in range(len(ssilko)):
        ss=ssilko[j]
        pe=perevod.perevod_iwi(slov,ss.jaz,jaz0,ss.ss)
        vivod(u' ss %s:%s -> %s\n'%(ss.jaz,ss.ss,pe))
        ssilko[j].ss=pe
    nt_ssilk=u''
    for j in range(len(ssilko)):
        ss=ssilko[j]
        if ss.ss==u'':
            continue
        vivod(u' %s\n'%ss.ss)
        fuzxe=0
        for ss0 in ssilki:
            if iskats(ss.ss,0,u'[['+ss0.ss+u']]')!=-1:
                fuzxe=1
                break
        if fuzxe:
            vivod(u' <-uzxe ssilk\n')
            continue
        fpovt=0
        for k in range(j):
            if not ss.sravn(ssilko[k]):
                fpovt=1
                break
        if fpovt:
            vivod(u' <-povt ssilk\n')
            continue
        nt_ssilk+= u'* %s\n'%ss.ss
        nnov_ssilk+=1
        fzam=1
    # Images, pass 1: filter obvious junk names and translate captions.
    for j in range(len(reso)):
        reso[j].perpodp2=Perev_podp(u'')
        if reso[j].tip>=80:
            continue
        iz=reso[j].iz
        # 'NNpx-' prefixed names are scaled thumbnails, not originals.
        if ((len(iz)>=6 and iz[0:2].isdigit() and iz[2:5]==u'px-') or
           (len(iz)>=7 and iz[0:3].isdigit() and iz[3:6]==u'px-')):
            reso[j].tip=90
            vivod(u' - px- %s\n'%iz)
            continue
        # Mostly-numeric base names (camera file names) are skipped.
        ik=iskato(iz,len(iz)-1,u'.')
        if ik==-1:
            ik=len(iz)
        izi=iz[:ik]
        nalf=0
        ncif=0
        for c in izi:
            if c.isdigit():
                ncif+=1
            # if c.isalpha():
            else:
                nalf+=1
        if ncif>=4 and ncif>=nalf-2:
            reso[j].tip=80
            vivod(u' - cif %s\n'%iz)
            continue
        podp=reso[j].podp
        jaz=reso[j].jaz
        podp=podp.replace(u'\n',u' ')
        while u'  ' in podp:
            podp = podp.replace(u'  ',u' ')
        if podp==u' ':
            podp=u''
        perpodp=perevod_ob(slov,jaz,jaz0,podp)
        reso[j].perpodp=perpodp
    # Images, pass 2: drop forbidden/known names, merge duplicates.
    for j in range(len(reso)):
        reso[j].tip2=reso[j].tip
        if reso[j].tip>=80:
            continue
        iz=reso[j].iz
        perpodp=reso[j].perpodp
        jaz=reso[j].jaz
        njazisp=reso[j].njazisp
        b = u' b %s:%s %s\n' % (jaz,iz,podp)
        vivod(b)
        if zapriz.has_key(iz):
            reso[j].tip2=80
            vivod(u' <-zapr\n')
            continue
        fuzxe=0
        for k in range(len(res0)):
            if res0[k].tip<80 and res0[k].iz==iz:
                fuzxe=1
                break
        if fuzxe:
            reso[j].tip2=80
            vivod(u' <-uzxe\n')
            continue
        fpovt=0
        k=0
        while k<len(reso):
            if reso[k].tip<80 and (k!=j) and (reso[k].iz==iz):
                if k<j:
                    fpovt=1
                    break
                perpodp1=reso[k].perpodp
                perpodp=objed_podp(perpodp,perpodp1)
                njazisp+=reso[k].njazisp
                if perpodp1.t!=u'':
                    b = u' %s\n' % (perpodp1.t)
                    vivod(b)
            k+=1
        if fpovt:
            reso[j].tip2=80
            vivod(u' <-povt\n')
            continue
        reso[j].perpodp2=perpodp
        reso[j].njazisp2=njazisp
    for j in range(len(reso)):
        reso[j].tip=reso[j].tip2
        reso[j].perpodp=reso[j].perpodp2
        reso[j].njazisp=reso[j].njazisp2
    # Resolve image locations / metadata (sets dl, tip, etc.).
    itbot.iskat(reso)
    itbot.zagruz_info(reso,1)
    kprefi=pl.site().family.image_namespace(pl.site().lang)
    # Images, pass 3: drop flags and same-size (dl) duplicates.
    for j in range(len(reso)):
        reso[j].tip2=reso[j].tip
        if reso[j].tip>=80:
            continue
        iz=reso[j].iz
        if (
           #reso[j].tip>=50 and
           (iskat(iz,u'flag')!=-1 or iskat(iz,u'Flag')!=-1 or
           iskat(iz,u'FLAG')!=-1 or
           iskat(iz,u'bandiera')!=-1 or
           iskat(iz,u'Bandiera')!=-1 or
           iskat(iz,u'BANDIERA')!=-1)):
            reso[j].tip2=80
            vivod(u' - flag %s\n'%iz)
            continue
        perpodp=reso[j].perpodp
        njazisp=reso[j].njazisp
        fuzxe=0
        for k in range(len(res0)):
            if res0[k].tip<80 and (res0[k].iz==iz or res0[k].dl==reso[j].dl):
                fuzxe=1
                break
        if fuzxe:
            reso[j].tip2=80
            vivod(u' <-uzxe2\n')
            continue
        fpovt=0
        for k in range(len(reso)):
            if (reso[k].tip<80 and (k!=j) and
               (reso[k].iz==iz or reso[k].dl==reso[j].dl)):
                if reso[k].tip<reso[j].tip or (reso[k].tip==reso[j].tip and k<j):
                    fpovt=1
                    break
                if reso[k].iz==iz:
                    perpodp1=reso[k].perpodp
                    perpodp=objed_podp(perpodp,perpodp1)
                    njazisp+=reso[k].njazisp
        if fpovt:
            reso[j].tip2=80
            vivod(u' <-povt2\n')
            continue
        reso[j].perpodp2=perpodp
        reso[j].njazisp2=njazisp
    for j in range(len(reso)):
        reso[j].tip=reso[j].tip2
        reso[j].perpodp=reso[j].perpodp2
        reso[j].njazisp=reso[j].njazisp2
        if reso[j].tip<80:
            reso[j].prioritet=reso[j].njazisp*1000+1000-reso[j].tip
    reso.sort(key=imagetransfer1.Izobr.vz_prioritet,reverse=True)
    for j in range(len(reso)):
        vivod(u' [[%s:%s]] dl=%d nj=%d tip=%d\n'%
            (kprefi,reso[j].iz,reso[j].dl,reso[j].njazisp,reso[j].tip))
    # Keep at most 4 images in the article (counting existing ones).
    maksn_iz=4
    jn=0
    for j in range(len(reso)):
        if reso[j].tip>=80:
            continue
        if reso[j].tip>=50 and jn+len(res0)>=maksn_iz:
            reso[j].tip=90
            vivod(u'-otbr %s\n'%reso[j].iz)
        jn+=1
    # Commons category handling for images needing transfer (tip==50).
    flkatcom=0
    for j in range(len(reso)):
        if reso[j].tip==50:
            flkatcom=1
    itbot.fkatcom_ns=0
    if flkatcom or fproviz:
        katcomsp=[]
        katprob=[]
        for (n,jaz) in nomio:
            katprob.append( (n,jaz) )
            prov_dobav_katcom(nscom,zapriz,katcomsp,n,jaz)
        if len(katcomsp)<1:
            shcom_v_katcom(nscom,zapriz,katcomsp,shcomi+shcomo)
        if len(katcomsp)<1:
            for ka in kati+kato:
                katprob.append( (ka.n,ka.jaz) )
                prov_dobav_katcom(nscom,zapriz,katcomsp,ka.n,ka.jaz)
        if len(katcomsp)<1:
            # Last resort: try individual words of the candidate names.
            vivod(u'- katcom\n')
            for (n1,jaz) in katprob:
                n2=n1.replace(u'(',u' ').replace(u')',u' ')
                for n in n2.split(u' '):
                    if len(n)>=4:
                        prov_dobav_katcom(nscom,zapriz,katcomsp,n,jaz)
        if len(katcomsp)<1:
            # Nothing found: use non-existing categories as-is.
            vivod(u'- katcom2\n')
            itbot.fkatcom_ns=1
            for (n,jaz) in katprob:
                t=u'Category:'+n
                t1=u'[['+t+u']]\n'
                if (not t1 in katcomsp) and (not zapriz.has_key(n)):
                    katcomsp.append(t1)
        katcom=u''
        for t in katcomsp:
            katcom+=t
        itbot.katcom=katcom
        vivod(u'========katcom=\n%s==========\n' % katcom)
        if itbot.fkatcom_ns:
            for j in range(len(reso)):
                if reso[j].tip==50:
                    reso[j].tip=60
    if not fproviz:
        itbot.kopir_izobr(reso)
    # Build the image wikitext: first 4 inline, overflow to the subpage;
    # more than 2 images become a <gallery>.
    nnov_iz=0
    nnovdi_iz=0
    jn=0
    nt_iz=u''
    ntdi_iz=u''
    nt_izga=u'<gallery>\n'
    ntdi_izga=u'<gallery>\n'
    for j in range(len(reso)):
        if reso[j].tip>=80:
            continue
        t=u''
        if reso[j].perpodp.t!=u'':
            t=u'|<!-- %s -->' % reso[j].perpodp.t
        nt1= u'[[%s:%s|thumb|left|180px%s]]\n'%(kprefi,reso[j].iz,t)
        nt2=(u'%s:%s|<!-- thumb|left|180px|%s -->\n'%
            (kprefi,reso[j].iz,reso[j].perpodp.t))
        if jn+len(res0)<maksn_iz:
            nt_iz+=nt1
            nt_izga+=nt2
            nnov_iz+=1
            fzam=1
        else:
            ntdi_iz+=nt1
            ntdi_izga+=nt2
            nnovdi_iz+=1
        jn+=1
    nt_izga+=u'</gallery>\n'
    ntdi_izga+=u'</gallery>\n'
    if nnov_iz>2:
        nt_iz=nt_izga
    if nnovdi_iz>2:
        ntdi_iz=ntdi_izga
    if nnov_iz:
        nt_iz+= u'<br clear=all>\n'
    # Assemble the new article text.
    nt+=nt_tabl
    nt+=nt_iz
    if len(nt_ssilk)>5:
        (pzssilk0,pzssilk1)=isk_zagolov(nt,jaz0,t_zag_vian)
        if pzssilk0==pzssilk1:
            nt+=u'\n== '+gl_zagolov(jaz0,t_zag_vian)+u' ==\n'
            pzssilk1=len(nt)
        nt=nt[:pzssilk1]+nt_ssilk+nt[pzssilk1:]
    nt+=nt_shcom
    if len(nt_eksl)>5:
        (pzeksl0,pzeksl1)=isk_zagolov(nt,jaz0,t_zag_eksl)
        if pzeksl0==pzeksl1:
            nt+=u'\n== '+gl_zagolov(jaz0,t_zag_eksl)+u' ==\n'
            pzeksl1=len(nt)
        nt=nt[:pzeksl1]+nt_eksl+nt[pzeksl1:]
    if nnovdi_iz>0:
        nt_shalka=u'{{%s 2}}'%wikipedia.translate(si0,te_al_bildoj)
        if iskat(nt,nt_shalka)==-1:
            nt+=nt_shalka+u'\n'
        ntdi+=u'\n== %s ==\n'%wikipedia.translate(si0,te_al_bildoj)+ntdi_iz
        fzam=1
    if nnovdi_eksl>0:
        nt_shalel=u'{{%s 2}}'%wikipedia.translate(si0,te_al_ekslj)
        if iskat(nt,nt_shalel)==-1:
            nt+=nt_shalel+u'\n'
        ntdi+=u'\n== %s ==\n'%wikipedia.translate(si0,te_al_ekslj)+ntdi_eksl
        fzam=1
    # Categories: only added when the article has none of its own.
    nt_kat_sp=[]
    glb=0
    if len(kati)==0:
        nt_kat_sp=perev_kateg(slov,nssvoj,pl.site(),kato,1,otch)
    nnov_kat=len(nt_kat_sp)
    if nnov_kat>0:
        fzam=1
    nt_kat=kateg_v_tekst(nt_kat_sp)
    nt+=nt_kat
    if fproviz:
        # Commons-category-check mode: only tag uncategorised images.
        for j in range(len(res0)):
            # if res0[j].tip>=80 or res0[j].nekatco:
            if res0[j].nekatco:
                tc=res0[j].plcot+u'\n\n'+katcom
                otch2.write(u'%s\n+=======\n%s\n====================================\n' % (res0[j].plco.title(),tc))
                otch2.flush()
                res0[j].plco.put(tc,comment = u'', minorEdit = False)
                vivod(u'++ kat -> %s\n' % res0[j].plco.title())
        # NOTE(review): fnajd is never assigned in this function (it only
        # appears in the commented-out block above) — this line raises
        # NameError when reached; confirm intent before re-enabling.
        if not fnajd:
            return
    if (not fproviz) and fzam:
        # Save the updated article with a summary listing what was added.
        if nt[len(nt)-1:]==u'\n':
            nt=nt[:len(nt)-1]
        # nt_stkat=wikipedia.replaceCategoryLinks(u'', ocat, pl.site())
        # nt+=wikipedia.replaceLanguageLinks(nt_stkat, niwi)
        nt+=u'\n'+wikipedia.replaceLanguageLinks(u'', niwi)
        otch2.write(u'%s\n+=======\n%s\n====================================\n' % (pl.title(),nt))
        otch2.flush()
        t = u"AAld:"
        if nnov_iz>0:
            t+=u' + %d %s'%(nnov_iz,wikipedia.translate(si0,te_bildoj))
        if nnov_shcom>0:
            t+=u' + %d %s'%(nnov_shcom,wikipedia.translate(si0,te_komunejoj))
        if nnov_eksl>0:
            t+=u' + %d %s'%(nnov_eksl,wikipedia.translate(si0,te_ekslj))
        if nnov_ssilk>0:
            t+=u' + %d %s'%(nnov_ssilk,wikipedia.translate(si0,te_ligoj))
        if nnov_tabl>0:
            t+=u' + %d %s'%(nnov_tabl,wikipedia.translate(si0,te_tabeloj))
        if nnov_kat>0:
            t+=u' + %d %s'%(nnov_kat,wikipedia.translate(si0,te_kategorioj))
        if nnovdi_iz>0 or nnovdi_eksl>0:
            t+=u' + (%s)'%wikipedia.translate(si0,te_vi_di)
        vivod(t+u'\n')
        try:
            put_prov(pl,nt,t,False)
        except wikipedia.EditConflict:
            vivod('!!!EditConflict!!!\n')
        except wikipedia.LockedPage:
            vivod('!!!LockedPage!!!\n')
    if (not fproviz) and (nnovdi_iz>0 or nnovdi_eksl>0):
        # Overflow material goes to the '<article>/Aldonaĵoj' subpage.
        # diprefi=pl.site().family.namespace(pl.site().lang, 1, '_default')
        # pldi=wikipedia.Page(pl.site(),diprefi+u':'+pl.title())
        pldi=wikipedia.Page(pl.site(),pl.title()+u'/'+
            wikipedia.translate(si0,te_aldon))
        try:
            tdi = pldi.get()
            vivod(u'+ %s\n' % pldi.title())
        except wikipedia.NoPage:
            tdi=u''
            vivod(u'- %s\n' % pldi.title())
        except wikipedia.IsRedirectPage:
            tdi=u'--> [['+pldi.getRedirectTarget()+u']]\n----\n'
            vivod(u'+ %s\n-> %s\n' % (pldi.title(),pldi.getRedirectTarget()))
        ntdi+=u'\n[[%s]]\n'%wikipedia.translate(si0,te_aldon_kat)
        tdi+=u'\n'+ntdi
        otch2.write(u'%s\n+=======\n%s\n====================================\n' % (pldi.title(),ntdi))
        otch2.flush()
        # t = u"AAld (%s):"%wikipedia.translate(si0,te_diskuto)
        t = u"AAld (%s):"%(wikipedia.translate(si0,te_aldon).lower())
        if nnovdi_iz>0:
            t+=u' + %d %s'%(nnovdi_iz,wikipedia.translate(si0,te_bildoj))
        if nnovdi_eksl>0:
            t+=u' + %d %s'%(nnovdi_eksl,wikipedia.translate(si0,te_ekslj))
        vivod(t+u'\n')
        try:
            pldi.put(tdi, comment = t, minorEdit = False)
        except wikipedia.EditConflict:
            vivod('!!!EditConflict!!!\n')
        except wikipedia.LockedPage:
            vivod('!!!LockedPage!!!\n')
    return
def zapis_fimen(fimen,nomerf,pl):
    """Append 'nomerf title' of the processed page pl to the progress
    file fimen (used to resume after a restart)."""
    fimen.write(u'%d %s\n' % (nomerf,pl.title()))
    fimen.flush()
def provstop(fstop):
    """True when the stop-request file fstop exists (operator's way to
    ask the bot to halt between pages)."""
    return os.access(fstop, os.F_OK)
def ivsenmain():
    # Main batch loop: read the control file "<filename>.txt" (first line =
    # dictionary/control description, last line = "<chunk number> <resume
    # title>"), then walk all pages from the resume point in batches of 16,
    # prefetching the pages, their interwikis and candidate images, and run
    # rabota() on each page.  An operator can create the stop file to make
    # the loop exit cleanly between batches.
    fstop='ivsenstop.txt'
    # Remove a stale stop file from a previous run; best-effort on purpose.
    try:
        os.unlink(fstop)
    except:
        pass
    if provstop(fstop):
        wikipedia.output(u'ivsenstop0!\n')
        return
    n=u''
    skon=u''
    nomerf=-1
    f=codecs.open(filename+'.txt','rb',encoding='utf-8')
    frl=f.readlines()
    mysite = wikipedia.getSite()
    slov={}      # translation dictionary
    nscom={}     # set of titles known to exist on Commons
    nssvoj={}    # set of titles of the home wiki
    zapriz={}    # forbidden images
    zaprobr={}   # pages to skip entirely
    fproviz=0    # "check images only" mode flag (empty first line)
    i=0
    j=0
    fl1=1        # true while reading the first (control) line
    for s in frl:
        # Strip a possible UTF-8 BOM (U+FEFF) and line endings.
        if ord(s[0]) == 65279:
            s=s[1:]
        s=s.replace(u'\r',u'')
        if s[len(s)-1]==u'\n':
            s=s[:len(s)-1]
        if fl1:
            if s==u'':
                fproviz=1
            else:
                # First line names the master dictionary file; auxiliary
                # sets come back through slovdop ('c' Commons, 'z'
                # forbidden images, 'zo' skipped pages).
                slovdop={}
                perevod.zagruzslov(slov,slovdop,nssvoj,u'',mysite.lang,s)
                if slovdop.has_key(u'c'):
                    nscom=slovdop[u'c']
                if slovdop.has_key(u'z'):
                    zapriz=slovdop[u'z']
                if slovdop.has_key(u'zo'):
                    zaprobr=slovdop[u'zo']
            fl1=0
        else:
            # Remember the last non-empty line: "<number> <resume title>".
            if len(s)>=1:
                skon=s
    frl=[]
    f.close()
    # Parse the leading chunk number from the resume line by hand.
    nomerf=0
    i=0
    while (i<len(skon)) and provcifr(skon[i]):
        nomerf=nomerf*10+ord(skon[i])-ord(u'0')
        i+=1
    i=iskat(skon,u' ')
    if i!=-1:
        nomsta=skon[i+1:]
    else:
        nomsta=u'!'    # '!' sorts first: start from the beginning
    nomerf+=1
    # Open the per-chunk report files (main log, descriptions, no-category,
    # category, upload).
    global otch,otch2,otchnc,otchkc,otchup
    otch = codecs.open(u'%s_ot%06d.txt' % (filename,nomerf), 'w', 'utf-8')
    otch2 = codecs.open(u'%s_op%06d.txt' % (filename,nomerf), 'w', 'utf-8')
    otchnc = codecs.open(u'%s_nc%06d.txt' % (filename,nomerf), 'w', 'utf-8')
    otchkc = codecs.open(u'%s_kc%06d.txt' % (filename,nomerf), 'w', 'utf-8')
    otchup = codecs.open(u'%s_up%06d.txt' % (filename,nomerf), 'w', 'utf-8')
    fimen=codecs.open(filename+'.txt','a',encoding='utf-8')
    otch.write(u'fproviz=%d\n\n'%fproviz)
    otch.flush()
    paggen=pagegenerators.AllpagesPageGenerator(nomsta, 0)
    flperv=1     # skip the first generated page (already processed last run)
    nkp=[]       # current batch of pages
    maxnkp=16    # batch size
    for pla in paggen:
        if flperv==1:
            vivod(u'-uzxe %s\n'%pla.title())
            flperv=0
            continue
        if zaprobr.has_key(pla.title()):
            vivod(u'-zaprobr %s\n'%pla.title())
            continue
        nkp.append(pla)
        if len(nkp)<maxnkp:
            continue
        # --- a full batch collected: prefetch everything it needs ---
        tbl={}
        sozd_buf_tbl(tbl,nkp,otch)
        nkpi=[]      # interwiki counterparts of the batch pages
        tdliz=[]     # counterparts of pages that have no images yet
        for pl in nkp:
            try:
                pl=get_tbl(tbl,pl)
                ot = pl.get()
                pl_iz=image_podp(pl.site(),ubrkoment(ot))
                vivod(u'%s len(pl_iz)=%d\n'%(pl.title(),len(pl_iz)))
                for pli in vibor_iwi(pl.interwiki()):
                    nkpi.append(pli)
                    if len(pl_iz)==0:
                        tdliz.append(pli)
            except wikipedia.NoPage:
                pass
            except wikipedia.IsRedirectPage:
                pass
        sozd_buf_tbl(tbl,nkpi,otch)
        # Harvest candidate images from the interwiki versions of the
        # image-less pages.
        tiz=[]
        for pl in tdliz:
            try:
                pl=get_tbl(tbl,pl)
                ot = pl.get()
                tiz+=image_podp(pl.site(),ubrkoment(ot))
            except wikipedia.NoPage:
                pass
            except wikipedia.IsRedirectPage:
                pass
        # Prefetch image description pages (Commons, source wiki, home wiki).
        nkpiz=[]
        sico=wikipedia.getSite(code = u'commons',fam = u'commons')
        msprefi=mysite.family.image_namespace(mysite.lang)
        for izo in tiz:
            ctit=u'Image:'+izo.iz
            if nscom.has_key(ctit):
                vivod(u'tc+ %s\n' % ctit)
            else:
                nkpiz.append(wikipedia.Page(sico,ctit))
            sourceSite=wikipedia.getSite(izo.jaz,fam = mysite.family)
            isprefi=sourceSite.family.image_namespace(izo.jaz)
            nkpiz.append(wikipedia.Page(sourceSite,isprefi+u':'+izo.iz))
            nkpiz.append(wikipedia.Page(mysite,msprefi+u':'+izo.iz))
        sozd_buf_tbl(tbl,nkpiz,otch)
        vivod(u'\n\n********************************\n\n\n')
        # Process the batch and record progress after each page.
        for pl in nkp:
            rabota(slov,nscom,nssvoj,zapriz,tbl,pl,fproviz)
            otch.write(u'\n\n\n')
            otch.flush()
            zapis_fimen(fimen,nomerf,pl)
        vivod(u'\n\n********************************\n\n\n')
        nkp=[]
        # Honor the operator stop file between batches.
        if provstop(fstop):
            wikipedia.output(u'ivsenstop!\n')
            return
# Script entry point: sys.argv[1] is the base name of the control file
# ("<name>.txt") that ivsenmain() reads and appends progress to.
filename = wikipedia.argHandler(sys.argv[1], 'cht_cat')
#import logger
#sys.stdout = logger.Logger(sys.stdout, filename = u'%s_lo%06d.txt' % (filename,nomerf))
#mysite = wikipedia.getSite()
try:
#    int(u'ert')
    ivsenmain()
except:
    # Disconnect the bot framework before re-raising so the throttle state
    # is released even on failure.
    wikipedia.stopme()
    raise
else:
    wikipedia.stopme()
</nowiki>
* imagetransfer1.py
# -*- coding: utf-8 -*-
__version__='$Id: imagetransfer.py,v 1.44 2005/08/14 19:47:52 wikipedian Exp $'
import re, sys, md5, urllib, codecs
import wikipedia, upload1, config, pagegenerators, family
from bib_tekst import *
from get_buf import *
# Text placed on the copied image's description page;
# first %s = source site, second %s = the original description.
copy_message = {
    'en':u"This image was copied from %s. The original description was:\r\n\r\n%s",
    'eo':u"La bildo estas kopiita de %s. La originala priskribo estas:\r\n\r\n%s",
    'de':u"Dieses Bild wurde von %s kopiert. Die dortige Beschreibung lautete:\r\n\r\n%s",
    'nl':u"Afbeelding gekopieerd vanaf %s. De beschrijving daar was:\r\n\r\n%s",
    'pt':u"Esta imagem foi copiada de %s. A descrição original foi:\r\n\r\n%s",
    'ru':u"Это изображение скопировано из %s. Его исходное описание:\r\n\r\n%s",
    }
# Section heading for the extracted file-history table.
istf_message = {
    'en':u"File history",
    'eo':u"Historio de la dosiero",
    'ru':u"История изменений файла",
    }
#nowCommonsTemplate = {
#   'de': u'{{NowCommons|%s}}',
#   'fr': u'{{Désormais sur Commons|%s}}',
#   'en': u'{{NowCommons|Image:%s}}',
#   'nl': u'{{NuCommons|%s}}',
#   'pt': u'{{NowCommons|%s}}',
#}
# Local names of the {{NowCommons}} template per language (used to detect
# that an image has already moved to Wikimedia Commons).
nowCommonsTemplate = {
    'ca': u'AraCommons',
    'es': u'EnCommons',
    'et': u'NüüdCommonsis',
    'fr': u'Désormais sur Commons',
    'nl': u'NuCommons',
    'ru': u'Now Commons',
    }
# Edit summary used when tagging a source image as moved to Commons.
nowCommonsMessage = {
    'de': u'Datei ist jetzt auf Wikimedia Commons verfügbar.',
    'en': u'File is now available on Wikimedia Commons.',
    'eo': u'La dosiero nun estas en Komunejo.',
    'pt': u'Arquivo está agora na Wikimedia Commons.',
    }
# Translations for license templates, keyed by (source site, target site).
# Must only be given when they are in fact different.
licenseTemplates = {
    ('wikipedia:es', 'commons:commons'): {
        u'DP': u'PD',
    },
}
# License-template renames applied to every source wiki's description text.
licenseTemplates_o = {
    u'Bild-GFDL': u'GFDL',
    u'Bild-GFDL-OpenGeoDB': u'GFDL-OpenGeoDB',
    u'Bild-PD': u'PD',
    u'Bild-PD-US': u'PD-USGov',
    u'Domaine Public': u'PD',
    u'Domaine public': u'PD',
    u'DomainePublic': u'PD',
    u'Domínio público': u'PD',
    u'Eigenwerk': u'GFDL-self',
    u'EigenWerk': u'GFDL-self',
    u'Eigenwurk': u'GFDL-self',
    u'EigenWurk': u'GFDL-self',
    }
def ubrkoment (text):
    """Return *text* with <nowiki>...</nowiki> sections and HTML comments removed.

    Bug fix: the previous pattern was r'<nowiki>.*?|<!--.*?-->'; the lazy
    '.*?' at the end of an alternative matches the empty string, so only the
    literal '<nowiki>' tag was deleted and the section body (and closing tag)
    stayed in the text.  The pattern now requires the closing '</nowiki>'.
    """
    nowikiOrHtmlCommentR = re.compile(r'<nowiki>.*?</nowiki>|<!--.*?-->', re.IGNORECASE | re.DOTALL)
    match = nowikiOrHtmlCommentR.search(text)
    while match:
        # Cut the matched span and rescan from the start, so spans that
        # become adjacent after a removal are also handled.
        text = text[:match.start()] + text[match.end():]
        match = nowikiOrHtmlCommentR.search(text)
    return text
def getVersionHistoryTable_ss(self):
    """Render the page's version history as a wiki table.

    Each row links the editor's user page with an explicit interwiki link
    of the form [[:<lang>:<User namespace>:<name>]].
    """
    site = self.site()
    # Namespace index 2 is the User: namespace of the page's wiki.
    user_prefix = site.family.namespace(site.lang, 2, '_default')
    rows = ['{| border="1"\n',
            '! date/time || username || edit summary\n']
    for when, who, summary in self.getVersionHistory():
        rows.append('|----\n')
        rows.append('| %s || [[:%s:%s:%s]] || %s\n' %
                    (when, site.lang, user_prefix, who, summary))
    rows.append('|}\n')
    return ''.join(rows)
class Izobr:
    """One candidate image: normalized name, source language and caption."""

    def __init__(self, iz, jaz, podp):
        # Normalize the image name: underscores become spaces and the first
        # letter is capitalized (MediaWiki title convention).
        iz = iz.replace(u'_', ' ')
        self.iz = iz[0].upper() + iz[1:]
        self.jaz = jaz      # language code of the source wiki
        self.podp = podp    # caption text
        self.podp2 = u''
        self.url = u''
        self.dl = 0         # file length in bytes (0 = unknown)
        # Status code: 10=already in article, 20=already in article + read
        # error, 30=already on Commons, 40=already in our wiki, 50=should be
        # uploaded to Commons, 60=should be uploaded to our wiki, 70=needs
        # upload (undecided yet), >=80 unusable (error).
        self.tip = 70
        self.tip2 = 70
        self.prioritet = 0
        self.njazisp = 1
        self.njazisp2 = 0
        self.plco = None    # Commons page object, filled lazily
        self.plcot = u''    # Commons page text
        self.nekatco = 0    # 1 when the Commons page lacks categories

    def vz_prioritet(self):
        """Sort-key accessor for the priority field."""
        return self.prioritet

    def sravn(self, e1):
        """Compare by normalized name; truthy when the names differ."""
        return self.iz != e1.iz

#    def tekst(self,pref):
#        return u'%s %s'%(self.adr,self.podp)
def stranica_url(sourceSite,ti):
    """Return the http URL of the page titled *ti* on *sourceSite*.

    Titles store spaces as underscores internally, so convert before
    percent-encoding in the site's own encoding.
    """
    underscored = ti.replace(u' ', u'_')
    quoted = urllib.quote(underscored.encode(sourceSite.encoding()))
    # TODO: the fixed "/wiki/" path probably doesn't hold on all wiki families
    return 'http://%s/wiki/%s' % (sourceSite.hostname(), quoted)
def url_soderzx(otch,url):
    # Fetch *url* and return its raw body as a byte string; logs failures
    # to *otch* and returns u'' on any IOError.
    s=u''
    try:
        uo = wikipedia.MyURLopener()
        file = uo.open(url)
        info=file.info()
    except IOError:
        otch.write(u'url_soderzx: IOError\n')
        otch.flush()
        return u''
    try:
        s=file.read()
    except IOError:
        otch.write(u'url_soderzx: read IOError\n')
        otch.flush()
        return u''
    otch.write(u'url_soderzx2 len(s)=%d\n'%len(s))
#    if len(s)>=5:
#        otch.write(u'url_soderzx2 s=%d %d %d %d %d\n'%(ord(s[0]),ord(s[1]),
#            ord(s[2]),ord(s[3]),ord(s[4])))
    otch.flush()
#    if len(s)<10 or s.startswith('<html'):
#        otch.write(u'url_soderzx3ne\n')
#        otch.flush()
#        return u''
    return s
def izobr_url(sourceSite,im):
    # Build the direct upload.wikimedia.org URL for image *im* on
    # *sourceSite*.  MediaWiki shards upload directories by the MD5 hash of
    # the underscore-form file name, so the name must be converted and
    # hashed before URL-quoting.
    encodedFilename = im.replace(u' ', u'_').encode(sourceSite.encoding())
    md5sum = md5.new(encodedFilename).hexdigest()
#    if debug: print "MD5 hash is: %s" % md5sum
    encodedFilename = urllib.quote(encodedFilename)
    el = sourceSite.lang.encode(sourceSite.encoding())
    el = urllib.quote(el)
    # TODO: This probably doesn't work on all wiki families
#    return 'http://%s/upload/%s/%s/%s' % (sourceSite.hostname(), md5sum[0], md5sum[:2], encodedFilename)
    return ('http://upload.wikimedia.org/wikipedia/%s/%s/%s/%s' %
            (el, md5sum[0], md5sum[:2], encodedFilename))
def url_dlina(otch,url):
    # Return the Content-Length of *url*, or 0 when the URL is unreachable,
    # has no usable length header, or looks like an HTML error page instead
    # of file data (the sniffed first bytes start with '<html').
    dl=0
    try:
        uo = wikipedia.MyURLopener()
        file = uo.open(url)
        info=file.info()
    except IOError:
        return 0
    dlt=info.getheader('Content-Length')
    try:
        dl=int(dlt)
    except ValueError:
        return 0
    except TypeError:
        # getheader returned None (header absent).
        return 0
    otch.write(u'url_dlina1 dl=%s\n'%dl)
    otch.flush()
    try:
        s=file.read(10)
    except IOError:
        return 0
    otch.write(u'url_dlina2 len(s)=%d\n'%len(s))
    if len(s)>=5:
        otch.write(u'url_dlina2 s=%d %d %d %d %d\n'%(ord(s[0]),ord(s[1]),
            ord(s[2]),ord(s[3]),ord(s[4])))
    otch.flush()
    if len(s)<10 or s.startswith('<html'):
        otch.write(u'url_dlina3ne\n')
        otch.flush()
        return 0
    return dl
class ImageTransferBot:
    # Copies images referenced from articles either to Wikimedia Commons
    # (when the licence looks free) or to the local wiki, writing progress
    # and description texts to several report files.

    def __init__(self, targetSite, otch,opis,otchnc=None,
                 nscom=None,otchkc=None,otchup=None):
        self.targetSite = targetSite
        self.otch = otch        # main log file
        self.opis = opis        # uploaded-description log
        self.otchnc = otchnc    # "uploaded to Commons" source-link log
        self.otchkc = otchkc    # category log for Commons uploads
        self.otchup = otchup    # upload robot log
        self.nscom = nscom      # set (dict) of titles known to exist on Commons
        self.katcom = u''       # category wikitext appended to Commons uploads
        self.fkatcom_ns = 0     # flag: katcom is uncertain, mark with '!!!!'
        self.sico=wikipedia.getSite(code = u'commons',fam = u'commons')
        self.tbl_pl={}          # page prefetch cache for get_tbl()

    def vivod(self,b):
        # Print *b* to the console and append it to the main log.
        wikipedia.output(b)
        self.otch.write(b)
        self.otch.flush()

    def prov_nekatco1(self, izo):
        # Fetch the Commons description page of *izo* and set izo.nekatco=1
        # when the page is a redirect or has no categories.
        sico=self.sico
        izo.plco = wikipedia.Page(sico,u'Image:'+izo.iz)
        izo.nekatco=0
        try:
            izo.plcot=izo.plco.get()
        except wikipedia.NoPage:
            self.vivod(u'c- %s\n' % izo.plco.title())
            return
        except wikipedia.IsRedirectPage:
            izo.nekatco=1
            izo.plcot=u'--> [['+izo.plco.getRedirectTarget()+u']]\n----\n'
            self.vivod(u'c+ %s\n-> %s\n' % (izo.plco.title(),izo.plco.getRedirectTarget()))
            self.vivod(u'<- nekat\n')
            return
        self.vivod(u'c+ %s\n' % izo.plco.title())
        plkat = izo.plco.categories()
        if len(plkat)<1:
            izo.nekatco=1
            self.vivod(u'<- nekat\n')
            return

    def podgot_istf(self,ishsi,s):
        # Convert the raw HTML of a source image page (site *ishsi*) into a
        # small wikitext "file history" section: isolate the section after
        # <h2 id="filehistory">, strip tags, and turn /wiki/ hrefs into
        # interwiki links.
        if len(s)<10:
            self.vivod(u'!!!osh cht ist fajla!!!\n')
            return u''
        s=unicode(s,'utf-8','replace')
        p=iskats(s,0,u'<h2 id=\"filehistory\">')
        if p!=-1:
            konz=u'</h2>'
            p=iskats(s,p,konz)
            if p!=-1:
                p+=len(konz)
                pk=iskats(s,p,u'<br /><ul><li>')
                if pk==-1:
                    pk=iskats(s,p,u'<h2')
                if pk!=-1:
                    s=s[p:pk]
                else:
                    s=s[p:]
        s=s.replace(u'\r',u'').replace(u'\n',u' ')
        s=s.replace(u'<br>',u'\n\n')
        s=s.replace(u'<br />',u'\n\n')
        s=s.replace(u'<li>',u'\n* ')
        fl=0    # inside-a-tag flag
        t=u'==== '+wikipedia.translate(self.targetSite,istf_message)+u' ====\n'
        nachss=u'<a href=\"/wiki/'
        i=0
        while i<len(s):
            c=s[i]
            if s[i:].startswith(nachss):
                # Turn a local /wiki/ link into an interwiki wikilink.
                p0=i+len(nachss)
                p=iskats(s,p0,u'\"')
                fl=1
                if p!=-1:
                    ss=s[p0:p]
                    i=p
#                    ss=wikipedia.unescape(ss)
                    ss=ss.encode('utf-8','replace')
                    ss=urllib.unquote(ss)
                    ss=unicode(ss,'utf-8','replace')
                    t+=u' [[:%s:%s]] '%(ishsi.lang,ss)
            elif c==u'<':
                fl=1
            elif c==u'>':
                fl=0
            elif fl==0:
                t+=c
            i+=1
        # Neutralize braces so stray template syntax cannot fire, then
        # collapse runs of spaces.
        t = t.replace(u'{',u'{ ')
        t = t.replace(u'}',u' }')
        # NOTE(review): the double space below was collapsed by the wiki
        # rendering of this source; it must be two spaces or the loop never
        # terminates.
        while u'  ' in t:
            t = t.replace(u'  ',u' ')
        t = t.replace(u' \n',u'\n')
        t = t.replace(u'\n ',u'\n')
        return t

    def iskat1(self, izo):
        # Decide where image *izo* already exists and whether its licence
        # allows copying to Commons; sets izo.tip (30 on Commons, 40 local,
        # 50 upload to Commons, 60 upload locally, 90 error) and izo.opis.
        if izo.tip!=70:
            return
        im=izo.iz
        targetSite=self.targetSite
        sourceSite=wikipedia.getSite(izo.jaz,fam = targetSite.family)
        isprefi=sourceSite.family.image_namespace(izo.jaz)
        sourceImagePage=wikipedia.Page(sourceSite,isprefi+u':'+izo.iz)
        if len(im)<1:
            izo.tip=90
            return
        # First letter should be capitalized.
        # TODO: Don't capitalize on non-capitalizing wikis
        im = im[0].upper()+im[1:]
        sico=self.sico
        b=u'\n0 %s\n' % sourceImagePage.title()
        self.vivod(b)
        tit=u'Image:'+im
        # Fast path: the preloaded Commons title set.
        if self.nscom and self.nscom.has_key(tit):
            b=u'tc+ %s\n' % tit
            self.vivod(b)
            izo.tip=30
            return
        try:
            sourceImagePage=get_tbl(self.tbl_pl,sourceImagePage)
            dcom = sourceImagePage.get()
        except wikipedia.NoPage:
            b=u'i- %s\n' % sourceImagePage.title()
            self.vivod(b)
            dcom=u''
        except wikipedia.IsRedirectPage:
            dcom=u''
        # Rename known licence templates to their Commons equivalents,
        # trying both capitalizations of each source template.
        for old, new in licenseTemplates_o.iteritems():
            rnew = '{{%s}}' % new
            rold = re.compile('{{%s}}' % old)
            dcom = wikipedia.replaceExceptNowikiAndComments(dcom, rold, rnew)
            old = old[0].lower()+old[1:]
            rold = re.compile('{{%s}}' % old)
            dcom = wikipedia.replaceExceptNowikiAndComments(dcom, rold, rnew)
        if licenseTemplates.has_key((sourceSite.sitename(), 'commons:commons')):
            for old, new in licenseTemplates[(sourceSite.sitename(), 'commons:commons')].iteritems():
                rnew = '{{%s}}' % new
                rold = re.compile('{{%s}}' % old)
                dcom = wikipedia.replaceExceptNowikiAndComments(dcom, rold, rnew)
                old = old[0].lower()+old[1:]
                rold = re.compile('{{%s}}' % old)
                dcom = wikipedia.replaceExceptNowikiAndComments(dcom, rold, rnew)
        dcomnk=ubrkoment(dcom)
        # Look for a NowCommons-style tag, trying the generic names first
        # and then the per-language template name.
        ncti=u'{{NowCommons|'
        p=iskat(dcomnk,ncti)
        if p==-1:
            ncti=u'{{nowCommons|'
            p=iskat(dcomnk,ncti)
        if p==-1 and nowCommonsTemplate.has_key(sourceSite.lang):
            t=nowCommonsTemplate[sourceSite.lang]
            ncti=u'{{'+t+u'|'
            p=iskat(dcomnk,ncti)
            if p==-1:
                ncti=u'{{'+perv_lower(t)+u'|'
                p=iskat(dcomnk,ncti)
        if p!=-1:
            self.vivod(u'najden NowCommons\n')
            p+=len(ncti)
            p1=iskats(dcomnk,p,u'}}')
            if p1!=-1:
                p2=iskats(dcomnk[:p1],p,u'|')
                if p2!=-1 and p2<p1:
                    p1=p2
                # Extract and normalize the Commons name from the template.
                imc=ubr_nk_prob(dcomnk[p:p1])
                if imc.startswith(u':'):
                    imc=imc[1:]
                if imc.startswith(u'Image:') or imc.startswith(u'image:'):
                    imc=imc[6:]
                imc = ubr_nk_prob(imc.replace(u'_', u' '))
                imc = imc[0].upper()+imc[1:]
                tit=u'Image:'+imc
                if self.nscom and self.nscom.has_key(tit):
                    b=u'tsc+ %s\n' % tit
                    self.vivod(b)
                    izo.iz=imc
                    izo.tip=30
                    return
                plcoi = wikipedia.Page(sico,tit)
                plcoi=get_tbl(self.tbl_pl,plcoi)
                plcoie=plcoi.exists()
                if plcoie:
                    b=u'sc+ %s\n' % tit
                    self.vivod(b)
                    izo.iz=imc
                    izo.tip=30
                    return
                else:
                    b=u'sc- %s\n' % tit
                    self.vivod(b)
        # Does the image already exist on Commons under its own name?
        plco = wikipedia.Page(sico,u'Image:'+im)
        plco=get_tbl(self.tbl_pl,plco)
        plcoe=plco.exists()
        if plcoe:
            b=u'c+ %s\n' % plco.title()
            self.vivod(b)
            izo.tip=30
            return
        else:
            b=u'c- %s\n' % plco.title()
            self.vivod(b)
        # Does it already exist on the target wiki?
        targetPr=self.targetSite.family.image_namespace(self.targetSite.language())
        pl = wikipedia.Page(self.targetSite,wikipedia.UnicodeToAsciiHtml(targetPr+':'+im))
        pl=get_tbl(self.tbl_pl,pl)
        ple=pl.exists()
        if ple:
            b=u'u+ %s\n' % im
            self.vivod(b)
            izo.tip=40
            return
        # localize the text that should be printed on the image description page
        dcom = wikipedia.translate(self.targetSite, copy_message) % (sourceSite, dcom)
        # Markers of free licences (template prefixes) ...
        sv_shabl=[u'GFDL',u'PD-',u'GPL',u'LGPL',u'Cc-by',u'Cc-sa',
            u'CC-BY',u'CC-SA',u'CC|',
            u'BSD}}',u'CeCILL}}',u'FWL}}',u'MPL}}',u'Self_GFDL',
            u'Bild-PD-',u'Bild-GFDL',
#            u'Bild-PD',u'PD',u'Domaine public',u'Domínio público',
#            u'Public Domain',u'Public domain',
#            u'Eigenwerk',u'Eigenwurk',u'EigenWerk',u'EigenWurk',
            u'Bild-CC']
        # ... and free-licence statements in plain text (German wikis).
        sv_tekst=[u'Lizenz: GFDL',
            u'Lizenz: [[GFDL]]',
            u'Lizenz: GNU FDL',
            u'Lizenz: [[GNU FDL]]',
            u'Lizenz: GNU-FDL',
            u'Lizenz: [[GNU-FDL]]',
            u'Lizenz: GPL',
            u'Lizenz: [[GPL]]',
            u'Lizenz: LGPL',
            u'Lizenz: [[LGPL]]',
#            u'Lizenz: PD',
#            u'Lizenz: [[PD]]',
#            u'Lizenz: Public Domain',u'Lizenz: public domain',
#            u'Lizenz: [[Public Domain]]',
            u'Lizenz: CC', u'Lizenz: Cc', u'Lizenz: cc',
            u'Lizenz: [[CC]]',]
        # Templates that veto a Commons upload (non-free / problematic).
        ne_shabl=[u'Copyvio',u'Fairuse',u'Fairusein',u'Logo',
            u'CopyrightedFreeUse',
            u'CopyrightedFreeUseProvidedThat',
            u'Deletion request',
            u'Screenshot',u'Noncommercial',u'PolandGov',
            u'Nonderivative',u'NZCrownCopyright',
            u'Redundant',u'No source',
            u'Ordnance Survey Copyright',
            u'CrownCopyright',
            u'EU image',
            u'Cc-nc',u'Cc-by-nc',u'Cc-nd',u'Cc-by-nd',
            u'CC-NC',u'CC-BY-NC',u'CC-ND',u'CC-BY-ND',
            ]
        # Generate "Lizenzstatus:" and no-space variants of the text markers.
        sv_tekst2=[]
        for z1 in sv_tekst:
            sv_tekst2.append(z1)
            sv_tekst2.append(z1.replace(u': ',u':'))
            z2=z1.replace(u'Lizenz:',u'Lizenzstatus:')
            sv_tekst2.append(z2)
            sv_tekst2.append(z2.replace(u': ',u':'))
        # Also match lower-cased first letters of every marker.
        sv_shabl+=perv_lower_s(sv_shabl)
        sv_tekst2+=perv_lower_s(sv_tekst2)
        ne_shabl+=perv_lower_s(ne_shabl)
        fsvob=0    # 1 = licence looks free
        for z1 in sv_shabl:
            if iskat(dcomnk,u'{{'+z1)!=-1:
                fsvob=1
        for z1 in sv_tekst2:
            if iskat(dcomnk,z1)!=-1:
                fsvob=1
        for z1 in ne_shabl:
            if iskat(dcomnk,u'{{'+z1)!=-1:
                fsvob=0
                self.vivod(u'!!ne_shabl!!\n')
        # Default: upload to the local wiki; upgrade to Commons when free.
        izo.tip=60
        if fsvob:
            izo.tip=50
            self.vivod(u'++->c\n')
        izo.opis=dcom

    def zagruz_info1(self, izo, fco):
        # Resolve the download URL and file size for *izo*, falling back to
        # the Commons copy when *fco* is true; marks izo.tip=90 when the
        # file cannot be downloaded.
        if izo.tip>=80:
            return
        im=izo.iz
        targetSite=self.targetSite
        sourceSite=wikipedia.getSite(izo.jaz,fam = targetSite.family)
        if izo.dl<=0 and izo.tip!=30:
            izo.url=izobr_url(sourceSite,im)
            self.vivod(u'0 %s\nurl=%s\n' % (im,izo.url))
            izo.dl=url_dlina(self.otch,izo.url)
            self.vivod(u' Content-Length=%d\n'%izo.dl)
        if izo.dl<=0 and fco:
            self.vivod(u'iz c:\n')
            izo.url=izobr_url(self.sico,im)
            self.vivod(u'0 %s\nurl=%s\n' % (im,izo.url))
            izo.dl=url_dlina(self.otch,izo.url)
            self.vivod(u' Content-Length=%d\n'%izo.dl)
        if izo.dl<=0:
            izo.tip=90
            izo.dl=0

    def kopir_izobr1(self, izo):
        # Upload *izo* (tip 50 -> Commons, tip 60 -> local wiki) after
        # assembling the description: source text + version-history table +
        # extracted file history + interwiki back-link (+ category for
        # Commons).  Retries the upload once before giving up.
        if izo.tip!=50 and izo.tip!=60:
            return
        b=u'\n -> %s\n' % izo.iz
        self.vivod(b)
        sico=self.sico
        targetSite = self.targetSite
        sourceSite=wikipedia.getSite(izo.jaz,fam = targetSite.family)
        isprefi=sourceSite.family.image_namespace(izo.jaz)
        sourceImagePage=wikipedia.Page(sourceSite,isprefi+u':'+izo.iz)
        # TODO: Only the page's version history is shown, but the file's
        # version history would be more helpful
        izo.opis += '\n\n' + getVersionHistoryTable_ss(sourceImagePage)
        str_url=stranica_url(sourceSite,isprefi+u':'+izo.iz)
        self.vivod(u'str_url=%s\n'%str_url)
        istf=url_soderzx(self.otch,str_url)
        istf=self.podgot_istf(sourceSite,istf)
        izo.opis += u"\n" + istf
        # add interwiki link
        izo.opis += u"\n\n" + sourceImagePage.aslink(forceInterwiki = True)
        t_ot_c=u''
        if izo.tip==50:
            targetSite = sico
            if self.katcom!=u'':
                izo.opis += '\n\n' + self.katcom
            self.vivod(u'++->c\n')
            self.otchnc.write(u'[[:%s:%s]]\n'%(sourceSite.lang,
                sourceImagePage.title()))
            self.otchnc.flush()
            t=u''
            if self.fkatcom_ns:
                t=u'!!!!'
            self.otchkc.write(u'Image:%s\n==========%s=\n%s===============\n'%
                (izo.iz,t,self.katcom))
            self.otchkc.flush()
            t_ot_c=u'->c'
        bot = upload1.UploadRobot(url = izo.url, description = izo.opis,
            targetSite = targetSite, urlEncoding = sourceSite.encoding(),
            otchup=self.otchup)
        # try to upload; one retry on a reported error
        tosh=u''
        (targetFilename,tosh) = bot.run()
        if tosh!=u'':
            self.vivod(u'!!!! osh -> povt %s %s\n'%(izo.iz,tosh))
            (targetFilename,tosh) = bot.run()
        if tosh!=u'':
            self.vivod(u'!!!! osh !!!! %s %s\n'%(izo.iz,tosh))
            izo.tip=90
        else:
            b=u'++ %s\n'% izo.iz
            self.vivod(b)
            b=(u'======zagr%s====\n%s\n========\n%s\n========================\n'%
                (t_ot_c,izo.iz,izo.opis))
            self.opis.write(b)
            self.opis.flush()
        return

    # Bulk wrappers: apply the corresponding *1 method to every item.
    def prov_nekatco(self,generator):
        for izo in generator:
            self.prov_nekatco1(izo)

    def iskat(self,generator):
        for izo in generator:
            self.iskat1(izo)

    def zagruz_info(self,generator,fco):
        for izo in generator:
            self.zagruz_info1(izo,fco)

    def kopir_izobr(self,generator):
        for izo in generator:
            self.kopir_izobr1(izo)
def main():
    # Driver: read "<lang>:<Image namespace>:<name> [ ]] <category text> ]"
    # lines from "<argv[1]>.txt" and transfer each image with
    # ImageTransferBot, writing report files next to the input.  An optional
    # second argument names a file whose content is the default Commons
    # category text.
    pageTitle = []
    page = None
    gen = None
    filename = wikipedia.argHandler(sys.argv[1], 'cht_cat')
    mysite = wikipedia.getSite()
    vihprefi=mysite.family.image_namespace(mysite.lang)
    f=codecs.open(filename+'.txt','rb',encoding='utf-8')
    # Report files: main log, descriptions, Commons links, categories,
    # upload log, interwiki pairs.
    otch = codecs.open(u'%s_ot.txt' % filename, 'w', 'utf-8')
    otch2 = codecs.open(u'%s_op.txt' % filename, 'w', 'utf-8')
    otchnc = codecs.open(u'%s_nc.txt' % filename, 'w', 'utf-8')
    otchkc = codecs.open(u'%s_kc.txt' % filename, 'w', 'utf-8')
    otchup = codecs.open(u'%s_up.txt' % filename, 'w', 'utf-8')
    otchiw = codecs.open(u'%s_iw.txt' % filename, 'w', 'utf-8')
    katcom=u''
    if len(sys.argv)>=3:
        fnkc = wikipedia.argHandler(sys.argv[2], 'cht_cat')
        fkc = codecs.open(fnkc, 'rb', 'utf-8')
        katcom=fkc.read()
        fkc.close()
    nscom={}
    itbot = ImageTransferBot(
        targetSite = mysite,otch=otch,opis=otch2,
        otchnc=otchnc,nscom=nscom,otchkc=otchkc,otchup=otchup)
    for s in f.readlines():
        # Strip a possible UTF-8 BOM.
        if ord(s[0]) == 65279:
            s=s[1:]
        itbot.katcom=katcom
        im=s
        # Text after ']]' is a per-line category override.
        p=iskat(s,u']]')
        if p!=-1:
            im=s[:p]
            kc=ubr_nk_prob(s[p+2:])
            if kc!=u'':
                itbot.katcom=kc
        if im.startswith(u'[['):
            im=im[2:]
        # Expect "<lang>:<image namespace>:<name>".
        imt=im.split(u':',2)
        if len(imt)<3:
            continue
        imt[2]=perv_upper(ubr_nk_prob(imt[2]))
        reso=[Izobr(imt[2],imt[0],u'')]
        reso[0].tip=70
        sourceSite=wikipedia.getSite(imt[0],fam = mysite.family)
        isprefi=sourceSite.family.image_namespace(imt[0])
        otch.write(u'\n\n%s:%s:%s\n==katcom==\n%s\n==========\n'%
            (imt[0],isprefi,imt[2],itbot.katcom))
        otch.flush()
        itbot.iskat(reso)
        itbot.zagruz_info(reso,1)
        # Without a category text, never upload to Commons: demote to local.
        if itbot.katcom==u'':
            if reso[0].tip==50:
                reso[0].tip=60
        itbot.kopir_izobr(reso)
        if reso[0].tip<80:
            # Success: record the local-name/source-name interwiki pair.
            otchiw.write(u'%s:%s:%s\n%s:%s:%s\n\n'%
                (mysite.lang,vihprefi,reso[0].iz,imt[0],isprefi,imt[2]))
            otchiw.flush()
        else:
            otch.write(u'!!!- %s:%s:%s\n'%
                (imt[0],isprefi,imt[2]))
            otch.flush()
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always disconnect the bot framework cleanly (throttle bookkeeping).
        wikipedia.stopme()
</nowiki>
* perevod.py
# -*- coding: UTF-8 -*-
__version__='$Id:'
import wikipedia
import re, sys
import codecs
from bib_tekst import *
def vivod(b):
    """Debug-logging hook; currently a no-op.

    The original console/file output is kept below so it can be re-enabled
    easily during debugging.
    """
    # wikipedia.output(b)
    # otch.write(b)
    # otch.flush()
    pass
class Sl_sta:
    """One dictionary entry for a source word.

    tp   - list of candidate translations (seeded with the first one)
    iwi  - translations that were confirmed via interwiki links
    riwi - flag, set by the loader when the interwiki came via a redirect
           (see dobavslov2: set when fiwi==2 — TODO confirm semantics)
    """

    def __init__(self, vihs):
        self.iwi = []
        self.tp = [vihs]
        self.riwi = 0
def dobavslov2(slov,vhjaz,vihjaz,vhs,vihs,fiwi,fltolk):
    """Insert translation vhs -> vihs into dictionary *slov*.

    slov is keyed by (source language, source word); values carry .tp
    (candidate translations), .iwi (interwiki-confirmed ones) and .riwi.
    fltolk: 0 = append if new, 1 = replace all candidates, 2 = only create
    a brand-new entry.  fiwi truthy also records vihs in .iwi; fiwi==2
    additionally sets .riwi.  (vihjaz is accepted for signature symmetry
    but unused here.)
    """
    key = (vhjaz, vhs)
    # Debug trace for one problematic word, left in by the original author.
    if vhs in (u'Circumscribed', u'circumscribed'):
        a = -1
        b = -1
        if key in slov:
            a = len(slov[key].tp)
            b = len(slov[key].iwi)
        wikipedia.output(u'2 %s %s %d %d %d %d'%(vhs,vihs,fiwi,fltolk,a,b))
    if key in slov:
        ent = slov[key]
        if fltolk==2:
            return
        if fltolk==1:
            ent.tp = [vihs]
        elif vihs not in ent.tp:
            ent.tp.append(vihs)
        if fiwi and vihs not in ent.iwi:
            ent.iwi.append(vihs)
    else:
        ent = Sl_sta(vihs)
        slov[key] = ent
        if fiwi:
            ent.iwi.append(vihs)
    if fiwi==2:
        ent.riwi = 1
def eo_pluralo(t):
    """Return the plural of an Esperanto noun phrase.

    Appends 'j' to the head (everything before the first preposition among
    'de/en/el/al/je/por') and pluralizes preceding adjectives by rewriting
    'a ' to 'aj ' within the head.
    """
    (p, n) = iskats_mn(t, 0, [u' de ', u' en ', u' el ', u' al ', u' je ', u' por '])
    if p == -1:
        p = len(t)
    head = t[:p].replace(u'a ', u'aj ')
    return head + 'j' + t[p:]
# English consonants: used to generate doubled-consonant verb forms
# (e.g. "stop" -> "stopped", "stopping") in fleksii_en_eo_glag.
en_soglasn=[u'b',u'c',u'd',u'f',u'g',u'h',u'j',u'k',u'l',u'm',u'n',u'p',u'q',u'r',u's',u't',u'v',u'w',u'x',u'z']
def fleksii_en_eo_glag(slov,vhs,osneo,fltolk):
    # Generate inflected English verb forms for stem *vhs* and register them
    # with the matching Esperanto forms built on stem *osneo*:
    #   -s / -es / -ies  -> -as  (present)
    #   -ed / -ied       -> -is  (past)
    #   -ing             -> -anta (present participle)
    # Lenient alternative spellings (and doubled final consonants) are
    # added too, so sloppy source text still matches.
    vhok=vhs[len(vhs)-1]
    if vhok==u'e':
        # "like" -> likes/liked/liking
        vhs1=vhs[:len(vhs)-1]+u'es'
        vihs1=osneo+u'as'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs[:len(vhs)-1]+u'ed'
        vihs1=osneo+u'is'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs[:len(vhs)-1]+u'ing'
        vihs1=osneo+u'anta'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
    elif vhok==u'y':
        # "fly" -> flies (plus lenient "flyes"/"flys"), flied/flyed, flying
        vhs1=vhs[:len(vhs)-1]+u'ies'
        vihs1=osneo+u'as'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs+u'es'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs+u's'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs[:len(vhs)-1]+u'ied'
        vihs1=osneo+u'is'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs+u'ed'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs[:len(vhs)-1]+u'ing'
        vihs1=osneo+u'anta'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs+u'ing'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
    else:
        # Regular stem; also double a final consonant ("stop" -> "stopped").
        vhs1=vhs+u's'
        vihs1=osneo+u'as'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs+u'ed'
        vihs1=osneo+u'is'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        if vhok in en_soglasn:
            vhs1=vhs+vhok+u'ed'
            dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs+u'ing'
        vihs1=osneo+u'anta'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        if vhok in en_soglasn:
            vhs1=vhs+vhok+u'ing'
            dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
def fleksii_en_eo(slov,vhs,vihs,fltolk):
    # Derive inflected English->Esperanto pairs from the base pair
    # vhs -> vihs, dispatching on the Esperanto part-of-speech ending:
    # -o noun (possessive, plurals, derived verb), -i verb, -a adjective
    # (adverb, comparative, superlative).
    if len(vhs)<3 or len(vihs)<3:
        return
    tipeo=vihs[len(vihs)-1]     # Esperanto ending = part of speech
    osneo=vihs[:len(vihs)-1]    # Esperanto stem
    vhok=vhs[len(vhs)-1]        # last letter of the English word
    if tipeo==u'o':
        # Possessive: "dog's" -> adjective form.
        vhs1=vhs+u'\'s'
        vihs1=osneo+u'a'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        # Plurals (including lenient -s/-es spellings for -y words).
        vihs1=eo_pluralo(vihs)
        if vhok==u'y':
            vhs1=vhs[:len(vhs)-1]+u'ies'
            dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs+u's'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs+u'es'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        # Derived verb: strip an -il (instrument) suffix from the stem.
        osneo1=osneo
        if osneo1[len(osneo1)-2:]==u'il':
            osneo1=osneo1[:len(osneo1)-2]
        # Only single words get verb paradigms.
        if (not u' ' in vhs) and (not u' ' in vihs):
            fleksii_en_eo_glag(slov,vhs,osneo1,fltolk)
    elif tipeo==u'i':
        fleksii_en_eo_glag(slov,vhs,osneo,fltolk)
    elif tipeo==u'a':
        # Adverb, comparative and superlative of an adjective.
        vhs1=vhs+u'ly'
        vihs1=osneo+u'e'
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs+u'er'
        vihs1=u'pli '+vihs.lower()
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
        vhs1=vhs+u'est'
        vihs1=u'plej '+vihs.lower()
        dobavslov2(slov,u'en',u'eo',vhs1,vihs1,0,fltolk)
def dobavslov(slov,vhjaz,vihjaz,vhs,vihs,fiwi,fltolk):
    # Normalize both sides (trim, skip empties, capitalize the first
    # letter), insert the pair, and for en->eo also derive inflected forms.
    vhs=ubr_nk_prob(vhs)
    if vhs==u'':
        return
    # Debug trace for one problematic word, left in by the original author.
    if vhs==u'Circumscribed' or vhs==u'circumscribed':
        wikipedia.output(u'%s %s %d %d'%(vhs,vihs,fiwi,fltolk))
    vihs=ubr_nk_prob(vihs)
    if vihs==u'':
        return
    vhs=vhs[0].upper()+vhs[1:]
    vihs=vihs[0].upper()+vihs[1:]
    dobavslov2(slov,vhjaz,vihjaz,vhs,vihs,fiwi,fltolk)
    if vhjaz==u'en' and vihjaz==u'eo':
        # Derived forms must never *replace* candidates, only add.
        fltolk1=fltolk
        if fltolk1==1:
            fltolk1=0
        fleksii_en_eo(slov,vhs,vihs,fltolk1)
def zagruzslov_revo(slov,vhjaz,vihjaz,fslov,ftdob):
    # Load a Revo-style dictionary file: each line is
    #   [!]source1,source2:target1,target2;target3
    # A leading '!' makes the first target replace existing candidates;
    # *ftdob* restricts loading to brand-new entries only.  Commas inside
    # (...) or [...] do not split source words; a bracketed part is an
    # optional piece, so variants with and without it are both added.
    f=codecs.open(fslov,'rb',encoding='utf-8')
    for s in f.readlines():
        # Strip a possible UTF-8 BOM and line endings.
        if ord(s[0]) == 65279:
            s=s[1:]
        s=s.replace(u'\r',u'')
        if s[len(s)-1]==u'\n':
            s=s[:len(s)-1]
        fltolk=0
        if ftdob:
            fltolk=2
        elif len(s)>=1 and s[0]==u'!':
            fltolk=1
            s=s[1:]
        i=iskat (s,u':')
        if i!=-1:
            vhs=s[:i]
            vihst=s[i+1:].replace(u';',u',').split(u',')
            if (u',' in vhs) or (u';' in vhs):
                # Split the source side on top-level commas/semicolons,
                # tracking bracket depth in sk.
                j=0
                sk=0
                vhst=[]
                while j<len(vhs):
                    if vhs[j]==u'(' or vhs[j]==u'[':
                        sk+=1
                    elif vhs[j]==u')' or vhs[j]==u']':
                        sk-=1
                    if sk==0 and (vhs[j]==u',' or vhs[j]==u';'):
                        vhst.append(vhs[:j])
                        vhs=vhs[j+1:]
                        j=0
                    j+=1
                vhst.append(vhs)
            else:
                vhst=[vhs]
            for vhs in vhst:
                p=iskat (vhs,u'(')
                zakrsk=u')'
                if p==-1:
                    p=iskat (vhs,u'[')
                    zakrsk=u']'
                if p!=-1:
                    p1=iskat (vhs,zakrsk)
                    if p1!=-1:
                        if p==0 or vhs[p-1]==u' ':
                            # A free-standing parenthesized remark: drop it.
                            vhs=vhs[:p]+vhs[p1+1:]
                        else:
                            # An optional word part: also add the variant
                            # without it.
                            vhs1=vhs[:p]+vhs[p1+1:]
                            fltolk1=fltolk
                            for vihs in vihst:
                                dobavslov(slov,vhjaz,vihjaz,vhs1,vihs,0,fltolk1)
                                # Only the first target may replace.
                                if fltolk1==1:
                                    fltolk1=0
                fltolk1=fltolk
                for vihs in vihst:
                    dobavslov(slov,vhjaz,vihjaz,vhs,vihs,0,fltolk1)
                    if fltolk1==1:
                        fltolk1=0
    return
def zagruzslov_viki(slov,vhjaz,vihjaz,fslov,friwi):
    # Load an interwiki-dump dictionary: the file consists of blocks of
    # "lang:title" lines separated by blank lines.  Within a block, the
    # *vihjaz* line is the translation of every other line.  *friwi* marks
    # the entries as obtained via redirects (fiwi==2 -> riwi flag).
    f=codecs.open(fslov,'rb',encoding='utf-8')
    j=0        # number of pending source lines in the current block
    jt0={}     # j -> source language
    st0={}     # j -> source title
    fiwi=1
    if friwi:
        fiwi=2
    vihs=u''   # translation found in the current block
    for s in f.readlines():
        # Strip a possible UTF-8 BOM and line endings.
        if ord(s[0]) == 65279:
            s=s[1:]
        s=s.replace(u'\r',u'')
        if s[len(s)-1]==u'\n':
            s=s[:len(s)-1]
        if s == u'':
            # Block finished: flush the collected pairs.
            if j>=1 and vihs!=u'':
                k=0
                while k<j:
                    dobavslov(slov,jt0[k],vihjaz,st0[k],vihs,fiwi,0)
                    k+=1
            j=0
            jt0={}
            st0={}
            vihs=u''
        else:
            i=iskat (s,u':')
            if i!=-1:
                if s[:i]==vihjaz:
                    vihs=s[i+1:]
                else:
                    # Keep only requested source languages ('' = all).
                    if vhjaz==u'' or vhjaz==s[:i]:
                        jt0[j]=s[:i]
                        st0[j]=s[i+1:]
                        j+=1
    return
def zagruz_ns(ns,fi):
    # Load a "name set" file *fi* into dict *ns*: one title per non-empty
    # line, stored as ns[title]=1 (the dict is used as a set).
    f=codecs.open(fi,'rb',encoding='utf-8')
    for s in f.readlines():
        # Strip a possible UTF-8 BOM.
        if ord(s[0]) == 65279:
            s=s[1:]
        s=ubr_nk_prob(s.replace(u'\r',u''))
        if s != u'':
            ns[s]=1
    return
def zagruzslov(slov,slovdop,nssvoj,vhjaz,vihjaz,fslov):
    # Read the master control file *fslov*.  Each line is "<type> <args>"
    # (comments start with '#'):
    #   w  <file>                - interwiki dictionary
    #   wr <file>                - interwiki dictionary via redirects
    #   r[d] <src> <dst> <file>  - Revo dictionary ('d': add-only mode)
    #   s  <lang> <file>         - own-namespace set into *nssvoj*
    #   anything else            - auxiliary name set, stored in slovdop[type]
    f=codecs.open(fslov,'rb',encoding='utf-8')
    for s in f.readlines():
        # Strip a possible UTF-8 BOM, line endings and '#' comments.
        if ord(s[0]) == 65279:
            s=s[1:]
        s=s.replace(u'\r',u'')
        if s[len(s)-1]==u'\n':
            s=s[:len(s)-1]
        p=iskat(s,u'#')
        if p!=-1:
            s=s[:p]
        if len(s)<1:
            continue
        st0=s.split(u' ',1)
        tip=st0[0]
        s=st0[1]
        if tip==u'w':
            fi=ubr_nk_prob(s)
            zagruzslov_viki(slov,vhjaz,vihjaz,fi,0)
        elif tip==u'wr':
            fi=ubr_nk_prob(s)
            zagruzslov_viki(slov,vhjaz,vihjaz,fi,1)
        elif tip[0]==u'r':
            st=s.split(u' ',2)
            ftdob=0
            if len(tip)>=2 and tip[1]==u'd':
                ftdob=1
            # Only load when the language pair matches ('' = any source).
            if (vhjaz==u'' or st[0]==vhjaz) and st[1]==vihjaz:
                zagruzslov_revo(slov,st[0],vihjaz,ubr_nk_prob(st[2]),ftdob)
        elif tip==u's':
            st=s.split(u' ',1)
            if st[0]==vihjaz:
                zagruz_ns(nssvoj,ubr_nk_prob(st[1]))
#        elif tip==u'c':
#            fi=ubr_nk_prob(s)
#            zagruz_ns(nscom,fi)
#        elif tip==u'z':
#            fi=ubr_nk_prob(s)
#            zagruz_ns(zapriz,fi)
        else:
            # Unknown type: collect into an auxiliary named set (e.g. 'c'
            # Commons titles, 'z' forbidden images — see callers).
            if not slovdop.has_key(tip):
                slovdop[tip]={}
            fi=ubr_nk_prob(s)
            zagruz_ns(slovdop[tip],fi)
class Perev_stat:
    """Per-text translation statistics collected by perevod_stat().

    flp  - 1 when at least one dictionary hit occurred
    nup  - count of unambiguous translations
    nnp  - count of untranslated words (collected in snp)
    nvap - count of ambiguous translations (words collected in svap)
    """

    def __init__(self):
        self.flp = 0
        self.nup = 0
        self.nnp = 0
        self.nvap = 0
        self.snp = []
        self.svap = []
def korr_uktbl(uktbl, us, un, i):
    """Shift position-table entries up to source offset *us* by (un - us).

    *uktbl* maps positions in the source text to positions in the output
    text.  Starting at index *i*, every entry that is still <= *us* is moved
    by the current offset delta, clamped so the table never decreases.
    Returns the index of the first unprocessed entry; a None table is a
    no-op and returns *i* unchanged.
    """
    if uktbl is None:
        return i
    delta = un - us
    count = len(uktbl)
    while i < count and uktbl[i] <= us:
        shifted = uktbl[i] + delta
        # Keep the table monotonically non-decreasing.
        if i > 0 and shifted < uktbl[i - 1]:
            shifted = uktbl[i - 1]
        uktbl[i] = shifted
        i += 1
    return i
class Dslov:
    """One parsed Esperanto word: stem plus detached endings.

    ti  - word-class code assigned by perevod_korr_eo (100 noun, 200
          adjective, 300 adverb, 400 verb; +20 plural, +10 special forms)
    na  - the stem
    ok  - part-of-speech ending (o/a/e/as/is/...)
    ok1 - plural ending ('j' or '')
    ok2 - accusative ending ('n' or '')
    """

    def __init__(self, ti, na, ok, ok1, ok2):
        self.ti = ti
        self.na = na
        self.ok = ok
        self.ok1 = ok1
        self.ok2 = ok2
def perevod_korr_eo(s,uktbl):
    # Esperanto agreement correction: tokenize the translated text *s*,
    # classify each word by its ending, propagate number (-j) and case (-n)
    # agreement from nouns back onto preceding adjectives, and rebuild the
    # text, updating the source->output position table *uktbl* via
    # korr_uktbl as lengths change.
    prom=[]      # separators between words (prom[k] precedes slov[k])
    slov=[]      # the words
    spslov=[]    # start offset of each word in s
    p=0
    while 1:
        p0=p
        while p<len(s) and not s[p].isalpha():
            p+=1
        prom.append(s[p0:p])
        if p>=len(s):
            break
        p1=p
        # A word may contain internal hyphens between letters.
        while p<len(s) and (s[p].isalpha() or
              (p<len(s)-1 and s[p]==u'-' and s[p+1].isalpha())):
            p+=1
        slov.append(s[p1:p])
        spslov.append(p1)
    # --- classify every word ---
    dslov=[]
    for sl in slov:
        ti=0
        ok2=u''
        ok1=u''
        if RBRm(sl,[u'la',u'kaj',u'tra',u'pro',u'do',u'na']):
            # Particles take no endings.
            na=sl
            ok=u''
        else:
            # Peel a trailing accusative -n.
            na=sl[:len(sl)-1]
            ok=sl[len(sl)-1]
            if len(na)>=2 and RBR(ok,u'n'):
                ok2=ok
                ok=u''
                sl=na
            if RBRm(sl,[u'si',u'li',u'ĝi',u'ŝi']):
                ti=110    # singular pronoun
                na=sl
                ok=u''
            elif RBRm(sl,[u'ili',u'oni']):
                ti=130    # plural pronoun
                na=sl
                ok=u''
            else:
                # Peel a trailing plural -j.
                na=sl[:len(sl)-1]
                ok=sl[len(sl)-1]
                if len(na)>=2 and RBR(ok,u'j'):
                    ok1=ok
                    ok=u''
                    sl=na
                    na=sl[:len(sl)-1]
                    ok=sl[len(sl)-1]
                if RBRm(sl,[u'iu',u'kiu',u'tiu',u'ĉiu',u'neniu']):
                    ti=210    # correlative, behaves like an adjective
                else:
                    if RBR(ok,u'o'):
                        ti=100    # noun
                    elif RBR(ok,u'a'):
                        ti=200    # adjective
                    elif ok1==u'' and RBR(ok,u'e'):
                        ti=300    # adverb
                    elif ok1==u'' and ok2==u'' and RBRm(ok,[u'i',u'u']):
                        ti=400    # verb (infinitive/volitive)
                    elif (ok1==u'' and ok2==u'' and RBR(ok,u's') and
                          len(sl)>=4 and
                          RBRm(sl[len(sl)-2],[u'i',u'a',u'o',u'u'])):
                        # Tensed verb: ending is the last two letters.
                        na=sl[:len(sl)-2]
                        ok=sl[len(sl)-2:]
                        ti=400
                    else:
                        ti=0
        if RBR(ok1,u'j'):
            ti+=20    # plural variant of the class code
        dslov.append(Dslov(ti,na,ok,ok1,ok2))
        vivod(u' %s ti=%d\n'%(na+ok+ok1+ok2,ti))
    # --- a past-tense verb following another verb (directly or through an
    # adverb) is reinterpreted as a passive participle adjective (-is -> -ita) ---
    i=len(dslov)-1
    while i>=0:
        if dslov[i].ti==400 and dslov[i].ok==u'is' and (i<1 or
              dslov[i-1].ti==400 or
              (dslov[i-1].ti==300 and (i<2 or dslov[i-2].ti==400)) ):
            dslov[i].ti=200
            dslov[i].na+=u'it'
            dslov[i].ok=u'a'
        i-=1
    # --- propagate number/case agreement from each noun back onto the
    # preceding run of nouns/adjectives separated by single spaces ---
    i=len(dslov)-1
    while i>=0:
        ti0=dslov[i].ti
        j=i-1
        if ti0>=100 and ti0<200:
            while j>=0:
                if not (dslov[j].ti>=100 and dslov[j].ti<300):
                    break
                if dslov[j].ok2!=u'':
                    break
                if prom[j+1]!=u' ' and prom[j+1]!=u'\n':
                    break
                vivod(u' j=%d ti=%d\n'%(j,dslov[j].ti))
                if dslov[j].ti>=100 and dslov[j].ti<200:
                    # A noun before a noun acts as a modifier: adjectivize.
                    dslov[j].ti+=100
                    dslov[j].ok=u'a'
                if dslov[j].ti==210 or dslov[j].ti==230:
                    # NOTE(review): '==' is a comparison, not an assignment —
                    # this statement has no effect; it likely should be
                    # 'dslov[j].ti=200' (left unchanged here).
                    dslov[j].ti==200
                if ti0>=120 and dslov[j].ti>=200 and dslov[j].ti<220:
                    # Plural noun: pluralize the preceding adjective.
                    dslov[j].ti+=20
                    dslov[j].ok1=u'j'
                elif ti0<120 and dslov[j].ti>=220 and dslov[j].ti<240:
                    # Singular noun: strip a spurious plural.
                    dslov[j].ti-=20
                    dslov[j].ok1=u''
                j-=1
        i=j
    # --- rebuild the text and keep the position table in sync ---
    rez=u''
    iuktbl=0
    i=0
    while i<len(dslov):
        rez+=prom[i]
        iuktbl=korr_uktbl(uktbl,spslov[i],len(rez),iuktbl)
        ds=dslov[i]
        rez+=ds.na+ds.ok+ds.ok1+ds.ok2
        i+=1
    rez+=prom[i]
    korr_uktbl(uktbl,len(s),len(rez),iuktbl)
    return rez
# HTML tags that must pass through the translator untouched.
neperevt=[u'<br',u'<tr>',u'</tr>',u'<td>',u'</td>',u'<font',u'</font>',
    u'<span',u'</span>',u'<b>',u'</b>',u'<i>',u'</i>',u'<s>',u'</s>',
    u'<sub',u'</sub>',u'<sup',u'</sup>',u'<gallery>',u'</gallery>',
    u'<center>',u'</center>',u'<blockquote>',u'</blockquote>',
    u'<div',u'</div>',u'<small>',u'</small>']
# Attribute/URL prefixes whose following value (quoted string or token up
# to space/newline/'|') is copied verbatim, not translated.
neperevat=[u'width= ',u'colspan= ',u'border= ',u'style= ',u'align= ',u'id= ',
    u'bgcolor= ',u'color= ',u'clear= ',u'valign= ',u'cellpadding= ',
    u'cellspacing= ',
    u'http://',
    u'width=',u'colspan=',u'border=',u'style=',u'align=',u'id=',
    u'bgcolor=',u'color=',u'clear=',u'valign=',u'cellpadding=',
    u'cellspacing=',]
def perevod_stat(slov,vhjaz,vihjaz,s,uktbl=None):
    """Word-by-word translation of s from language vhjaz to vihjaz.

    slov maps (language, Capitalized word) to an entry with .tp (list of
    translations).  Returns (translated text, Perev_stat) where the stats
    record flp (any dictionary hit), nup (unambiguous hits), nvap/svap
    (ambiguous hits), nnp/snp (untranslated words, emitted as _word_).
    uktbl, when given, is a position table remapped via korr_uktbl as the
    text length changes (Perev_stat/korr_uktbl are defined elsewhere in
    this file).
    """
    pes=Perev_stat()
    maxdl=48  # longest candidate substring tried against the dictionary
    # Collapse runs of blanks.  NOTE(review): the doubled space in this
    # replace was collapsed by the wiki rendering of this source; with a
    # literal single space the loop would never terminate.
    while u'  ' in s:
        s = s.replace(u'  ',u' ')
    if len(s)<1:
        return (u'',pes)
    rez=u''
    iuktbl=0
    i=0
    while i<len(s):
        # copy inter-word blanks through unchanged
        while i<len(s) and s[i]==u' ':
            rez+=s[i]
            i+=1
        if i>=len(s):
            break
        # repeatedly copy untranslatable tokens found at position i
        fl=1
        while fl:
            fl=0
            if i+3<len(s) and s[i]==u'&':
                # HTML entity (&#123; or &name;): copy verbatim
                j=iskat(s[i:i+10],u';')
                if j!=-1 and ( (s[i+1]==u'#' and s[i+2:i+j].isdigit()) or
                        s[i+1:i+j].isalpha() ):
                    np=s[i:i+j+1]
                    rez+=np
                    i+=len(np)
                    fl=1
                    break
            for np in neperevt:
                # fixed markup token: copy verbatim
                if s[i:i+len(np)].lower()==np:
                    rez+=np
                    i+=len(np)
                    fl=1
                    break
            for np in neperevat:
                # attribute prefix: copy the prefix together with its value
                if s[i:i+len(np)].lower()==np:
                    dl=len(np)
                    if i+dl<len(s) and s[i+dl]==u'"':
                        # quoted value: extend through the closing quote
                        j=iskat(s[i+len(np)+1:],u'"')
                        if j!=-1:
                            dl+=2+j
                    else:
                        # j=iskat(s[i+dl:],u' ')
                        # if j==-1:
                        #     j=iskat(s[i+dl:],u'\n')
                        # unquoted value: extend up to space, newline or '|'
                        (j,jj)=iskats_mn(s[i+dl:],0,[u' ',u'\n',u'|'])
                        if j!=-1:
                            dl+=j
                    iuktbl=korr_uktbl(uktbl,i,len(rez),iuktbl)
                    rez+=s[i:i+dl]
                    i+=dl
                    fl=1
                    break
        if i>=len(s):
            break
        # longest-match dictionary lookup starting at i
        dl=maxdl
        if i+dl>len(s):
            dl=len(s)-i
        while dl>0:
            # only try candidate lengths ending on a word boundary
            if dl>0 and i+dl<len(s) and s[i+dl].isalnum():
                dl-=1
                continue
            tu=s[i:i+dl]
            tu0=tu[0].upper()
            flmb=(tu0!=tu[0])  # remember that the source started lowercase
            tu=tu0+tu[1:]      # dictionary keys are stored capitalized
            if slov.has_key((vhjaz,tu)):
                pes.flp=1
                tp=slov[(vhjaz,tu)].tp
                npe=len(tp)
                if npe==1:
                    # unambiguous translation
                    pe=tp[0]
                    if pe==u'_':
                        # '_' entry: drop the word entirely (e.g. articles)
                        # when followed by a blank and it looks safe to drop
                        if i+dl<len(s) and s[i+dl]==u' ' and (s[i].islower() or
                                i<2 or (s[i-1] in [u'.',u'\n']) or
                                (s[i-2] in [u'.',u'\n'])):
                            pe=u''
                            if (i+dl+1<len(s) and s[i].isupper() and
                                    s[i+dl+1].islower()):
                                # dropped word opened the sentence:
                                # capitalize the following word instead
                                s=s[:i+dl+1]+s[i+dl+1].upper()+s[i+dl+2:]
                            dl+=1  # swallow the following blank too
                        else:
                            pe=s[i:i+dl]  # unsafe to drop: keep the original
                    elif flmb:
                        pe=pe[0].lower()+pe[1:]  # restore lowercase initial
                    pes.nup+=1
                else:
                    # ambiguous: emit all variants as (a, b, ...)
                    pe=u'('
                    k=0
                    while k<npe:
                        pe1=tp[k]
                        if flmb:
                            pe1=pe1[0].lower()+pe1[1:]
                        pe+=pe1
                        k+=1
                        if k<npe:
                            pe+=u', '
                    pe+=u')'
                    pes.nvap+=1
                    pes.svap.append(tu)
                iuktbl=korr_uktbl(uktbl,i,len(rez),iuktbl)
                rez+=pe
                break
            dl-=1
        if dl>0:
            i+=dl  # dictionary hit consumed dl source characters
        else:
            # no hit: copy one word (or one non-word character) through,
            # marking untranslated alphabetic words as _word_
            j=1
            while i+j<len(s) and s[i+j-1].isalnum() and s[i+j].isalnum():
                j+=1
            iuktbl=korr_uktbl(uktbl,i,len(rez),iuktbl)
            if s[i].isalnum() and not s[i:i+j].isdigit():
                rez+=u'_'+s[i:i+j]+u'_'
                pes.nnp+=1
                pes.snp.append(perv_upper(s[i:i+j]))
            else:
                rez+=s[i:i+j]
            i+=j
    korr_uktbl(uktbl,len(s),len(rez),iuktbl)
    if vihjaz==u'eo':
        # Esperanto-specific ending agreement pass (defined elsewhere)
        rez=perevod_korr_eo(rez,uktbl)
    return (rez,pes)
def perevod(slov,vhjaz,vihjaz,s,uktbl=None):
    """Translate s, returning just (text, hit-flag) and discarding the
    detailed statistics gathered by perevod_stat."""
    translated, stats = perevod_stat(slov, vhjaz, vihjaz, s, uktbl)
    return (translated, stats.flp)
def perevod_iwi_spis(slov,vhjaz,vihjaz,s):
    """Look the whole title s up in the interwiki dictionary slov.

    Returns the entry's .iwi list of interwiki target titles for
    (vhjaz, capitalized s), or [] when unknown.  vihjaz is accepted for
    symmetry with the other perevod_* helpers but is not used here.
    """
    s = s.replace(u'_',u' ')
    # Collapse runs of blanks.  NOTE(review): the doubled space in this
    # replace was collapsed by the wiki rendering of this source; with a
    # literal single space the loop would never terminate.
    while u'  ' in s:
        s = s.replace(u'  ',u' ')
    s=ubr_nk_prob(s)
    if len(s)<1:
        return []
    tu=s
    tu0=tu[0].upper()
    flmb=(tu0!=tu[0])  # computed but unused (kept for parity with perevod_stat)
    tu=tu0+tu[1:]      # dictionary keys are capitalized
    if not slov.has_key((vhjaz,tu)):
        return []
    return slov[(vhjaz,tu)].iwi
def perevod_iwi(slov,vhjaz,vihjaz,s):
    """Translate title s via the interwiki dictionary and render the result
    as comma-separated wiki links; u'' when the title is unknown."""
    titles = perevod_iwi_spis(slov, vhjaz, vihjaz, s)
    if not titles:
        return u''
    return u', '.join([u'[[' + title + u']]' for title in titles])
def perev_uch_nezsl(osnp,osvap,stat):
    """Accumulate per-word frequency counters from one translation run:
    osnp counts untranslated words (stat.snp), osvap counts ambiguous
    words (stat.svap)."""
    for t in stat.snp:
        if osnp.has_key(t):
            osnp[t]+=1
        else:
            osnp[t]=1
    for t in stat.svap:
        if osvap.has_key(t):
            osvap[t]+=1
        else:
            osvap[t]=1
def perev_uch_nezsl_rt(osnp,osvap,stat):
    """Mark every untranslated word (stat.snp) and ambiguous word
    (stat.svap) as seen, forcing its counter to 1 (presence only,
    no frequency accumulation)."""
    osnp.update(dict.fromkeys(stat.snp, 1))
    osvap.update(dict.fromkeys(stat.svap, 1))
def perev_pech_nezsl(slov,vhjaz,vihjaz,osnp,osvap,fnesl,kolotch):
    """Write word-frequency reports to the open file fnesl, at most kolotch
    entries per section.

    Four blank-line-separated sections: (1) 'count word' for untranslated
    words, (2) 'count word' for ambiguous words, (3) bare untranslated
    words, (4) '! word : translations' for ambiguous words.
    """
    ssnp=[]
    for t, n in osnp.iteritems():
        ssnp.append((n,t))
    ssnp.sort(reverse=True)  # most frequent first
    i=0
    for n, t in ssnp:
        fnesl.write(u'%d %s\n'%(n,t))
        fnesl.flush()
        i+=1
        if i>=kolotch:
            break
    ssvap=[]
    for t, n in osvap.iteritems():
        ssvap.append((n,t))
    fnesl.write(u'\n')
    fnesl.flush()
    ssvap.sort(reverse=True)
    i=0
    for n, t in ssvap:
        fnesl.write(u'%d %s\n'%(n,t))
        fnesl.flush()
        i+=1
        if i>=kolotch:
            break
    fnesl.write(u'\n')
    fnesl.flush()
    i=0
    for n, t in ssnp:
        fnesl.write(u'%s\n'%t)
        fnesl.flush()
        i+=1
        if i>=kolotch:
            break
    fnesl.write(u'\n')
    fnesl.flush()
    i=0
    for n, t in ssvap:
        # translate the ambiguous word and strip the surrounding (...)
        (t2,stat2)=perevod_stat(slov,vhjaz,vihjaz,t)
        if len(t2)>=1 and t2[0]==u'(':
            t2=t2[1:]
        if len(t2)>=1 and t2[len(t2)-1]==u')':
            t2=t2[:len(t2)-1]
        fnesl.write(u'! %s : %s\n'%(t,t2))
        fnesl.flush()
        i+=1
        if i>=kolotch:
            break
def main(slov,vhjaz,vihjaz,fvh,fvih):
    """Translate the UTF-8 text file fvh line by line into fvih, then append
    a report of unknown/ambiguous words to the module-level report file
    `otch` (the report is written once, after the whole file)."""
    kolotch=500  # cap per report section
    f0=codecs.open(fvh,'rb',encoding='utf-8')
    f1=codecs.open(fvih, 'w', 'utf-8')
    osnp={}
    osvap={}
    for s in f0.readlines():
        # wikipedia.output(u'%d\n' % ord(s[0]))
        if ord(s[0]) == 65279:
            s=s[1:]  # drop the UTF-8 BOM (U+FEFF) on the first line
        # wikipedia.output(u'%d %d %s %s\n' % (i,j,n,s))
        s=s.replace(u'\r',u'').replace(u'_',u' ')
        # if s[len(s)-1]!=u'\n':
        #     s=s+u'\n'
        # (rez,fl_perev)=perevod(slov,vhjaz,vihjaz,s)
        (rez,stat)=perevod_stat(slov,vhjaz,vihjaz,s)
        f1.write(rez)
        f1.flush()
        perev_uch_nezsl(osnp,osvap,stat)
    perev_pech_nezsl(slov,vhjaz,vihjaz,osnp,osvap,otch,kolotch)
if __name__ == "__main__":
    try:
        # argv: source lang, target lang, dictionary file, input text file,
        # output file, report file
        vhjaz = wikipedia.argHandler(sys.argv[1], 'perevod')
        vihjaz = wikipedia.argHandler(sys.argv[2], 'perevod')
        fslov = wikipedia.argHandler(sys.argv[3], 'perevod')
        fvh = wikipedia.argHandler(sys.argv[4], 'perevod')
        fvih = wikipedia.argHandler(sys.argv[5], 'perevod')
        fotch = wikipedia.argHandler(sys.argv[6], 'perevod')
        otch = codecs.open(fotch, 'w', 'utf-8')
        #otch2 = codecs.open(fotch2, 'w', 'utf-8')
        slov={}
        nssvoj={}
        slovdop={}
        # load the translation dictionary (zagruzslov is defined elsewhere)
        zagruzslov(slov,slovdop,nssvoj,vhjaz,vihjaz,fslov)
        main(slov,vhjaz,vihjaz,fvh,fvih)
    finally:
        wikipedia.stopme()  # always shut the framework down cleanly
* upload1.py
# -*- coding: utf-8 -*-
"""
Script to upload images to wikipedia.
Arguments:
-keep Keep the filename as is
If any other arguments are given, the first is the URL or filename
to upload, and the rest is a proposed description to go with the
upload. If none of these are given, the user is asked for the
file or URL to upload. The bot will then upload the image to the wiki.
The script will ask for the location of an image, if not given as a parameter,
and for a description.
"""
#
# (C) Rob W.W. Hooft, Andre Engels 2003-2004
#
# Distribute under the terms of the PSF license.
#
__version__='$Id: upload.py,v 1.31 2005/08/13 15:45:35 wikipedian Exp $'
import os, sys, re
import urllib, httplib
import wikipedia, config, mediawiki_messages
def iskats(t,i,s):
    """Return the first index >= i at which s occurs in t, or -1."""
    needle_len = len(s)
    last = len(t) - needle_len
    pos = i
    while pos <= last:
        if t[pos:pos + needle_len] == s:
            return pos
        pos += 1
    return -1
def post_multipart(host, selector, fields, files, cookies):
    """
    Post fields and files to an http host as multipart/form-data.
    fields is a sequence of (name, value) elements for regular form fields.
    files is a sequence of (name, filename, value) elements for data to be uploaded as files
    Return the server's response page.

    Returns (response, returned_html): the httplib response object and the
    raw response body bytes.
    """
    content_type, body = encode_multipart_formdata(fields, files)
    # hand-rolled request so headers can be set before sending the body
    conn = httplib.HTTPConnection(host)
    conn.putrequest('POST', selector)
    conn.putheader('content-type', content_type)
    conn.putheader('content-length', str(len(body)))
    conn.putheader("User-agent", "RobHooftWikiRobot/1.0")
    conn.putheader('Host', host)
    if cookies:
        # pass the login session along
        conn.putheader('Cookie',cookies)
    conn.endheaders()
    conn.send(body)
    response = conn.getresponse()
    returned_html = response.read()
    conn.close()
    return response, returned_html
def encode_multipart_formdata(fields, files):
    """
    Encode form data as a multipart/form-data request body.
    fields is a sequence of (name, value) elements for regular form fields.
    files is a sequence of (name, filename, value) elements for data to be
    uploaded as files.
    Return (content_type, body) ready for an httplib.HTTP instance.
    """
    BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
    CRLF = '\r\n'
    parts = []
    for (key, value) in fields:
        parts.extend([
            '--' + BOUNDARY,
            'Content-Disposition: form-data; name="%s"' % key,
            '',
            value,
        ])
    for (key, filename, value) in files:
        parts.extend([
            '--' + BOUNDARY,
            'Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename),
            'Content-Type: %s' % get_content_type(filename),
            '',
            value,
        ])
    # closing boundary, then a trailing CRLF
    parts.append('--' + BOUNDARY + '--')
    parts.append('')
    body = CRLF.join(parts)
    content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
    return content_type, body
def get_content_type(filename):
    """Guess the MIME type for filename; fall back to application/octet-stream."""
    import mimetypes
    guessed = mimetypes.guess_type(filename)[0]
    if guessed:
        return guessed
    return 'application/octet-stream'
class UploadRobot:
    """Uploads one image (from a URL or a local path) to the target wiki,
    logging the outcome to an open report file."""
    def __init__(self, url, description = u'', keepFilename = False, targetSite = None, urlEncoding = None,otchup=None):
        # otchup: open report file receiving the upload log
        # urlEncoding: when set, the filename taken from the URL is
        # urllib-unquoted and decoded with this encoding
        self.url = url
        self.otchup = otchup
        self.urlEncoding = urlEncoding
        self.description = description
        self.keepFilename = keepFilename
        # config.upload_to_commons redirects all uploads to Wikimedia Commons
        if config.upload_to_commons:
            self.targetSite = targetSite or wikipedia.getSite('commons', 'commons')
        else:
            self.targetSite = targetSite or wikipedia.getSite()
        self.targetSite.forceLogin()
    def urlOK(self):
        '''
        Returns true iff the URL references an online site or an
        existing local file.
        '''
        return self.url != '' and ('://' in self.url or os.path.exists(self.url))
    def upload_image(self, debug=False):
        """Gets the image at URL self.url, and uploads it to the target wiki.
        Returns (filename,tekst oshibki) -- i.e. (uploaded file name,
        error text); an empty error text means success.
        """
        tosh=u''
        # Get file contents
        if '://' in self.url:
            uo = wikipedia.MyURLopener()
            try:
                file = uo.open(self.url)
            except IOError:
                return (u'',u'wikipedia.MyURLopener open IOError')
        else:
            # Opening local files with MyURLopener would be possible, but we
            # don't do it because it only accepts ASCII characters in the
            # filename.
            file = open(self.url)
        wikipedia.output(u'Reading file %s' % self.url)
        try:
            contents = file.read()
        except IOError:
            return (u'',u'read IOError')
        file.close()
        if len(contents)<10 or contents.startswith('<html'):
            # too short, or an HTML error page instead of image data
            tosh="Couldn't download the image."
            print tosh
            self.otchup.write(u'======oshibka======\n%s\n==================\n' % tosh)
            self.otchup.flush()
            return (u'',tosh)
        # Isolate the pure name
        filename = self.url
        if '/' in filename:
            filename = filename.split('/')[-1]
        if '\\' in filename:
            filename = filename.split('\\')[-1]
        if self.urlEncoding:
            filename = urllib.unquote(filename)
            filename = filename.decode(self.urlEncoding)
        if not self.keepFilename:
            wikipedia.output(u"The filename on the target wiki will default to: %s" % filename)
            # ask newfn until it's valid
            ok = False
            # FIXME: these 2 belong somewhere else, presumably in family
            forbidden = '/' # to be extended
            allowed_formats = (u'jpg', u'jpeg', u'png', u'gif', u'svg', u'ogg')
            newfn = filename
            # interactive renaming disabled: newfn stays equal to filename
            # while not ok:
            #     ok = True
            #     newfn = wikipedia.input(u'Enter a better name, or press enter to accept:')
            #     if newfn == "":
            #         newfn = filename
            #     ext = os.path.splitext(newfn)[1].lower().strip('.')
            #     for c in forbidden:
            #         if c in newfn:
            #             print "Invalid character: %s. Please try again" % c
            #             ok = False
            #     if ext not in allowed_formats and ok:
            #         ans = wikipedia.input(u"File format is not %s but %s. Continue [y/N]? " % (allowed_formats, ext))
            #         if not ans.lower().startswith('y'):
            #             ok = False
            #     if not ok:
            #         newfn = wikipedia.input(u'Enter a better name, or press enter to accept:')
            if newfn != '':
                filename = newfn
        # MediaWiki doesn't allow spaces in the file name.
        # Replace them here to avoid an extra confirmation form
        filename = filename.replace(' ', '_')
        # Convert the filename (currently Unicode) to the encoding used on the
        # target wiki
        encodedFilename = filename.encode(self.targetSite.encoding())
        # A proper description for the submission.
        wikipedia.output(u"The suggested description is:")
        wikipedia.output(self.description)
        # interactive description editing disabled
        # choice = wikipedia.inputChoice(u'Do you want to change this description?', ['Yes', 'No'], ['y', 'N'], 'n')
        # if choice not in ['n', 'N']:
        #     newDescription = wikipedia.ui.editText(self.description)
        #     # if user didn't press Cancel:
        #     if newDescription:
        #         self.description = newDescription
        formdata = {}
        formdata["wpUploadDescription"] = self.description
        # if self.targetSite.version() >= '1.5':
        #     formdata["wpUploadCopyStatus"] = wikipedia.input(u"Copyright status: ")
        #     formdata["wpUploadSource"] = wikipedia.input(u"Source of image: ")
        formdata["wpUploadAffirm"] = "1"
        formdata["wpUpload"] = "upload bestand"
        formdata["wpIgnoreWarning"] = "1"  # skip duplicate/overwrite warnings
        # try to encode the strings to the encoding used by the target site.
        # if that's not possible (e.g. because there are non-Latin-1 characters and
        # the home Wikipedia uses Latin-1), convert all non-ASCII characters to
        # HTML entities.
        for key in formdata:
            assert isinstance(key, basestring), "ERROR: %s is not a string but %s" % (key, type(key))
            try:
                formdata[key] = formdata[key].encode(self.targetSite.encoding())
            except (UnicodeEncodeError, UnicodeDecodeError):
                formdata[key] = wikipedia.UnicodeToAsciiHtml(formdata[key]).encode(self.targetSite.encoding())
        # don't upload if we're in debug mode
        if not debug:
            wikipedia.output(u'Uploading file to %s...' % self.targetSite)
            response, returned_html = post_multipart(self.targetSite.hostname(),
                                                     self.targetSite.upload_address(),
                                                     formdata.items(),
                                                     (('wpUploadFile', encodedFilename, contents),),
                                                     cookies = self.targetSite.cookies()
                                                     )
            returned_html = returned_html.decode(self.targetSite.encoding())
            # Do we know how the "success!" HTML page should look like?
            # ATTENTION: if you changed your Wikimedia Commons account not to show
            # an English interface, this detection will fail!
            #success_msg = mediawiki_messages.get('successfulupload', site = self.targetSite)
            #success_msgR = re.compile(re.escape(success_msg))
            #if success_msgR.search(returned_html):
            #     wikipedia.output(u"Upload successful.")
            # dump the HTML page
            # wikipedia.output(u'%s\n\n' % returned_html)
            # wikipedia.output(u'%i %s' % (response.status, response.reason))
            self.otchup.write(u'%s\n============\n%s\n==================\n' % (filename,returned_html))
            self.otchup.flush()
            # Heuristic error detection: a response body longer than 20
            # characters (presumably success is a short redirect body --
            # TODO confirm) or a non-200/302 status is treated as failure;
            # a <span class='error'> block, if present, supplies the message.
            fosh=0
            tosh=returned_html
            if len(tosh)>20:
                fosh=1
            tnosh='<span class=\'error\'>'
            p1=iskats(tosh,0,tnosh)
            if p1!=-1:
                p1+=len(tnosh)
                p2=iskats(tosh,p1,'</span>')
                if p2!=-1:
                    tosh=tosh[p1:p2]  # keep just the error message text
            if not (response.status in [200, 302]):
                fosh=1
            if not fosh:
                tosh=u''
            if fosh and len(tosh)<2:
                tosh=u'!!!!oshibka!!!!'  # generic error marker
            tosh=tosh[:300].replace('\n',' ').replace('\r',' ')
            if fosh:
                self.otchup.write(u'======oshibka======\n%s\n' % tosh)
                self.otchup.flush()
            else:
                wikipedia.output(u"Upload successful.")
                self.otchup.write(u'<- ++\n')
                self.otchup.flush()
            self.otchup.write(u'============================\n')
            self.otchup.flush()
        return (filename,tosh)
    def run(self):
        # interactive URL prompting disabled; upload straight away
        # while not self.urlOK():
        #     if not self.url:
        #         wikipedia.output(u'No input filename given')
        #     else:
        #         wikipedia.output(u'Invalid input filename given. Try again.')
        #     self.url = wikipedia.input(u'File or URL where image is now:')
        return self.upload_image()
def main(args):
    """Parse command-line args (-keep, source URL/path, description words)
    and run the upload bot."""
    url = u''
    description = []
    keepFilename = False
    for arg in args:
        arg = wikipedia.argHandler(arg, 'upload')
        if arg:
            if arg.startswith('-keep'):
                keepFilename = True
            elif url == u'':
                url = arg  # first non-flag argument: the file or URL
            else:
                description.append(arg)  # the rest form the description
    description = u' '.join(description)
    bot = UploadRobot(url, description, keepFilename)
    bot.run()
if __name__ == "__main__":
    try:
        main(sys.argv[1:])
    finally:
        wikipedia.stopme()  # always shut the framework down cleanly
* com-izobr.py
# -*- coding: UTF-8 -*-
__version__='$Id:'
import wikipedia, pagegenerators
import re, sys
import codecs
import perevod,imagetransfer1
def iskat(t,s):
    """Return the index of the first occurrence of s in t, or -1."""
    needle_len = len(s)
    last = len(t) - needle_len
    pos = 0
    while pos <= last:
        if t[pos:pos + needle_len] == s:
            return pos
        pos += 1
    return -1
def iskats(t,i,s):
    """Return the first index >= i at which s occurs in t, or -1."""
    needle_len = len(s)
    last = len(t) - needle_len
    pos = i
    while pos <= last:
        if t[pos:pos + needle_len] == s:
            return pos
        pos += 1
    return -1
def iskato(t,i,s):
    """Return the greatest index <= i at which s occurs in t, or -1
    (backwards search starting at i)."""
    needle_len = len(s)
    for pos in range(i, -1, -1):
        if t[pos:pos + needle_len] == s:
            return pos
    return -1
def vivod(b):
    """Log b both to the console and to the module-level report file otch."""
    wikipedia.output(b)
    otch.write(b)
    otch.flush()
def ubr_nk_prob(t):
    """Strip spaces, tabs, CR and LF from both ends of t.

    Bug fix: the old implementation re-indexed t[0] / t[len(t)-1] inside
    its strip loops, so a string consisting solely of whitespace was
    shortened to u'' and then raised IndexError on the next comparison.
    str.strip with an explicit character set strips exactly the same four
    characters and handles the all-whitespace and empty cases safely.
    """
    return t.strip(u' \t\r\n')
otch.write(u'\n\n\n')
otch.flush()
zapis_fimen(fimen,nomerf,pl)
return
def main():
    """List every page title of the Commons File namespace (6) to the
    report file."""
    # mysite = wikipedia.getSite()
    sico=wikipedia.getSite(code = u'commons',fam = u'commons')
    global otch
    # `filename` is set at module level from argv
    otch = codecs.open(u'%s' % filename, 'w', 'utf-8')
    for page in sico.allpages(start = u'', namespace = 6):
        vivod(page.title()+u'\n')
# argv[1]: report file path
filename = wikipedia.argHandler(sys.argv[1], 'cht_cat')
for arg in sys.argv[2:]:
    arg = wikipedia.argHandler(arg, 'ivsen')
    if arg:
        if arg=="-log":
            # duplicate console output into a log file
            import logger
            sys.stdout = logger.Logger(sys.stdout, filename = 'com-izobr.log')
try:
    main()
except:
    # shut the framework down, then re-raise the original error
    wikipedia.stopme()
    raise
else:
    wikipedia.stopme()
* com-kateg.py
# -*- coding: UTF-8 -*-
__version__='$Id:'
import wikipedia, pagegenerators
import re, sys
import codecs
import perevod,imagetransfer1
def iskat(t,s):
    """Return the index of the first occurrence of s in t, or -1."""
    needle_len = len(s)
    last = len(t) - needle_len
    pos = 0
    while pos <= last:
        if t[pos:pos + needle_len] == s:
            return pos
        pos += 1
    return -1
def iskats(t,i,s):
    """Return the first index >= i at which s occurs in t, or -1."""
    needle_len = len(s)
    last = len(t) - needle_len
    pos = i
    while pos <= last:
        if t[pos:pos + needle_len] == s:
            return pos
        pos += 1
    return -1
def iskato(t,i,s):
    """Return the greatest index <= i at which s occurs in t, or -1
    (backwards search starting at i)."""
    needle_len = len(s)
    for pos in range(i, -1, -1):
        if t[pos:pos + needle_len] == s:
            return pos
    return -1
def vivod(b):
    """Log b both to the console and to the module-level report file otch."""
    wikipedia.output(b)
    otch.write(b)
    otch.flush()
def ubr_nk_prob(t):
    """Strip spaces, tabs, CR and LF from both ends of t.

    Bug fix: the old implementation re-indexed t[0] / t[len(t)-1] inside
    its strip loops, so a string consisting solely of whitespace was
    shortened to u'' and then raised IndexError on the next comparison.
    str.strip with an explicit character set strips exactly the same four
    characters and handles the all-whitespace and empty cases safely.
    """
    return t.strip(u' \t\r\n')
otch.write(u'\n\n\n')
otch.flush()
zapis_fimen(fimen,nomerf,pl)
return
def main():
    """List every page title of the Commons Category namespace (14) to the
    report file."""
    # mysite = wikipedia.getSite()
    sico=wikipedia.getSite(code = u'commons',fam = u'commons')
    global otch
    # `filename` is set at module level from argv
    otch = codecs.open(u'%s' % filename, 'w', 'utf-8')
    for page in sico.allpages(start = u'', namespace = 14):
        vivod(page.title()+u'\n')
# argv[1]: report file path
filename = wikipedia.argHandler(sys.argv[1], 'cht_cat')
for arg in sys.argv[2:]:
    arg = wikipedia.argHandler(arg, 'ivsen')
    if arg:
        if arg=="-log":
            # duplicate console output into a log file
            import logger
            sys.stdout = logger.Logger(sys.stdout, filename = 'com-izobr.log')
try:
    main()
except:
    # shut the framework down, then re-raise the original error
    wikipedia.stopme()
    raise
else:
    wikipedia.stopme()
* slov_iz_xml.py
# -*- coding: UTF-8 -*-
__version__='$Id:'
import wikipedia
import re, sys
import codecs
import xmlreader
from bib_tekst import *
def ubrkoment (text):
    """Remove <nowiki>...</nowiki> spans and HTML comments from text.

    Bug fix: the previous pattern's first alternative was '<nowiki>.*?'
    with nothing after the lazy quantifier, so it matched only the literal
    '<nowiki>' tag and left the supposedly ignored content (and the
    closing '</nowiki>') in place.  The alternative now spans through the
    closing tag, matching the function's stated intent.
    """
    nowikiOrHtmlCommentR = re.compile(r'<nowiki>.*?</nowiki>|<!--.*?-->', re.IGNORECASE | re.DOTALL)
    match = nowikiOrHtmlCommentR.search(text)
    # delete matches one at a time until none remain
    while match:
        text = text[:match.start()] + text[match.end():]
        match = nowikiOrHtmlCommentR.search(text)
    return text
def korr_im (jaz,t,pref):
    """Normalize a page title for the dictionary dump.

    Strips at most one matching prefix from pref, capitalizes the first
    letter, and rejects (returns u'') any namespaced title other than
    Category:/Image: or the language's localized category/image namespaces
    (taken from the module-level `mysite` family).
    """
    for p in pref:
        if t.startswith(p):
            t=t[len(p):]
            break  # strip only the first matching prefix
    t=perv_upper(t)
    if u':' in t:
        if ( (not t.startswith(u'Category:')) and
                (not t.startswith(u'Image:')) ):
            katprefi=mysite.family.category_namespace(jaz)
            if not t.startswith(katprefi+u':'):
                izprefi=mysite.family.image_namespace(jaz)
                if not t.startswith(izprefi+u':'):
                    return u''  # some other namespace: not wanted
    return t
def main(vhjaz,fvh,fvih,fipref):
    """Extract an interwiki dictionary from an XML dump.

    Reads the dump fvh (language vhjaz), skips redirects, and for every
    page writes a blank-line-separated record to fvih: first the source
    title as 'lang:title', then one line per interwiki link found in the
    page text.  fipref optionally names a UTF-8 file of title prefixes to
    strip (via korr_im).
    """
    tzfl=0
    tzst={}
    tzno={}
    pref=[]
    if fipref!=u'':
        # load the list of strippable title prefixes, one per line
        fpref=codecs.open(fipref,'rb',encoding='utf-8')
        for s in fpref.readlines():
            if ord(s[0]) == 65279:
                s=s[1:]  # drop the UTF-8 BOM on the first line
            s=s.replace(u'\r',u'')
            if s[len(s)-1]==u'\n':
                s=s[:len(s)-1]
            pref.append(s)
        fpref.close()
    n=u''
    # f0=codecs.open(fvh,'rb',encoding='utf-8')
    f1=codecs.open(fvih, 'w', 'utf-8')
    insite=wikipedia.getSite(vhjaz,fam = u'wikipedia')
    tblredir = {}
    # open xml dump and read page titles out of it
    dump = xmlreader.XmlDump(fvh)
    redirR = wikipedia.getSite().redirectRegex()
    readPagesCount = 0
    for entry in dump.parse():
        readPagesCount += 1
        # always print status message after 10000 pages
        if readPagesCount % 10000 == 0:
            print '%i pages read...' % readPagesCount
        m = redirR.search(entry.text)
        if m:
            # redirects are skipped entirely (resolution code disabled below)
            """
            target = m.group(1)
            # There might be redirects to another wiki. Ignore these.
            for code in wikipedia.getSite().family.langs.keys():
                if target.startswith('%s:' % code) or target.startswith(':%s:' % code):
                    wikipedia.output(u'NOTE: Ignoring %s which is a redirect to %s:' % (entry.title, code))
                    target = None
                    break
            # if the redirect does not link to another wiki
            if target:
                target = target.replace(' ', '_')
                # remove leading and trailing whitespace
                target = target.strip()
                # capitalize the first letter
                if not wikipedia.getSite().nocapitalize:
                    target = target[0].upper() + target[1:]
                if '#' in target:
                    target = target[:target.index('#')]
                if '|' in target:
                    wikipedia.output(u'HINT: %s is a redirect with a pipelink.' % entry.title)
                    target = target[:target.index('|')]
                tblredir[entry.title] = target
            """
            pass
        else:
            te=ubrkoment(entry.text)
            fperv=1  # 1 until the source-title line has been written
            interwikiR = re.compile(r'\[\[([a-z\-]+)\s?:([^\[\]\n]*)\]\]')
            for lang, pagetitle in interwikiR.findall(te):
                # Check if it really is in fact an interwiki link to a known
                # language, or if it's e.g. a category tag or an internal link
                if lang in insite.family.langs:
                    if '|' in pagetitle:
                        # ignore text after the pipe
                        pagetitle = pagetitle[:pagetitle.index('|')]
                    b1=korr_im (lang, pagetitle,pref)
                    if b1==u'':
                        continue  # unwanted namespace: skip this link
                    if fperv:
                        # b=u'%s:%s\n' % (vhjaz,entry.title)
                        b0=korr_im(vhjaz,entry.title,pref)
                        if b0==u'':
                            break  # source title itself rejected: skip page
                        b=u'%s:%s\n' % (vhjaz,b0)
                        f1.write(b)
                        fperv=0
                    # b=u'%s:%s\n' % (lang, pagetitle)
                    b=u'%s:%s\n' % (lang, b1)
                    f1.write(b)
            if fperv==0:
                # record written: terminate it with a blank line
                f1.write(u'\n')
                f1.flush()
# argv: dump language, XML dump path, output dictionary file,
# optional prefix-list file
vhjaz = wikipedia.argHandler(sys.argv[1], 'slov_iz_xml')
fvh = wikipedia.argHandler(sys.argv[2], 'slov_iz_xml')
fvih = wikipedia.argHandler(sys.argv[3], 'slov_iz_xml')
fpref=u''
if len(sys.argv)>=5:
    fpref = wikipedia.argHandler(sys.argv[4], 'slov_iz_xml')
#fotch = wikipedia.argHandler(sys.argv[4], 'slov_iz_xml')
#mysite = wikipedia.getSite()
#otch = codecs.open(fotch, 'w', 'utf-8')
mysite = wikipedia.getSite()  # used by korr_im for namespace names
try:
    main(vhjaz,fvh,fvih,fpref)
except:
    # shut the framework down, then re-raise the original error
    wikipedia.stopme()
    raise
else:
    wikipedia.stopme()
</nowiki>
* svoj-iz-ka.py
# -*- coding: UTF-8 -*-
__version__='$Id:'
import wikipedia, pagegenerators
import re, sys
import codecs
import perevod,imagetransfer1
def iskat(t,s):
    """Return the index of the first occurrence of s in t, or -1."""
    needle_len = len(s)
    last = len(t) - needle_len
    pos = 0
    while pos <= last:
        if t[pos:pos + needle_len] == s:
            return pos
        pos += 1
    return -1
def iskats(t,i,s):
    """Return the first index >= i at which s occurs in t, or -1."""
    needle_len = len(s)
    last = len(t) - needle_len
    pos = i
    while pos <= last:
        if t[pos:pos + needle_len] == s:
            return pos
        pos += 1
    return -1
def iskato(t,i,s):
    """Return the greatest index <= i at which s occurs in t, or -1
    (backwards search starting at i)."""
    needle_len = len(s)
    for pos in range(i, -1, -1):
        if t[pos:pos + needle_len] == s:
            return pos
    return -1
def vivod(b):
    """Log b both to the console and to the module-level report file otch."""
    wikipedia.output(b)
    otch.write(b)
    otch.flush()
def ubr_nk_prob(t):
    """Strip spaces, tabs, CR and LF from both ends of t.

    Bug fix: the old implementation re-indexed t[0] / t[len(t)-1] inside
    its strip loops, so a string consisting solely of whitespace was
    shortened to u'' and then raised IndexError on the next comparison.
    str.strip with an explicit character set strips exactly the same four
    characters and handles the all-whitespace and empty cases safely.
    """
    return t.strip(u' \t\r\n')
otch.write(u'\n\n\n')
otch.flush()
zapis_fimen(fimen,nomerf,pl)
return
def main():
    """List every File (namespace 6) and Category (namespace 14) page title
    of the home wiki to the report file."""
    mysite = wikipedia.getSite()
    # sico=wikipedia.getSite(code = u'commons',fam = u'commons')
    global otch
    # `filename` is set at module level from argv
    otch = codecs.open(u'%s' % filename, 'w', 'utf-8')
    for page in mysite.allpages(start = u'', namespace = 6):
        vivod(page.title()+u'\n')
    for page in mysite.allpages(start = u'', namespace = 14):
        vivod(page.title()+u'\n')
# argv[1]: report file path
filename = wikipedia.argHandler(sys.argv[1], 'cht_cat')
for arg in sys.argv[2:]:
    arg = wikipedia.argHandler(arg, 'ivsen')
    if arg:
        if arg=="-log":
            # duplicate console output into a log file
            import logger
            sys.stdout = logger.Logger(sys.stdout, filename = 'com-izobr.log')
try:
    main()
except:
    # shut the framework down, then re-raise the original error
    wikipedia.stopme()
    raise
else:
    wikipedia.stopme()
* bib_kateg.py
# -*- coding: UTF-8 -*-
__version__='$Id:'
import wikipedia
import re, sys, os
import codecs
import perevod
from bib_tekst import *
from get_buf import *
def vivod(otch,b):
    """Log b both to the console and to the given open report file otch."""
    wikipedia.output(b)
    otch.write(b)
    otch.flush()
class Kateg:
    """A category reference: a language code plus a normalized title
    (whitespace trimmed, first letter capitalized)."""
    def __init__(self, jaz,n,f=0):
        # f=1 means n may carry a namespace prefix ('Category:...'):
        # drop everything up to and including the first ':'
        if f:
            p=iskat(n,u':')
            if p!=-1:
                n=n[p+1:]
        self.jaz = jaz                       # language code
        self.n = perv_upper(ubr_nk_prob(n))  # normalized bare title
def perev_kateg(slov,nssvoj,si0,kato,fint,otch):
    """Translate the foreign categories in kato into categories of the home
    site si0 (language jaz0).

    For each category, tries the interwiki dictionary on the full
    'Namespace:Title' and, failing that, on the bare title (also with a
    trailing 'j' -- presumably an Esperanto plural, TODO confirm), keeping
    only titles present in nssvoj (the home wiki's page-name set).  When
    fint is true and nothing was found, walks up to the parent categories
    of the current set (fetched in batches) and retries, at most 10 levels.
    Returns the list of matched home-wiki category titles.
    """
    jaz0=si0.lang
    nt_kat_sp=[]   # result: matched home-wiki category titles
    glb=0          # recursion depth into parent categories
    tbl_pl={}      # page cache shared across levels
    kato1=kato
    while 1:
        katprefi=si0.family.category_namespace(jaz0)
        for ka in kato1:
            jaz=ka.jaz
            issite=wikipedia.getSite(jaz,fam = si0.family)
            iskatprefi=issite.family.category_namespace(jaz)
            t=iskatprefi+u':'+ka.n
            vivod(otch,u' prob kat %s:%s\n'%(jaz,t))
            # 1st try: dictionary lookup of the full 'Namespace:Title'
            t0s=perevod.perevod_iwi_spis(slov,jaz,jaz0,t)
            t1s=[]
            for t0 in t0s:
                if nssvoj.has_key(t0):
                    vivod(otch,u' +kat %s\n'%t0)
                    t1s.append(t0)
            if len(t1s)<1:
                # 2nd try: translate the bare title and re-prefix it
                t2s=perevod.perevod_iwi_spis(slov,jaz,jaz0,ka.n)
                if len(t2s)>0:
                    t2=t2s[0]
                    vivod(otch,u' +perev sta %s\n'%t2)
                    t2=katprefi+u':'+t2
                    if nssvoj.has_key(t2):
                        vivod(otch,u' +perev->kat\n')
                        t1=t2
                        t1s=[t1]
                    elif nssvoj.has_key(t2+u'j'):
                        vivod(otch,u' +perev->kat +\'j\'\n')
                        t1=t2+u'j'
                        t1s=[t1]
            for t1 in t1s:
                vivod(otch,u' [[%s]]\n'%t1)
                if not (t1 in nt_kat_sp):
                    nt_kat_sp.append(t1)
        # stop when not in deep mode, something was found, or depth cap hit
        if (not fint) or len(nt_kat_sp)>0 or glb>=10:
            break
        # build Page objects for the current category set and batch-fetch them
        nkpshpl=[]
        for ka in kato1:
            jaz=ka.jaz
            issite=wikipedia.getSite(jaz,fam = si0.family)
            iskatprefi=issite.family.category_namespace(jaz)
            t=iskatprefi+u':'+ka.n
            kapl=wikipedia.Page(issite,t)
            nkpshpl.append(kapl)
        sozd_buf_tbl(tbl_pl,nkpshpl,otch)
        # collect the parent categories of every fetched category page
        kato2=[]
        for kapl in nkpshpl:
            try:
                kapl=get_tbl(tbl_pl,kapl)
                kapl.get()
                b = u' + kateg:%s\n' % kapl.title()
                vivod(otch,b)
                # otch2.write(u'%s\n%s\n========\n'%(kapl.title(),kapl.get()))
                # otch2.flush()
                plikat = kapl.categories()
                for pka in plikat:
                    kato2.append(Kateg(kapl.site().lang,pka.title(),1))
                    vivod(otch,u' + %s\n'%pka.title())
            except wikipedia.NoPage:
                b = u' - kateg:%s\n' % kapl.title()
                vivod(otch,b)
            except wikipedia.IsRedirectPage:
                # follow the redirect as a category of its own
                b = u' + kateg:%s\n -> %s\n' % (kapl.title(),
                        kapl.getRedirectTarget())
                vivod(otch,b)
                kato2.append(Kateg(kapl.site().lang,kapl.getRedirectTarget(),1))
        kato1=kato2
        glb+=1
        vivod(otch,u' * kateg:glb=%d\n' % glb)
    return nt_kat_sp
def kateg_v_tekst(nt_kat_sp):
    """Format category titles as wiki text: one '[[title]]' link per line."""
    return u''.join([u'[[%s]]\n' % title for title in nt_kat_sp])
* bib_tekst.py
def RBR(a,b):
    """Case-insensitive equality test of two strings."""
    left = a.lower()
    right = b.lower()
    return left == right
def RBRm(a,b):
    """Return 1 if a equals any element of b case-insensitively, else 0."""
    lowered = a.lower()
    for candidate in b:
        if lowered == candidate.lower():
            return 1
    return 0
def perv_upper(t):
    """Return t with its first character uppercased; u'' for empty input."""
    if not t:
        return u''
    return t[0].upper() + t[1:]
def perv_lower(t):
    """Return t with its first character lowercased; u'' for empty input."""
    if not t:
        return u''
    return t[0].lower() + t[1:]
def perv_lower_s(s):
    """Apply perv_lower to every string in s, returning a new list."""
    return [perv_lower(item) for item in s]
def iskat(t,s):
    """Return the index of the first occurrence of s in t, or -1."""
    needle_len = len(s)
    last = len(t) - needle_len
    pos = 0
    while pos <= last:
        if t[pos:pos + needle_len] == s:
            return pos
        pos += 1
    return -1
def iskats(t,i,s):
    """Return the first index >= i at which s occurs in t, or -1."""
    needle_len = len(s)
    last = len(t) - needle_len
    pos = i
    while pos <= last:
        if t[pos:pos + needle_len] == s:
            return pos
        pos += 1
    return -1
def iskato(t,i,s):
    """Return the greatest index <= i at which s occurs in t, or -1
    (backwards search starting at i)."""
    needle_len = len(s)
    for pos in range(i, -1, -1):
        if t[pos:pos + needle_len] == s:
            return pos
    return -1
def iskats_mn(t,p0,ss):
    """Scan t from position p0 for the first occurrence of any needle in ss.

    Returns (position, index of the matching needle in ss); needles earlier
    in ss win ties at the same position.  Returns (-1, 0) when none occurs.
    """
    pos = p0
    end = len(t)
    while pos < end:
        for idx, needle in enumerate(ss):
            if t.startswith(needle, pos):
                return (pos, idx)
        pos += 1
    return (-1, 0)
def iskkonpodp(t,i,os,zs):
    """Scan t from position i for the closer zs that matches the current
    nesting level of os/zs pairs.

    Openers found along the way increase the depth; the index of the closer
    that takes the depth below zero is returned, or -1 if none is found.
    """
    depth = 0
    pos = i
    end = len(t)
    while pos < end:
        if t.startswith(os, pos):
            depth += 1
            pos += len(os)
        elif t.startswith(zs, pos):
            depth -= 1
            if depth == -1:
                return pos
            pos += len(zs)
        else:
            pos += 1
    return -1
def ubr_nk_prob(t):
    """Strip spaces, tabs, CR and LF from both ends of t."""
    return t.strip(u' \t\r\n')
* get_buf.py
import wikipedia, pagegenerators
import re, sys, os
import codecs,xmlreader
def vivod(otch,b):
    """Log b both to the console and to the given open report file otch."""
    wikipedia.output(b)
    otch.write(b)
    otch.flush()
def getall_ex(si,s):
    """Batch-fetch the pages s from site si via wikipedia.getall, swallowing
    SAX parse errors from a malformed server response (best effort: the
    affected pages are simply left unfetched)."""
    try:
        wikipedia.getall(si,s)
    except xmlreader.xml.sax._exceptions.SAXParseException:
        pass
def get1(pl):
    """Fetch a single page through the XML-error-tolerant batch fetcher."""
    getall_ex(pl.site(), [pl])
def get_tbl(tbl,pl):
    """Return the cached Page matching pl from tbl, keyed by
    (family name, language, title); on a cache miss, fetch pl's content
    and return pl itself."""
    fa=pl.site().family.name
    la=pl.site().lang
    ti=pl.title()
    if tbl.has_key((fa,la,ti)):
        return tbl[(fa,la,ti)]
    get1(pl)
    return pl
def sozd_buf_tbl(tbl,nkp,otch):
    """Pre-fetch the pages in nkp into the cache tbl, batching the fetches
    by (family, language) site so each site is hit once per group.

    tbl maps (family name, language, title) -> Page; pages already present
    are skipped ('uzxe' = already).  Progress is logged to otch.
    """
    f=[0 for i in range(len(nkp))]  # 1 = already handled in an earlier batch
    for i in range(len(nkp)):
        if f[i]:
            continue
        pl=nkp[i]
        fa=pl.site().family.name
        la=pl.site().lang
        vivod(otch,u'sozd_buf_tbl %s %s %s\n'%(fa,la,pl.title()))
        ind=(fa,la,pl.title())
        if tbl.has_key(ind):
            vivod(otch,u' <- uzxe\n')  # already cached
            continue
        tbl[ind]=pl
        t1=[pl]  # batch of same-site pages to fetch together
        j=i+1
        while j<len(nkp):
            if nkp[j].site().family.name==fa and nkp[j].site().lang==la:
                vivod(otch,u'sozd_buf_tbl %s %s %s\n'%(fa,la,nkp[j].title()))
                ind=(fa,la,nkp[j].title())
                if not tbl.has_key(ind):
                    tbl[ind]=nkp[j]
                    t1.append(nkp[j])
                else:
                    vivod(otch,u' <- uzxe\n')
                f[j]=1  # consumed by this batch
            j+=1
        getall_ex(pl.site(), t1)
    vivod(otch,u'\n')